1
use std::borrow::Cow;
2
use std::fmt::{Display, Write};
3
use std::hash::Hash;
4
use std::mem::size_of;
5
use std::ops::Deref;
6
use std::str::FromStr;
7

            
8
use actionable::Identifier;
9
use serde::de::Visitor;
10
use serde::{Deserialize, Serialize};
11
use tinyvec::{Array, TinyVec};
12

            
13
use crate::key::{ByteSource, Key, KeyEncoding, KeyKind, KeyVisitor};
14

            
15
/// The serialized representation of a document's unique ID.
16
76619414
#[derive(Default, Ord, Hash, Eq, PartialEq, PartialOrd, Clone)]
17
pub struct DocumentId(TinyVec<[u8; Self::INLINE_SIZE]>);
18

            
19
impl Deref for DocumentId {
20
    type Target = [u8];
21

            
22
337773067
    fn deref(&self) -> &[u8] {
23
337773067
        &self.0
24
337773067
    }
25
}
26

            
27
impl std::fmt::Debug for DocumentId {
28
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
29
        f.write_str("DocumentId(")?;
30
        arc_bytes::print_bytes(self, f)?;
31
        f.write_char(')')
32
    }
33
}
34

            
35
impl Display for DocumentId {
36
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
37
447
        if let Ok(string) = std::str::from_utf8(self.as_ref()) {
38
450
            if string.bytes().all(|b| (32..=127).contains(&b)) {
39
1
                return f.write_str(string);
40
445
            }
41
1
        }
42

            
43
446
        if let Some((first_nonzero_byte, _)) = self
44
446
            .as_ref()
45
446
            .iter()
46
446
            .copied()
47
446
            .enumerate()
48
69145
            .find(|(_index, b)| *b != 0)
49
        {
50
444
            if first_nonzero_byte > 0 {
51
443
                write!(f, "{first_nonzero_byte:x}$")?;
52
            } else {
53
1
                f.write_char('$')?;
54
            }
55

            
56
446
            for (index, byte) in self[first_nonzero_byte..].iter().enumerate() {
57
446
                if index > 0 {
58
2
                    write!(f, "{byte:02x}")?;
59
                } else {
60
444
                    write!(f, "{byte:x}")?;
61
                }
62
            }
63
444
            Ok(())
64
        } else {
65
            // All zeroes
66
2
            write!(f, "{:x}$", self.len())
67
        }
68
447
    }
69
}
70

            
71
impl<'a> From<DocumentId> for Identifier<'a> {
72
2
    fn from(id: DocumentId) -> Self {
73
2
        Identifier::from(id.to_vec())
74
2
    }
75
}
76

            
77
impl<'a> From<&'a DocumentId> for Identifier<'a> {
78
1346680
    fn from(id: &'a DocumentId) -> Self {
79
1346680
        Identifier::from(&**id)
80
1346680
    }
81
}
82

            
83
1
#[test]
fn document_id_identifier_tests() {
    // An ID created from a string converts to the same identifier as the
    // string itself.
    let text_id = DocumentId::new("hello").unwrap();
    assert_eq!(Identifier::from(text_id), Identifier::from("hello"));

    // An ID created from an integer converts to the same identifier as the
    // integer itself.
    let numeric_id = DocumentId::from_u64(1);
    assert_eq!(Identifier::from(numeric_id), Identifier::from(1));
}
94

            
95
/// An invalid hexadecimal character was encountered.
96
#[derive(thiserror::Error, Debug)]
97
#[error("invalid hexadecimal bytes")]
98
pub struct InvalidHexadecimal;
99

            
100
416
const fn decode_hex_nibble(byte: u8) -> Result<u8, InvalidHexadecimal> {
101
416
    match byte {
102
416
        b'0'..=b'9' => Ok(byte - b'0'),
103
11
        b'A'..=b'F' => Ok(byte - b'A' + 10),
104
11
        b'a'..=b'f' => Ok(byte - b'a' + 10),
105
        _ => Err(InvalidHexadecimal),
106
    }
107
416
}
108

            
109
impl FromStr for DocumentId {
110
    type Err = crate::Error;
111

            
112
206
    fn from_str(s: &str) -> Result<Self, Self::Err> {
113
206
        if s.is_empty() {
114
            return Ok(Self::default());
115
206
        }
116
206

            
117
206
        let bytes = s.as_bytes();
118
418
        if let Some((pound_offset, _)) = s.bytes().enumerate().find(|(_index, b)| *b == b'$') {
119
205
            if pound_offset > 5 {
120
                return Err(crate::Error::DocumentIdTooLong);
121
205
            }
122

            
123
205
            let preceding_zeroes = if pound_offset > 0 {
124
204
                let mut length = TinyVec::<[u8; 1]>::new();
125
204
                decode_big_endian_hex(&bytes[0..pound_offset], &mut length)?;
126
204
                let mut zeroes = [0_u8; size_of::<usize>()];
127
204
                let offset = zeroes.len() - length.len();
128
204
                zeroes[offset..].copy_from_slice(&length);
129
204
                usize::from_be_bytes(zeroes)
130
            } else {
131
1
                0
132
            };
133

            
134
205
            let mut id = TinyVec::new();
135
205
            decode_big_endian_hex(&bytes[pound_offset + 1..], &mut id)?;
136
205
            if preceding_zeroes > 0 {
137
204
                let total_length = preceding_zeroes + id.len();
138
204
                if total_length > Self::MAX_LENGTH {
139
                    return Err(crate::Error::DocumentIdTooLong);
140
204
                }
141
204
                // The full length indicated a longer ID, so we need to prefix some null bytes.
142
204
                id.splice(0..0, std::iter::repeat(0).take(preceding_zeroes));
143
1
            }
144
205
            Ok(Self(id))
145
1
        } else if bytes.len() > Self::MAX_LENGTH {
146
            Err(crate::Error::DocumentIdTooLong)
147
        } else {
148
            // UTF-8 representable
149
1
            Self::try_from(bytes)
150
        }
151
206
    }
152
}
153

            
154
409
fn decode_big_endian_hex<A: Array<Item = u8>>(
155
409
    bytes: &[u8],
156
409
    output: &mut TinyVec<A>,
157
409
) -> Result<(), crate::Error> {
158
409
    let mut chunks = if bytes.len() & 1 == 0 {
159
5
        bytes.chunks_exact(2)
160
    } else {
161
        // Odd amount of bytes, special case the first char
162
404
        output.push(decode_hex_nibble(bytes[0])?);
163
404
        bytes[1..].chunks_exact(2)
164
    };
165
415
    for chunk in &mut chunks {
166
6
        let upper = decode_hex_nibble(chunk[0])?;
167
6
        let lower = decode_hex_nibble(chunk[1])?;
168
6
        output.push(upper << 4 | lower);
169
    }
170
409
    if !chunks.remainder().is_empty() {
171
        return Err(crate::Error::from(InvalidHexadecimal));
172
409
    }
173
409
    Ok(())
174
409
}
175

            
176
1
#[test]
fn document_id_parsing() {
    // Builds an ID from `bytes`, checks that it displays as `display`, and
    // checks that parsing the displayed text yields `bytes` again.
    fn test_id(bytes: &[u8], display: &str) {
        let rendered = DocumentId::try_from(bytes).unwrap().to_string();
        assert_eq!(rendered, display);

        let reparsed = DocumentId::from_str(&rendered).unwrap();
        assert_eq!(&*reparsed, bytes);
    }

    test_id(b"hello", "hello");
    test_id(b"\x00\x0a\xaf\xfa", "1$aaffa");
    test_id(&1_u128.to_be_bytes(), "f$1");
    test_id(&17_u8.to_be_bytes(), "$11");
    test_id(&[0_u8; 63], "3f$");

    // An all-zero ID of exactly MAX_LENGTH bytes, expressed in terms of the
    // constant so the longest possible ID stays covered if MAX_LENGTH changes.
    let longest_id = vec![0_u8; DocumentId::MAX_LENGTH];
    let longest_display = format!("{:x}$", DocumentId::MAX_LENGTH);
    test_id(&longest_id, &longest_display);
}
199

            
200
impl<'a> TryFrom<&'a [u8]> for DocumentId {
201
    type Error = crate::Error;
202

            
203
3246416
    fn try_from(bytes: &'a [u8]) -> Result<Self, Self::Error> {
204
3246416
        if bytes.len() <= Self::MAX_LENGTH {
205
3246416
            Ok(Self(TinyVec::from(bytes)))
206
        } else {
207
            Err(crate::Error::DocumentIdTooLong)
208
        }
209
3246416
    }
210
}
211

            
212
impl<'a> TryFrom<Cow<'a, [u8]>> for DocumentId {
213
    type Error = crate::Error;
214

            
215
    fn try_from(bytes: Cow<'a, [u8]>) -> Result<Self, Self::Error> {
216
        Self::try_from(bytes.as_ref())
217
    }
218
}
219

            
220
impl<const N: usize> TryFrom<[u8; N]> for DocumentId {
221
    type Error = crate::Error;
222

            
223
    fn try_from(bytes: [u8; N]) -> Result<Self, Self::Error> {
224
        Self::try_from(&bytes[..])
225
    }
226
}
227

            
228
impl DocumentId {
229
    const INLINE_SIZE: usize = 16;
230
    /// The maximum size able to be stored in a document's unique id.
231
    pub const MAX_LENGTH: usize = 65_535;
232

            
233
    /// Returns a new instance with `value` as the identifier..
234
841244
    pub fn new<PrimaryKey: for<'k> Key<'k>, PrimaryKeyRef: KeyEncoding<PrimaryKey> + ?Sized>(
235
841244
        value: &PrimaryKeyRef,
236
841244
    ) -> Result<Self, crate::Error> {
237
841244
        let bytes = value
238
841244
            .as_ord_bytes()
239
841244
            .map_err(|err| crate::Error::other("key serialization", err))?;
240
841244
        Self::try_from(&bytes[..])
241
841244
    }
242

            
243
    /// Returns a new document ID for a u64. This is equivalent to
244
    /// `DocumentId::new(id)`, but since this function accepts a non-generic
245
    /// type, it can help with type inference in some expressions.
246
    #[must_use]
247
    #[allow(clippy::missing_panics_doc)] // Unwrap is impossible to fail.
248
3645
    pub fn from_u64(id: u64) -> Self {
249
3645
        Self::try_from(&id.to_be_bytes()[..]).unwrap()
250
3645
    }
251

            
252
    /// Returns a new document ID for a u32. This is equivalent to
253
    /// `DocumentId::new(id)`, but since this function accepts a non-generic
254
    /// type, it can help with type inference in some expressions.
255
    #[must_use]
256
    #[allow(clippy::missing_panics_doc)] // Unwrap is impossible to fail.
257
    pub fn from_u32(id: u32) -> Self {
258
        Self::try_from(&id.to_be_bytes()[..]).unwrap()
259
    }
260

            
261
    /// Returns the contained value, deserialized back to its original type.
262
1289651
    pub fn deserialize<'k, PrimaryKey: Key<'k>>(&'k self) -> Result<PrimaryKey, crate::Error> {
263
1289651
        PrimaryKey::from_ord_bytes(ByteSource::Borrowed(self.as_ref()))
264
1289651
            .map_err(|err| crate::Error::other("key serialization", err))
265
1289651
    }
266
}
267

            
268
impl Serialize for DocumentId {
269
157382565
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
270
157382565
    where
271
157382565
        S: serde::Serializer,
272
157382565
    {
273
157382565
        serializer.serialize_bytes(self.as_ref())
274
157382565
    }
275
}
276

            
277
impl<'de> Deserialize<'de> for DocumentId {
278
83031211
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
279
83031211
    where
280
83031211
        D: serde::Deserializer<'de>,
281
83031211
    {
282
83031211
        deserializer.deserialize_byte_buf(DocumentIdVisitor)
283
83031211
    }
284
}
285

            
286
struct DocumentIdVisitor;
287

            
288
impl<'de> Visitor<'de> for DocumentIdVisitor {
289
    type Value = DocumentId;
290

            
291
    fn expecting(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
292
        formatter.write_str("a document id (bytes)")
293
    }
294

            
295
83031211
    fn visit_bytes<E>(self, v: &[u8]) -> Result<Self::Value, E>
296
83031211
    where
297
83031211
        E: serde::de::Error,
298
83031211
    {
299
83031211
        Ok(DocumentId(TinyVec::from(v)))
300
83031211
    }
301
}
302

            
303
impl<'k> Key<'k> for DocumentId {
304
    const CAN_OWN_BYTES: bool = false;
305

            
306
    fn from_ord_bytes<'e>(bytes: ByteSource<'k, 'e>) -> Result<Self, Self::Error> {
307
        Self::try_from(bytes.as_ref())
308
    }
309
}
310

            
311
impl<PrimaryKey> KeyEncoding<PrimaryKey> for DocumentId
312
where
313
    PrimaryKey: for<'pk> Key<'pk>,
314
{
315
    type Error = crate::Error;
316

            
317
    const LENGTH: Option<usize> = None;
318

            
319
    fn describe<Visitor>(visitor: &mut Visitor)
320
    where
321
        Visitor: KeyVisitor,
322
    {
323
        visitor.visit_type(KeyKind::Bytes);
324
    }
325

            
326
37986
    fn as_ord_bytes(&self) -> Result<Cow<'_, [u8]>, Self::Error> {
327
37986
        Ok(Cow::Borrowed(self))
328
37986
    }
329
}