bytestring/
lib.rs

1//! A UTF-8 encoded read-only string using `Bytes` as storage.
2//!
3//! See docs for [`ByteString`].
4
5#![no_std]
6#![deny(rust_2018_idioms, nonstandard_style)]
7#![warn(future_incompatible, missing_docs)]
8
9extern crate alloc;
10
11use alloc::{
12    boxed::Box,
13    string::{String, ToString},
14    vec::Vec,
15};
16use core::{borrow::Borrow, fmt, hash, ops, str};
17
18use bytes::Bytes;
19
20/// An immutable UTF-8 encoded string using [`Bytes`] as the storage.
21#[derive(Clone, Default, Eq, PartialOrd, Ord)]
22pub struct ByteString(Bytes);
23
24impl ByteString {
25    /// Creates a new empty `ByteString`.
26    pub const fn new() -> Self {
27        ByteString(Bytes::new())
28    }
29
30    /// Get a reference to the underlying `Bytes` object.
31    pub fn as_bytes(&self) -> &Bytes {
32        &self.0
33    }
34
35    /// Unwraps this `ByteString` into the underlying `Bytes` object.
36    pub fn into_bytes(self) -> Bytes {
37        self.0
38    }
39
40    /// Creates a new `ByteString` from a `&'static str`.
41    pub const fn from_static(src: &'static str) -> ByteString {
42        Self(Bytes::from_static(src.as_bytes()))
43    }
44
45    /// Creates a new `ByteString` from a Bytes.
46    ///
47    /// # Safety
48    /// This function is unsafe because it does not check the bytes passed to it are valid UTF-8.
49    /// If this constraint is violated, it may cause memory unsafety issues with future users of
50    /// the `ByteString`, as we assume that `ByteString`s are valid UTF-8. However, the most likely
51    /// issue is that the data gets corrupted.
52    pub const unsafe fn from_bytes_unchecked(src: Bytes) -> ByteString {
53        Self(src)
54    }
55
56    /// Divides one bytestring into two at an index, returning both parts.
57    ///
58    /// # Panics
59    ///
60    /// Panics if `mid` is not on a UTF-8 code point boundary, or if it is past the end of the last
61    /// code point of the bytestring.
62    pub fn split_at(&self, mid: usize) -> (ByteString, ByteString) {
63        let this: &str = self.as_ref();
64        let _valid_midpoint_check = this.split_at(mid);
65
66        let mut bytes = self.0.clone();
67        let first = bytes.split_to(mid);
68        let last = bytes;
69
70        unsafe {
71            (
72                ByteString::from_bytes_unchecked(first),
73                ByteString::from_bytes_unchecked(last),
74            )
75        }
76    }
77
78    /// Returns a new `ByteString` that is equivalent to the given `subset`.
79    ///
80    /// When processing a `ByteString` buffer with other tools, one often gets a `&str` which is in
81    /// fact a slice of the original `ByteString`; i.e., a subset of it. This function turns that
82    /// `&str` into another `ByteString`, as if one had sliced the `ByteString` with the offsets
83    /// that correspond to `subset`.
84    ///
85    /// Corresponds to [`Bytes::slice_ref`].
86    ///
87    /// This operation is `O(1)`.
88    ///
89    /// # Panics
90    ///
91    /// Panics if `subset` is not a sub-slice of this byte string.
92    ///
93    /// Note that strings which are only subsets from an equality perspective do not uphold this
94    /// requirement; see examples.
95    ///
96    /// # Examples
97    ///
98    /// ```
99    /// # use bytestring::ByteString;
100    /// let string = ByteString::from_static(" foo ");
101    /// let subset = string.trim();
102    /// let substring = string.slice_ref(subset);
103    /// assert_eq!(substring, "foo");
104    /// ```
105    ///
106    /// ```should_panic
107    /// # use bytestring::ByteString;
108    /// // panics because the given slice is not derived from the original byte string, despite
109    /// // being a logical subset of the string
110    /// ByteString::from_static("foo bar").slice_ref("foo");
111    /// ```
112    pub fn slice_ref(&self, subset: &str) -> Self {
113        Self(self.0.slice_ref(subset.as_bytes()))
114    }
115}
116
117impl PartialEq<str> for ByteString {
118    fn eq(&self, other: &str) -> bool {
119        &self[..] == other
120    }
121}
122
123impl<T: AsRef<str>> PartialEq<T> for ByteString {
124    fn eq(&self, other: &T) -> bool {
125        &self[..] == other.as_ref()
126    }
127}
128
129impl AsRef<ByteString> for ByteString {
130    fn as_ref(&self) -> &ByteString {
131        self
132    }
133}
134
135impl AsRef<[u8]> for ByteString {
136    fn as_ref(&self) -> &[u8] {
137        self.0.as_ref()
138    }
139}
140
141impl AsRef<str> for ByteString {
142    fn as_ref(&self) -> &str {
143        self
144    }
145}
146
147impl hash::Hash for ByteString {
148    fn hash<H: hash::Hasher>(&self, state: &mut H) {
149        (**self).hash(state);
150    }
151}
152
153impl ops::Deref for ByteString {
154    type Target = str;
155
156    #[inline]
157    fn deref(&self) -> &str {
158        let bytes = self.0.as_ref();
159        // SAFETY: UTF-8 validity is guaranteed during construction.
160        unsafe { str::from_utf8_unchecked(bytes) }
161    }
162}
163
164impl Borrow<str> for ByteString {
165    fn borrow(&self) -> &str {
166        self
167    }
168}
169
170impl From<String> for ByteString {
171    #[inline]
172    fn from(value: String) -> Self {
173        Self(Bytes::from(value))
174    }
175}
176
177impl From<&str> for ByteString {
178    #[inline]
179    fn from(value: &str) -> Self {
180        Self(Bytes::copy_from_slice(value.as_ref()))
181    }
182}
183
184impl From<Box<str>> for ByteString {
185    #[inline]
186    fn from(value: Box<str>) -> Self {
187        Self(Bytes::from(value.into_boxed_bytes()))
188    }
189}
190
191impl From<ByteString> for String {
192    #[inline]
193    fn from(value: ByteString) -> Self {
194        value.to_string()
195    }
196}
197
198impl TryFrom<&[u8]> for ByteString {
199    type Error = str::Utf8Error;
200
201    #[inline]
202    fn try_from(value: &[u8]) -> Result<Self, Self::Error> {
203        let _ = str::from_utf8(value)?;
204        Ok(ByteString(Bytes::copy_from_slice(value)))
205    }
206}
207
208impl TryFrom<Vec<u8>> for ByteString {
209    type Error = str::Utf8Error;
210
211    #[inline]
212    fn try_from(value: Vec<u8>) -> Result<Self, Self::Error> {
213        let buf = String::from_utf8(value).map_err(|err| err.utf8_error())?;
214        Ok(ByteString(Bytes::from(buf)))
215    }
216}
217
218impl TryFrom<Bytes> for ByteString {
219    type Error = str::Utf8Error;
220
221    #[inline]
222    fn try_from(value: Bytes) -> Result<Self, Self::Error> {
223        let _ = str::from_utf8(value.as_ref())?;
224        Ok(ByteString(value))
225    }
226}
227
228impl TryFrom<bytes::BytesMut> for ByteString {
229    type Error = str::Utf8Error;
230
231    #[inline]
232    fn try_from(value: bytes::BytesMut) -> Result<Self, Self::Error> {
233        let _ = str::from_utf8(&value)?;
234        Ok(ByteString(value.freeze()))
235    }
236}
237
238macro_rules! array_impls {
239    ($($len:expr)+) => {
240        $(
241            impl TryFrom<[u8; $len]> for ByteString {
242                type Error = str::Utf8Error;
243
244                #[inline]
245                fn try_from(value: [u8; $len]) -> Result<Self, Self::Error> {
246                    ByteString::try_from(&value[..])
247                }
248            }
249
250            impl TryFrom<&[u8; $len]> for ByteString {
251                type Error = str::Utf8Error;
252
253                #[inline]
254                fn try_from(value: &[u8; $len]) -> Result<Self, Self::Error> {
255                    ByteString::try_from(&value[..])
256                }
257            }
258        )+
259    }
260}
261
262array_impls!(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32);
263
264impl fmt::Debug for ByteString {
265    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
266        (**self).fmt(fmt)
267    }
268}
269
270impl fmt::Display for ByteString {
271    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
272        (**self).fmt(fmt)
273    }
274}
275
/// `serde` support, compiled only when the `serde` feature is enabled.
#[cfg(feature = "serde")]
mod serde {
    use alloc::string::String;

    use serde::{
        de::{Deserialize, Deserializer},
        ser::{Serialize, Serializer},
    };

    use super::ByteString;

    /// A `ByteString` is serialized as a plain string.
    impl Serialize for ByteString {
        #[inline]
        fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
            let text: &str = self;
            serializer.serialize_str(text)
        }
    }

    /// A `ByteString` is deserialized by first reading an owned `String` and then converting it.
    impl<'de> Deserialize<'de> for ByteString {
        #[inline]
        fn deserialize<D: Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
            let owned = String::deserialize(deserializer)?;
            Ok(ByteString::from(owned))
        }
    }

    #[cfg(test)]
    mod serde_impl_tests {
        use serde::de::DeserializeOwned;
        use static_assertions::assert_impl_all;

        use super::*;

        // Compile-time check that both serde traits are implemented.
        assert_impl_all!(ByteString: Serialize, DeserializeOwned);
    }
}
317
/// Unit tests for `ByteString`: trait coverage, conversions, formatting, slicing and splitting.
#[cfg(test)]
mod test {
    use alloc::{borrow::ToOwned, format, vec};
    use core::{
        hash::{Hash, Hasher},
        panic::{RefUnwindSafe, UnwindSafe},
    };

    use ahash::AHasher;
    use static_assertions::assert_impl_all;

    use super::*;

    // Compile-time checks that the expected auto and derived traits are implemented.
    assert_impl_all!(ByteString: Send, Sync, Unpin, Sized);
    assert_impl_all!(ByteString: Clone, Default, Eq, PartialOrd, Ord);
    assert_impl_all!(ByteString: fmt::Debug, fmt::Display);
    assert_impl_all!(ByteString: UnwindSafe, RefUnwindSafe);

    #[test]
    fn eq() {
        // Covers comparison against `&str`, `str` and `String`.
        let s: ByteString = ByteString::from_static("test");
        assert_eq!(s, "test");
        assert_eq!(s, *"test");
        assert_eq!(s, "test".to_owned());
    }

    #[test]
    fn new() {
        let _: ByteString = ByteString::new();
    }

    #[test]
    fn as_bytes() {
        let buf = ByteString::new();
        assert!(buf.as_bytes().is_empty());

        let buf = ByteString::from("hello");
        assert_eq!(buf.as_bytes(), "hello");
    }

    #[test]
    fn from_bytes_unchecked() {
        let buf = unsafe { ByteString::from_bytes_unchecked(Bytes::new()) };
        assert!(buf.is_empty());

        let buf = unsafe { ByteString::from_bytes_unchecked(Bytes::from("hello")) };
        assert_eq!(buf, "hello");
    }

    #[test]
    fn as_ref() {
        let buf = ByteString::new();

        let _: &ByteString = buf.as_ref();
        let _: &[u8] = buf.as_ref();
    }

    #[test]
    fn borrow() {
        let buf = ByteString::new();

        let _: &str = buf.borrow();
    }

    #[test]
    fn hash() {
        // A `ByteString` must hash exactly like the `str` it contains.
        let mut hasher1 = AHasher::default();
        "str".hash(&mut hasher1);

        let mut hasher2 = AHasher::default();
        let s = ByteString::from_static("str");
        s.hash(&mut hasher2);
        assert_eq!(hasher1.finish(), hasher2.finish());
    }

    #[test]
    fn from_string() {
        let s: ByteString = "hello".to_owned().into();
        assert_eq!(&s, "hello");
        let t: &str = s.as_ref();
        assert_eq!(t, "hello");
    }

    #[test]
    fn from_str() {
        let _: ByteString = "str".into();
        let _: ByteString = "str".to_owned().into_boxed_str().into();
    }

    #[test]
    fn to_string() {
        let buf = ByteString::from("foo");
        assert_eq!(String::from(buf), "foo");
    }

    #[test]
    fn from_static_str() {
        // `from_static` is a `const fn`, so it must be usable in a `static` initialiser.
        static _S: ByteString = ByteString::from_static("hello");
        let _ = ByteString::from_static("str");
    }

    #[test]
    fn try_from_slice() {
        let _ = ByteString::try_from(b"nice bytes").unwrap();
    }

    #[test]
    fn try_from_array() {
        assert_eq!(
            ByteString::try_from([b'h', b'i']).unwrap(),
            ByteString::from_static("hi")
        );
    }

    #[test]
    fn try_from_vec() {
        let _ = ByteString::try_from(vec![b'f', b'o', b'o']).unwrap();
        // Not valid UTF-8, so the conversion must fail.
        ByteString::try_from(vec![0, 159, 146, 150]).unwrap_err();
    }

    #[test]
    fn try_from_bytes() {
        let _ = ByteString::try_from(Bytes::from_static(b"nice bytes")).unwrap();
    }

    #[test]
    fn try_from_bytes_mut() {
        let _ = ByteString::try_from(bytes::BytesMut::from(&b"nice bytes"[..])).unwrap();
    }

    #[test]
    fn display() {
        let buf = ByteString::from("bar");
        assert_eq!(format!("{buf}"), "bar");
    }

    #[test]
    fn debug() {
        let buf = ByteString::from("baz");
        assert_eq!(format!("{buf:?}"), r#""baz""#);
    }

    #[cfg(feature = "serde")]
    #[test]
    fn deserialize() {
        let s: ByteString = serde_json::from_str(r#""nice bytes""#).unwrap();
        assert_eq!(s, "nice bytes");
    }

    #[cfg(feature = "serde")]
    #[test]
    fn serialize() {
        let s = serde_json::to_string(&ByteString::from_static("nice bytes")).unwrap();
        assert_eq!(s, r#""nice bytes""#);
    }

    #[test]
    fn slice_ref() {
        let string = ByteString::from_static(" foo ");
        let subset = string.trim();
        // subset is derived from original byte string
        let substring = string.slice_ref(subset);
        assert_eq!(substring, "foo");
    }

    #[test]
    #[should_panic]
    fn slice_ref_catches_not_a_subset() {
        // panics because the given slice is not derived from the original byte string, despite
        // being a logical subset of the string
        ByteString::from_static("foo bar").slice_ref("foo");
    }

    #[test]
    fn split_at() {
        let buf = ByteString::from_static("foo bar");

        let (first, last) = buf.split_at(0);
        assert_eq!(ByteString::from_static(""), first);
        assert_eq!(ByteString::from_static("foo bar"), last);

        let (first, last) = buf.split_at(4);
        assert_eq!(ByteString::from_static("foo "), first);
        assert_eq!(ByteString::from_static("bar"), last);

        let (first, last) = buf.split_at(7);
        assert_eq!(ByteString::from_static("foo bar"), first);
        assert_eq!(ByteString::from_static(""), last);
    }

    #[test]
    #[should_panic = "byte index 1 is not a char boundary;"]
    fn split_at_invalid_code_point() {
        // "µ" (U+00B5) is two bytes long in UTF-8, so byte offset 1 falls inside the character.
        ByteString::from_static("µ").split_at(1);
    }

    #[test]
    #[should_panic = "byte index 9 is out of bounds"]
    fn split_at_outside_string() {
        ByteString::from_static("foo").split_at(9);
    }
}