xref: /openbmc/linux/rust/kernel/str.rs (revision b18cb00e5a8a1182ef491b770ea1a3dab081dc5b)
1  // SPDX-License-Identifier: GPL-2.0
2  
3  //! String representations.
4  
5  use core::fmt::{self, Write};
6  use core::ops::{self, Deref, Index};
7  
8  use crate::{
9      bindings,
10      error::{code::*, Error},
11  };
12  
/// A byte string that carries no guarantee of being valid UTF-8.
///
/// `BStr` is a plain alias of `[u8]`; it exists only to make the intent
/// ("these bytes are string-like data") visible in signatures.
pub type BStr = [u8];
17  
/// Builds a [`BStr`] constant out of a string literal.
///
/// The literal is converted to its UTF-8 bytes at compile time, so it may
/// contain non-ASCII characters.
///
/// # Examples
///
/// ```
/// # use kernel::b_str;
/// # use kernel::str::BStr;
/// const MY_BSTR: &BStr = b_str!("My awesome BStr!");
/// ```
#[macro_export]
macro_rules! b_str {
    ($str:literal) => {{
        // Binding the literal to a `&str` constant first rejects anything
        // that is not a string literal.
        const TEXT: &str = $str;
        const BYTES: &$crate::str::BStr = TEXT.as_bytes();
        BYTES
    }};
}
38  
/// Possible errors when using conversion functions in [`CStr`].
///
/// The variants are plain markers without payload, so the type is cheap to
/// copy and can be compared directly (e.g. in `assert_eq!` on a conversion
/// result).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CStrConvertError {
    /// Supplied bytes contain an interior `NUL`.
    InteriorNul,

    /// Supplied bytes are not terminated by `NUL`.
    NotNulTerminated,
}
48  
49  impl From<CStrConvertError> for Error {
50      #[inline]
51      fn from(_: CStrConvertError) -> Error {
52          EINVAL
53      }
54  }
55  
56  /// A string that is guaranteed to have exactly one `NUL` byte, which is at the
57  /// end.
58  ///
59  /// Used for interoperability with kernel APIs that take C strings.
60  #[repr(transparent)]
61  pub struct CStr([u8]);
62  
63  impl CStr {
64      /// Returns the length of this string excluding `NUL`.
65      #[inline]
66      pub const fn len(&self) -> usize {
67          self.len_with_nul() - 1
68      }
69  
70      /// Returns the length of this string with `NUL`.
71      #[inline]
72      pub const fn len_with_nul(&self) -> usize {
73          // SAFETY: This is one of the invariant of `CStr`.
74          // We add a `unreachable_unchecked` here to hint the optimizer that
75          // the value returned from this function is non-zero.
76          if self.0.is_empty() {
77              unsafe { core::hint::unreachable_unchecked() };
78          }
79          self.0.len()
80      }
81  
82      /// Returns `true` if the string only includes `NUL`.
83      #[inline]
84      pub const fn is_empty(&self) -> bool {
85          self.len() == 0
86      }
87  
88      /// Wraps a raw C string pointer.
89      ///
90      /// # Safety
91      ///
92      /// `ptr` must be a valid pointer to a `NUL`-terminated C string, and it must
93      /// last at least `'a`. When `CStr` is alive, the memory pointed by `ptr`
94      /// must not be mutated.
95      #[inline]
96      pub unsafe fn from_char_ptr<'a>(ptr: *const core::ffi::c_char) -> &'a Self {
97          // SAFETY: The safety precondition guarantees `ptr` is a valid pointer
98          // to a `NUL`-terminated C string.
99          let len = unsafe { bindings::strlen(ptr) } + 1;
100          // SAFETY: Lifetime guaranteed by the safety precondition.
101          let bytes = unsafe { core::slice::from_raw_parts(ptr as _, len as _) };
102          // SAFETY: As `len` is returned by `strlen`, `bytes` does not contain interior `NUL`.
103          // As we have added 1 to `len`, the last byte is known to be `NUL`.
104          unsafe { Self::from_bytes_with_nul_unchecked(bytes) }
105      }
106  
107      /// Creates a [`CStr`] from a `[u8]`.
108      ///
109      /// The provided slice must be `NUL`-terminated, does not contain any
110      /// interior `NUL` bytes.
111      pub const fn from_bytes_with_nul(bytes: &[u8]) -> Result<&Self, CStrConvertError> {
112          if bytes.is_empty() {
113              return Err(CStrConvertError::NotNulTerminated);
114          }
115          if bytes[bytes.len() - 1] != 0 {
116              return Err(CStrConvertError::NotNulTerminated);
117          }
118          let mut i = 0;
119          // `i + 1 < bytes.len()` allows LLVM to optimize away bounds checking,
120          // while it couldn't optimize away bounds checks for `i < bytes.len() - 1`.
121          while i + 1 < bytes.len() {
122              if bytes[i] == 0 {
123                  return Err(CStrConvertError::InteriorNul);
124              }
125              i += 1;
126          }
127          // SAFETY: We just checked that all properties hold.
128          Ok(unsafe { Self::from_bytes_with_nul_unchecked(bytes) })
129      }
130  
131      /// Creates a [`CStr`] from a `[u8]` without performing any additional
132      /// checks.
133      ///
134      /// # Safety
135      ///
136      /// `bytes` *must* end with a `NUL` byte, and should only have a single
137      /// `NUL` byte (or the string will be truncated).
138      #[inline]
139      pub const unsafe fn from_bytes_with_nul_unchecked(bytes: &[u8]) -> &CStr {
140          // SAFETY: Properties of `bytes` guaranteed by the safety precondition.
141          unsafe { core::mem::transmute(bytes) }
142      }
143  
144      /// Returns a C pointer to the string.
145      #[inline]
146      pub const fn as_char_ptr(&self) -> *const core::ffi::c_char {
147          self.0.as_ptr() as _
148      }
149  
150      /// Convert the string to a byte slice without the trailing 0 byte.
151      #[inline]
152      pub fn as_bytes(&self) -> &[u8] {
153          &self.0[..self.len()]
154      }
155  
156      /// Convert the string to a byte slice containing the trailing 0 byte.
157      #[inline]
158      pub const fn as_bytes_with_nul(&self) -> &[u8] {
159          &self.0
160      }
161  
162      /// Yields a [`&str`] slice if the [`CStr`] contains valid UTF-8.
163      ///
164      /// If the contents of the [`CStr`] are valid UTF-8 data, this
165      /// function will return the corresponding [`&str`] slice. Otherwise,
166      /// it will return an error with details of where UTF-8 validation failed.
167      ///
168      /// # Examples
169      ///
170      /// ```
171      /// # use kernel::str::CStr;
172      /// let cstr = CStr::from_bytes_with_nul(b"foo\0").unwrap();
173      /// assert_eq!(cstr.to_str(), Ok("foo"));
174      /// ```
175      #[inline]
176      pub fn to_str(&self) -> Result<&str, core::str::Utf8Error> {
177          core::str::from_utf8(self.as_bytes())
178      }
179  
180      /// Unsafely convert this [`CStr`] into a [`&str`], without checking for
181      /// valid UTF-8.
182      ///
183      /// # Safety
184      ///
185      /// The contents must be valid UTF-8.
186      ///
187      /// # Examples
188      ///
189      /// ```
190      /// # use kernel::c_str;
191      /// # use kernel::str::CStr;
192      /// // SAFETY: String literals are guaranteed to be valid UTF-8
193      /// // by the Rust compiler.
194      /// let bar = c_str!("ツ");
195      /// assert_eq!(unsafe { bar.as_str_unchecked() }, "ツ");
196      /// ```
197      #[inline]
198      pub unsafe fn as_str_unchecked(&self) -> &str {
199          unsafe { core::str::from_utf8_unchecked(self.as_bytes()) }
200      }
201  }
202  
203  impl fmt::Display for CStr {
204      /// Formats printable ASCII characters, escaping the rest.
205      ///
206      /// ```
207      /// # use kernel::c_str;
208      /// # use kernel::str::CStr;
209      /// # use kernel::str::CString;
210      /// let penguin = c_str!("��");
211      /// let s = CString::try_from_fmt(fmt!("{}", penguin)).unwrap();
212      /// assert_eq!(s.as_bytes_with_nul(), "\\xf0\\x9f\\x90\\xa7\0".as_bytes());
213      ///
214      /// let ascii = c_str!("so \"cool\"");
215      /// let s = CString::try_from_fmt(fmt!("{}", ascii)).unwrap();
216      /// assert_eq!(s.as_bytes_with_nul(), "so \"cool\"\0".as_bytes());
217      /// ```
218      fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
219          for &c in self.as_bytes() {
220              if (0x20..0x7f).contains(&c) {
221                  // Printable character.
222                  f.write_char(c as char)?;
223              } else {
224                  write!(f, "\\x{:02x}", c)?;
225              }
226          }
227          Ok(())
228      }
229  }
230  
231  impl fmt::Debug for CStr {
232      /// Formats printable ASCII characters with a double quote on either end, escaping the rest.
233      ///
234      /// ```
235      /// # use kernel::c_str;
236      /// # use kernel::str::CStr;
237      /// # use kernel::str::CString;
238      /// let penguin = c_str!("��");
239      /// let s = CString::try_from_fmt(fmt!("{:?}", penguin)).unwrap();
240      /// assert_eq!(s.as_bytes_with_nul(), "\"\\xf0\\x9f\\x90\\xa7\"\0".as_bytes());
241      ///
242      /// // Embedded double quotes are escaped.
243      /// let ascii = c_str!("so \"cool\"");
244      /// let s = CString::try_from_fmt(fmt!("{:?}", ascii)).unwrap();
245      /// assert_eq!(s.as_bytes_with_nul(), "\"so \\\"cool\\\"\"\0".as_bytes());
246      /// ```
247      fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
248          f.write_str("\"")?;
249          for &c in self.as_bytes() {
250              match c {
251                  // Printable characters.
252                  b'\"' => f.write_str("\\\"")?,
253                  0x20..=0x7e => f.write_char(c as char)?,
254                  _ => write!(f, "\\x{:02x}", c)?,
255              }
256          }
257          f.write_str("\"")
258      }
259  }
260  
261  impl AsRef<BStr> for CStr {
262      #[inline]
263      fn as_ref(&self) -> &BStr {
264          self.as_bytes()
265      }
266  }
267  
268  impl Deref for CStr {
269      type Target = BStr;
270  
271      #[inline]
272      fn deref(&self) -> &Self::Target {
273          self.as_bytes()
274      }
275  }
276  
277  impl Index<ops::RangeFrom<usize>> for CStr {
278      type Output = CStr;
279  
280      #[inline]
281      fn index(&self, index: ops::RangeFrom<usize>) -> &Self::Output {
282          // Delegate bounds checking to slice.
283          // Assign to _ to mute clippy's unnecessary operation warning.
284          let _ = &self.as_bytes()[index.start..];
285          // SAFETY: We just checked the bounds.
286          unsafe { Self::from_bytes_with_nul_unchecked(&self.0[index.start..]) }
287      }
288  }
289  
290  impl Index<ops::RangeFull> for CStr {
291      type Output = CStr;
292  
293      #[inline]
294      fn index(&self, _index: ops::RangeFull) -> &Self::Output {
295          self
296      }
297  }
298  
299  mod private {
300      use core::ops;
301  
302      // Marker trait for index types that can be forward to `BStr`.
303      pub trait CStrIndex {}
304  
305      impl CStrIndex for usize {}
306      impl CStrIndex for ops::Range<usize> {}
307      impl CStrIndex for ops::RangeInclusive<usize> {}
308      impl CStrIndex for ops::RangeToInclusive<usize> {}
309  }
310  
311  impl<Idx> Index<Idx> for CStr
312  where
313      Idx: private::CStrIndex,
314      BStr: Index<Idx>,
315  {
316      type Output = <BStr as Index<Idx>>::Output;
317  
318      #[inline]
319      fn index(&self, index: Idx) -> &Self::Output {
320          &self.as_bytes()[index]
321      }
322  }
323  
/// Builds a [`CStr`] constant out of a string literal.
///
/// A `NUL` terminator is appended automatically, so the literal itself should
/// not contain any `NUL` bytes; if it does, constant evaluation panics.
///
/// # Examples
///
/// ```
/// # use kernel::c_str;
/// # use kernel::str::CStr;
/// const MY_CSTR: &CStr = c_str!("My awesome CStr!");
/// ```
#[macro_export]
macro_rules! c_str {
    ($str:expr) => {{
        const TERMINATED: &str = concat!($str, "\0");
        const CSTR: &$crate::str::CStr =
            match $crate::str::CStr::from_bytes_with_nul(TERMINATED.as_bytes()) {
                Err(_) => panic!("string contains interior NUL"),
                Ok(cstr) => cstr,
            };
        CSTR
    }};
}
346  
#[cfg(test)]
mod tests {
    use super::*;

    /// Valid UTF-8 contents are returned unchanged by `to_str`.
    #[test]
    fn test_cstr_to_str() {
        // UTF-8 encoding of U+1F980 (crab emoji), followed by the terminator.
        let good_bytes = b"\xf0\x9f\xa6\x80\0";
        let checked_cstr = CStr::from_bytes_with_nul(good_bytes).unwrap();
        let checked_str = checked_cstr.to_str().unwrap();
        assert_eq!(checked_str, "\u{1F980}");
    }

    /// Invalid UTF-8 contents make `to_str` return an error, so `unwrap` panics.
    #[test]
    #[should_panic]
    fn test_cstr_to_str_panic() {
        // `0xc3` starts a two-byte sequence, but `0x28` is not a continuation byte.
        let bad_bytes = b"\xc3\x28\0";
        let checked_cstr = CStr::from_bytes_with_nul(bad_bytes).unwrap();
        checked_cstr.to_str().unwrap();
    }

    /// The unchecked conversion returns the same text for valid UTF-8 input.
    #[test]
    fn test_cstr_as_str_unchecked() {
        // UTF-8 encoding of U+1F427 (penguin emoji), followed by the terminator.
        let good_bytes = b"\xf0\x9f\x90\xA7\0";
        let checked_cstr = CStr::from_bytes_with_nul(good_bytes).unwrap();
        // SAFETY: `good_bytes` (without the terminator) is valid UTF-8.
        let unchecked_str = unsafe { checked_cstr.as_str_unchecked() };
        assert_eq!(unchecked_str, "\u{1F427}");
    }
}
375  
/// Allows formatting of [`fmt::Arguments`] into a raw buffer.
///
/// Writing past the end of the buffer is not an error: the excess bytes are
/// dropped while the position keeps advancing, so callers can work out the
/// size required to fit everything.
///
/// # Invariants
///
/// The memory region between `pos` (inclusive) and `end` (exclusive) is valid for writes if `pos`
/// is less than `end`.
pub(crate) struct RawFormatter {
    // Addresses are kept as `usize` so the `saturating_*` functions can be used.
    #[allow(dead_code)]
    beg: usize,
    pos: usize,
    end: usize,
}

impl RawFormatter {
    /// Creates a new instance of [`RawFormatter`] with the given buffer pointers.
    ///
    /// # Safety
    ///
    /// If `pos` is less than `end`, then the region between `pos` (inclusive) and `end`
    /// (exclusive) must be valid for writes for the lifetime of the returned [`RawFormatter`].
    pub(crate) unsafe fn from_ptrs(pos: *mut u8, end: *mut u8) -> Self {
        let start = pos as usize;
        let limit = end as usize;
        // INVARIANT: The safety requirements guarantee the type invariants.
        Self {
            beg: start,
            pos: start,
            end: limit,
        }
    }

    /// Returns the current insert position.
    ///
    /// N.B. It may point to invalid memory.
    pub(crate) fn pos(&self) -> *mut u8 {
        self.pos as *mut u8
    }
}

impl fmt::Write for RawFormatter {
    fn write_str(&mut self, s: &str) -> fmt::Result {
        let src = s.as_bytes();

        // Position after the whole of `s` has been accounted for. It may lie
        // beyond `end`, but it must not wrap around to 0.
        let next_pos = self.pos.saturating_add(src.len());

        // Space left in the buffer; 0 once `pos` has reached or passed `end`.
        let room = self.end.saturating_sub(self.pos);
        let copy_len = room.min(src.len());

        if copy_len != 0 {
            // SAFETY: A non-zero `copy_len` means `pos` has not gone past
            // `end` yet and `copy_len` bytes fit before `end`, so the
            // destination is valid for writes per the type invariants.
            unsafe {
                core::ptr::copy_nonoverlapping(src.as_ptr(), self.pos as *mut u8, copy_len)
            };
        }

        self.pos = next_pos;
        Ok(())
    }
}
442