3 pub trait OsStrExt: private::Sealed {
4 /// Converts to a string slice.
5 fn try_str(&self) -> Result<&str, std::str::Utf8Error>;
6 /// Returns `true` if the given pattern matches a sub-slice of
9 /// Returns `false` if it does not.
14 /// use clap_lex::OsStrExt as _;
15 /// let bananas = std::ffi::OsStr::new("bananas");
17 /// assert!(bananas.contains("nana"));
18 /// assert!(!bananas.contains("apples"));
20 fn contains(&self, needle: &str) -> bool;
21 /// Returns the byte index of the first character of this string slice that
22 /// matches the pattern.
24 /// Returns [`None`] if the pattern doesn't match.
29 /// use clap_lex::OsStrExt as _;
30 /// let s = std::ffi::OsStr::new("Löwe 老虎 Léopard Gepardi");
32 /// assert_eq!(s.find("L"), Some(0));
33 /// assert_eq!(s.find("é"), Some(14));
34 /// assert_eq!(s.find("par"), Some(17));
37 /// Not finding the pattern:
40 /// use clap_lex::OsStrExt as _;
41 /// let s = std::ffi::OsStr::new("Löwe 老虎 Léopard");
43 /// assert_eq!(s.find("1"), None);
45 fn find(&self, needle: &str) -> Option<usize>;
46 /// Returns a string slice with the prefix removed.
48 /// If the string starts with the pattern `prefix`, returns substring after the prefix, wrapped
51 /// If the string does not start with `prefix`, returns `None`.
56 /// use std::ffi::OsStr;
57 /// use clap_lex::OsStrExt as _;
58 /// assert_eq!(OsStr::new("foo:bar").strip_prefix("foo:"), Some(OsStr::new("bar")));
59 /// assert_eq!(OsStr::new("foo:bar").strip_prefix("bar"), None);
60 /// assert_eq!(OsStr::new("foofoo").strip_prefix("foo"), Some(OsStr::new("foo")));
62 fn strip_prefix(&self, prefix: &str) -> Option<&OsStr>;
63 /// Returns `true` if the given pattern matches a prefix of this
66 /// Returns `false` if it does not.
71 /// use clap_lex::OsStrExt as _;
72 /// let bananas = std::ffi::OsStr::new("bananas");
74 /// assert!(bananas.starts_with("bana"));
75 /// assert!(!bananas.starts_with("nana"));
77 fn starts_with(&self, prefix: &str) -> bool;
78 /// An iterator over substrings of this string slice, separated by
79 /// characters matched by a pattern.
86 /// use std::ffi::OsStr;
87 /// use clap_lex::OsStrExt as _;
88 /// let v: Vec<_> = OsStr::new("Mary had a little lamb").split(" ").collect();
89 /// assert_eq!(v, [OsStr::new("Mary"), OsStr::new("had"), OsStr::new("a"), OsStr::new("little"), OsStr::new("lamb")]);
91 /// let v: Vec<_> = OsStr::new("").split("X").collect();
92 /// assert_eq!(v, [OsStr::new("")]);
94 /// let v: Vec<_> = OsStr::new("lionXXtigerXleopard").split("X").collect();
95 /// assert_eq!(v, [OsStr::new("lion"), OsStr::new(""), OsStr::new("tiger"), OsStr::new("leopard")]);
97 /// let v: Vec<_> = OsStr::new("lion::tiger::leopard").split("::").collect();
98 /// assert_eq!(v, [OsStr::new("lion"), OsStr::new("tiger"), OsStr::new("leopard")]);
101 /// If a string contains multiple contiguous separators, you will end up
102 /// with empty strings in the output:
105 /// use std::ffi::OsStr;
106 /// use clap_lex::OsStrExt as _;
107 /// let x = OsStr::new("||||a||b|c");
108 /// let d: Vec<_> = x.split("|").collect();
110 /// assert_eq!(d, &[OsStr::new(""), OsStr::new(""), OsStr::new(""), OsStr::new(""), OsStr::new("a"), OsStr::new(""), OsStr::new("b"), OsStr::new("c")]);
113 /// Contiguous separators are separated by the empty string.
116 /// use std::ffi::OsStr;
117 /// use clap_lex::OsStrExt as _;
118 /// let x = OsStr::new("(///)");
119 /// let d: Vec<_> = x.split("/").collect();
121 /// assert_eq!(d, &[OsStr::new("("), OsStr::new(""), OsStr::new(""), OsStr::new(")")]);
124 /// Separators at the start or end of a string are neighbored
125 /// by empty strings.
128 /// use std::ffi::OsStr;
129 /// use clap_lex::OsStrExt as _;
130 /// let d: Vec<_> = OsStr::new("010").split("0").collect();
131 /// assert_eq!(d, &[OsStr::new(""), OsStr::new("1"), OsStr::new("")]);
134 /// When the empty string is used as a separator, it panics
137 /// use std::ffi::OsStr;
138 /// use clap_lex::OsStrExt as _;
139 /// let f: Vec<_> = OsStr::new("rust").split("").collect();
140 /// assert_eq!(f, &[OsStr::new(""), OsStr::new("r"), OsStr::new("u"), OsStr::new("s"), OsStr::new("t"), OsStr::new("")]);
143 /// Contiguous separators can lead to possibly surprising behavior
144 /// when whitespace is used as the separator. This code is correct:
147 /// use std::ffi::OsStr;
148 /// use clap_lex::OsStrExt as _;
149 /// let x = OsStr::new(" a b c");
150 /// let d: Vec<_> = x.split(" ").collect();
152 /// assert_eq!(d, &[OsStr::new(""), OsStr::new(""), OsStr::new(""), OsStr::new(""), OsStr::new("a"), OsStr::new(""), OsStr::new("b"), OsStr::new("c")]);
155 /// It does _not_ give you:
158 /// assert_eq!(d, &[OsStr::new("a"), OsStr::new("b"), OsStr::new("c")]);
161 /// Use [`split_whitespace`] for this behavior.
163 /// [`split_whitespace`]: str::split_whitespace
164 fn split<'s, 'n>(&'s self, needle: &'n str) -> Split<'s, 'n>;
165 /// Splits the string on the first occurrence of the specified delimiter and
166 /// returns prefix before delimiter and suffix after delimiter.
171 /// use std::ffi::OsStr;
172 /// use clap_lex::OsStrExt as _;
173 /// assert_eq!(OsStr::new("cfg").split_once("="), None);
174 /// assert_eq!(OsStr::new("cfg=").split_once("="), Some((OsStr::new("cfg"), OsStr::new(""))));
175 /// assert_eq!(OsStr::new("cfg=foo").split_once("="), Some((OsStr::new("cfg"), OsStr::new("foo"))));
176 /// assert_eq!(OsStr::new("cfg=foo=bar").split_once("="), Some((OsStr::new("cfg"), OsStr::new("foo=bar"))));
178 fn split_once(&self, needle: &'_ str) -> Option<(&OsStr, &OsStr)>;
181 impl OsStrExt for OsStr {
182 fn try_str(&self) -> Result<&str, std::str::Utf8Error> {
183 let bytes = to_bytes(self);
184 std::str::from_utf8(bytes)
187 fn contains(&self, needle: &str) -> bool {
188 self.find(needle).is_some()
191 fn find(&self, needle: &str) -> Option<usize> {
192 let bytes = to_bytes(self);
193 (0..=self.len().checked_sub(needle.len())?)
194 .find(|&x| bytes[x..].starts_with(needle.as_bytes()))
197 fn strip_prefix(&self, prefix: &str) -> Option<&OsStr> {
198 let bytes = to_bytes(self);
199 bytes.strip_prefix(prefix.as_bytes()).map(|s| {
201 // - This came from `to_bytes`
202 // - Since `prefix` is `&str`, any split will be along UTF-8 boundarie
203 unsafe { to_os_str_unchecked(s) }
206 fn starts_with(&self, prefix: &str) -> bool {
207 let bytes = to_bytes(self);
208 bytes.starts_with(prefix.as_bytes())
211 fn split<'s, 'n>(&'s self, needle: &'n str) -> Split<'s, 'n> {
212 assert_ne!(needle, "");
214 haystack: Some(self),
219 fn split_once(&self, needle: &'_ str) -> Option<(&OsStr, &OsStr)> {
220 let start = self.find(needle)?;
221 let end = start + needle.len();
222 let haystack = to_bytes(self);
223 let first = &haystack[0..start];
224 let second = &haystack[end..];
226 // - This came from `to_bytes`
227 // - Since `needle` is `&str`, any split will be along UTF-8 boundarie
228 unsafe { Some((to_os_str_unchecked(first), to_os_str_unchecked(second))) }
/// Holds the supertrait that seals [`OsStrExt`]: because `Sealed` lives in a private module,
/// code outside this crate cannot name it and therefore cannot implement `OsStrExt`.
mod private {
    /// Marker supertrait; carries no methods.
    pub trait Sealed {}

    impl Sealed for std::ffi::OsStr {}
}
/// Allow access to raw bytes
///
/// As the non-UTF8 encoding is not defined, the bytes only make sense when compared with
/// 7-bit ASCII or `&str`
///
/// # Compatibility
///
/// There is no guarantee how non-UTF8 bytes will be encoded, even within versions of this crate
/// (since it's dependent on rustc)
fn to_bytes(s: &OsStr) -> &[u8] {
    // SAFETY:
    // - Lifetimes are the same
    // - Types are compatible (`OsStr` is effectively a transparent wrapper for `[u8]`)
    // - The primary contract is that the encoding for invalid surrogate code points is not
    //   guaranteed which isn't a problem here
    //
    // There is a proposal to support this natively (https://github.com/rust-lang/rust/pull/95290)
    unsafe { std::mem::transmute::<&OsStr, &[u8]>(s) }
}
/// Restore raw bytes as `OsStr`
///
/// # Safety
///
/// - `&[u8]` must either be a `&str` or originated with `to_bytes` within the same binary
/// - Any splits of the original `&[u8]` must be done along UTF-8 boundaries
unsafe fn to_os_str_unchecked(s: &[u8]) -> &OsStr {
    // SAFETY:
    // - Lifetimes are the same
    // - Types are compatible (`OsStr` is effectively a transparent wrapper for `[u8]`)
    // - The primary contract is that the encoding for invalid surrogate code points is not
    //   guaranteed which isn't a problem here
    //
    // There is a proposal to support this natively (https://github.com/rust-lang/rust/pull/95290)
    std::mem::transmute::<&[u8], &OsStr>(s)
}
/// Iterator over the pieces of an [`OsStr`] separated by a `&str` needle.
///
/// `'s` is the lifetime of the haystack being split; `'n` is the lifetime of the needle.
#[derive(Clone, Debug)]
pub struct Split<'s, 'n> {
    /// Text that has not been yielded yet; `None` once the final piece has been returned.
    haystack: Option<&'s OsStr>,
    /// Separator searched for on every step.
    needle: &'n str,
}
282 impl<'s, 'n> Iterator for Split<'s, 'n> {
283 type Item = &'s OsStr;
285 fn next(&mut self) -> Option<Self::Item> {
286 let haystack = self.haystack?;
287 match haystack.split_once(self.needle) {
288 Some((first, second)) => {
289 if !haystack.is_empty() {
290 debug_assert_ne!(haystack, second);
292 self.haystack = Some(second);
296 self.haystack = None;
307 /// `index` must be at a valid UTF-8 boundary
308 pub(crate) unsafe fn split_at(os: &OsStr, index: usize) -> (&OsStr, &OsStr) {
309 let bytes = to_bytes(os);
310 let (first, second) = bytes.split_at(index);
311 (to_os_str_unchecked(first), to_os_str_unchecked(second))