Bug 1904750 Prefer DefaultDuration over previous inter-timestamp interval r=media...
[gecko.git] / intl / icu_capi / src / segmenter_line.rs
blobf58c76f66d767958490f4980410b69c704c4877d
1 // This file is part of ICU4X. For terms of use, please see the file
2 // called LICENSE at the top level of the ICU4X source tree
3 // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
5 use icu_segmenter::LineBreakOptions;
6 use icu_segmenter::LineBreakStrictness;
7 use icu_segmenter::LineBreakWordOption;
9 #[diplomat::bridge]
10 pub mod ffi {
11     use crate::errors::ffi::ICU4XError;
12     use crate::provider::ffi::ICU4XDataProvider;
13     use alloc::boxed::Box;
14     use core::convert::TryFrom;
15     use icu_segmenter::{
16         LineBreakIteratorLatin1, LineBreakIteratorPotentiallyIllFormedUtf8, LineBreakIteratorUtf16,
17         LineSegmenter,
18     };
20     #[diplomat::opaque]
21     /// An ICU4X line-break segmenter, capable of finding breakpoints in strings.
22     #[diplomat::rust_link(icu::segmenter::LineSegmenter, Struct)]
23     pub struct ICU4XLineSegmenter(LineSegmenter);
25     #[diplomat::rust_link(icu::segmenter::LineBreakStrictness, Enum)]
26     pub enum ICU4XLineBreakStrictness {
27         Loose,
28         Normal,
29         Strict,
30         Anywhere,
31     }
33     #[diplomat::rust_link(icu::segmenter::LineBreakWordOption, Enum)]
34     pub enum ICU4XLineBreakWordOption {
35         Normal,
36         BreakAll,
37         KeepAll,
38     }
40     #[diplomat::rust_link(icu::segmenter::LineBreakOptions, Struct)]
41     #[diplomat::attr(dart, rename = "LineBreakOptions")]
42     pub struct ICU4XLineBreakOptionsV1 {
43         pub strictness: ICU4XLineBreakStrictness,
44         pub word_option: ICU4XLineBreakWordOption,
45         pub ja_zh: bool,
46     }
48     #[diplomat::opaque]
49     #[diplomat::rust_link(icu::segmenter::LineBreakIterator, Struct)]
50     #[diplomat::rust_link(
51         icu::segmenter::LineBreakIteratorPotentiallyIllFormedUtf8,
52         Typedef,
53         compact
54     )]
55     #[diplomat::rust_link(icu::segmenter::LineBreakIteratorUtf8, Typedef, hidden)]
56     pub struct ICU4XLineBreakIteratorUtf8<'a>(LineBreakIteratorPotentiallyIllFormedUtf8<'a, 'a>);
58     #[diplomat::opaque]
59     #[diplomat::rust_link(icu::segmenter::LineBreakIterator, Struct)]
60     #[diplomat::rust_link(icu::segmenter::LineBreakIteratorUtf16, Typedef, compact)]
61     pub struct ICU4XLineBreakIteratorUtf16<'a>(LineBreakIteratorUtf16<'a, 'a>);
63     #[diplomat::opaque]
64     #[diplomat::rust_link(icu::segmenter::LineBreakIterator, Struct)]
65     #[diplomat::rust_link(icu::segmenter::LineBreakIteratorLatin1, Typedef, compact)]
66     pub struct ICU4XLineBreakIteratorLatin1<'a>(LineBreakIteratorLatin1<'a, 'a>);
68     impl ICU4XLineSegmenter {
69         /// Construct a [`ICU4XLineSegmenter`] with default options. It automatically loads the best
70         /// available payload data for Burmese, Khmer, Lao, and Thai.
71         #[diplomat::rust_link(icu::segmenter::LineSegmenter::new_auto, FnInStruct)]
72         #[diplomat::attr(all(supports = constructors, supports = fallible_constructors, supports = named_constructors), named_constructor = "auto")]
73         pub fn create_auto(
74             provider: &ICU4XDataProvider,
75         ) -> Result<Box<ICU4XLineSegmenter>, ICU4XError> {
76             Ok(Box::new(ICU4XLineSegmenter(call_constructor!(
77                 LineSegmenter::new_auto [r => Ok(r)],
78                 LineSegmenter::try_new_auto_with_any_provider,
79                 LineSegmenter::try_new_auto_with_buffer_provider,
80                 provider
81             )?)))
82         }
84         /// Construct a [`ICU4XLineSegmenter`] with default options and LSTM payload data for
85         /// Burmese, Khmer, Lao, and Thai.
86         #[diplomat::rust_link(icu::segmenter::LineSegmenter::new_lstm, FnInStruct)]
87         #[diplomat::attr(all(supports = constructors, supports = fallible_constructors, supports = named_constructors), named_constructor = "lstm")]
88         pub fn create_lstm(
89             provider: &ICU4XDataProvider,
90         ) -> Result<Box<ICU4XLineSegmenter>, ICU4XError> {
91             Ok(Box::new(ICU4XLineSegmenter(call_constructor!(
92                 LineSegmenter::new_lstm [r => Ok(r)],
93                 LineSegmenter::try_new_lstm_with_any_provider,
94                 LineSegmenter::try_new_lstm_with_buffer_provider,
95                 provider,
96             )?)))
97         }
99         /// Construct a [`ICU4XLineSegmenter`] with default options and dictionary payload data for
100         /// Burmese, Khmer, Lao, and Thai..
101         #[diplomat::rust_link(icu::segmenter::LineSegmenter::new_dictionary, FnInStruct)]
102         #[diplomat::attr(all(supports = constructors, supports = fallible_constructors, supports = named_constructors), named_constructor = "dictionary")]
103         pub fn create_dictionary(
104             provider: &ICU4XDataProvider,
105         ) -> Result<Box<ICU4XLineSegmenter>, ICU4XError> {
106             Ok(Box::new(ICU4XLineSegmenter(call_constructor!(
107                 LineSegmenter::new_dictionary [r => Ok(r)],
108                 LineSegmenter::try_new_dictionary_with_any_provider,
109                 LineSegmenter::try_new_dictionary_with_buffer_provider,
110                 provider,
111             )?)))
112         }
114         /// Construct a [`ICU4XLineSegmenter`] with custom options. It automatically loads the best
115         /// available payload data for Burmese, Khmer, Lao, and Thai.
116         #[diplomat::rust_link(icu::segmenter::LineSegmenter::new_auto_with_options, FnInStruct)]
117         #[diplomat::attr(dart, rename = "auto_with_options")]
118         #[diplomat::attr(all(supports = constructors, supports = fallible_constructors, supports = named_constructors), named_constructor = "auto_with_options_v1")]
119         pub fn create_auto_with_options_v1(
120             provider: &ICU4XDataProvider,
121             options: ICU4XLineBreakOptionsV1,
122         ) -> Result<Box<ICU4XLineSegmenter>, ICU4XError> {
123             Ok(Box::new(ICU4XLineSegmenter(call_constructor!(
124                 LineSegmenter::new_auto_with_options [r => Ok(r)],
125                 LineSegmenter::try_new_auto_with_options_with_any_provider,
126                 LineSegmenter::try_new_auto_with_options_with_buffer_provider,
127                 provider,
128                 options.into(),
129             )?)))
130         }
132         /// Construct a [`ICU4XLineSegmenter`] with custom options and LSTM payload data for
133         /// Burmese, Khmer, Lao, and Thai.
134         #[diplomat::rust_link(icu::segmenter::LineSegmenter::new_lstm_with_options, FnInStruct)]
135         #[diplomat::attr(dart, rename = "lstm_with_options")]
136         #[diplomat::attr(all(supports = constructors, supports = fallible_constructors, supports = named_constructors), named_constructor = "lstm_with_options_v1")]
137         pub fn create_lstm_with_options_v1(
138             provider: &ICU4XDataProvider,
139             options: ICU4XLineBreakOptionsV1,
140         ) -> Result<Box<ICU4XLineSegmenter>, ICU4XError> {
141             Ok(Box::new(ICU4XLineSegmenter(call_constructor!(
142                 LineSegmenter::new_lstm_with_options [r => Ok(r)],
143                 LineSegmenter::try_new_lstm_with_options_with_any_provider,
144                 LineSegmenter::try_new_lstm_with_options_with_buffer_provider,
145                 provider,
146                 options.into(),
147             )?)))
148         }
150         /// Construct a [`ICU4XLineSegmenter`] with custom options and dictionary payload data for
151         /// Burmese, Khmer, Lao, and Thai.
152         #[diplomat::rust_link(
153             icu::segmenter::LineSegmenter::new_dictionary_with_options,
154             FnInStruct
155         )]
156         #[diplomat::attr(dart, rename = "dictionary_with_options")]
157         #[diplomat::attr(all(supports = constructors, supports = fallible_constructors, supports = named_constructors), named_constructor = "dictionary_with_options_v1")]
158         pub fn create_dictionary_with_options_v1(
159             provider: &ICU4XDataProvider,
160             options: ICU4XLineBreakOptionsV1,
161         ) -> Result<Box<ICU4XLineSegmenter>, ICU4XError> {
162             Ok(Box::new(ICU4XLineSegmenter(call_constructor!(
163                 LineSegmenter::new_dictionary_with_options [r => Ok(r)],
164                 LineSegmenter::try_new_dictionary_with_options_with_any_provider,
165                 LineSegmenter::try_new_dictionary_with_options_with_buffer_provider,
166                 provider,
167                 options.into(),
168             )?)))
169         }
171         /// Segments a string.
172         ///
173         /// Ill-formed input is treated as if errors had been replaced with REPLACEMENT CHARACTERs according
174         /// to the WHATWG Encoding Standard.
175         #[diplomat::rust_link(icu::segmenter::LineSegmenter::segment_utf8, FnInStruct)]
176         #[diplomat::rust_link(icu::segmenter::LineSegmenter::segment_str, FnInStruct, hidden)]
177         #[diplomat::attr(dart, disable)]
178         pub fn segment_utf8<'a>(
179             &'a self,
180             input: &'a DiplomatStr,
181         ) -> Box<ICU4XLineBreakIteratorUtf8<'a>> {
182             Box::new(ICU4XLineBreakIteratorUtf8(self.0.segment_utf8(input)))
183         }
185         /// Segments a string.
186         ///
187         /// Ill-formed input is treated as if errors had been replaced with REPLACEMENT CHARACTERs according
188         /// to the WHATWG Encoding Standard.
189         #[diplomat::rust_link(icu::segmenter::LineSegmenter::segment_utf16, FnInStruct)]
190         #[diplomat::attr(dart, rename = "segment")]
191         pub fn segment_utf16<'a>(
192             &'a self,
193             input: &'a DiplomatStr16,
194         ) -> Box<ICU4XLineBreakIteratorUtf16<'a>> {
195             Box::new(ICU4XLineBreakIteratorUtf16(self.0.segment_utf16(input)))
196         }
198         /// Segments a Latin-1 string.
199         #[diplomat::rust_link(icu::segmenter::LineSegmenter::segment_latin1, FnInStruct)]
200         #[diplomat::attr(dart, disable)]
201         pub fn segment_latin1<'a>(
202             &'a self,
203             input: &'a [u8],
204         ) -> Box<ICU4XLineBreakIteratorLatin1<'a>> {
205             Box::new(ICU4XLineBreakIteratorLatin1(self.0.segment_latin1(input)))
206         }
207     }
209     impl<'a> ICU4XLineBreakIteratorUtf8<'a> {
210         /// Finds the next breakpoint. Returns -1 if at the end of the string or if the index is
211         /// out of range of a 32-bit signed integer.
212         #[diplomat::rust_link(icu::segmenter::LineBreakIterator::next, FnInStruct)]
213         #[diplomat::rust_link(
214             icu::segmenter::LineBreakIterator::Item,
215             AssociatedTypeInStruct,
216             hidden
217         )]
218         pub fn next(&mut self) -> i32 {
219             self.0
220                 .next()
221                 .and_then(|u| i32::try_from(u).ok())
222                 .unwrap_or(-1)
223         }
224     }
226     impl<'a> ICU4XLineBreakIteratorUtf16<'a> {
227         /// Finds the next breakpoint. Returns -1 if at the end of the string or if the index is
228         /// out of range of a 32-bit signed integer.
229         #[diplomat::rust_link(icu::segmenter::LineBreakIterator::next, FnInStruct)]
230         #[diplomat::rust_link(
231             icu::segmenter::LineBreakIterator::Item,
232             AssociatedTypeInStruct,
233             hidden
234         )]
235         pub fn next(&mut self) -> i32 {
236             self.0
237                 .next()
238                 .and_then(|u| i32::try_from(u).ok())
239                 .unwrap_or(-1)
240         }
241     }
243     impl<'a> ICU4XLineBreakIteratorLatin1<'a> {
244         /// Finds the next breakpoint. Returns -1 if at the end of the string or if the index is
245         /// out of range of a 32-bit signed integer.
246         #[diplomat::rust_link(icu::segmenter::LineBreakIterator::next, FnInStruct)]
247         #[diplomat::rust_link(
248             icu::segmenter::LineBreakIterator::Item,
249             AssociatedTypeInStruct,
250             hidden
251         )]
252         pub fn next(&mut self) -> i32 {
253             self.0
254                 .next()
255                 .and_then(|u| i32::try_from(u).ok())
256                 .unwrap_or(-1)
257         }
258     }
261 impl From<ffi::ICU4XLineBreakStrictness> for LineBreakStrictness {
262     fn from(other: ffi::ICU4XLineBreakStrictness) -> Self {
263         match other {
264             ffi::ICU4XLineBreakStrictness::Loose => Self::Loose,
265             ffi::ICU4XLineBreakStrictness::Normal => Self::Normal,
266             ffi::ICU4XLineBreakStrictness::Strict => Self::Strict,
267             ffi::ICU4XLineBreakStrictness::Anywhere => Self::Anywhere,
268         }
269     }
272 impl From<ffi::ICU4XLineBreakWordOption> for LineBreakWordOption {
273     fn from(other: ffi::ICU4XLineBreakWordOption) -> Self {
274         match other {
275             ffi::ICU4XLineBreakWordOption::Normal => Self::Normal,
276             ffi::ICU4XLineBreakWordOption::BreakAll => Self::BreakAll,
277             ffi::ICU4XLineBreakWordOption::KeepAll => Self::KeepAll,
278         }
279     }
282 impl From<ffi::ICU4XLineBreakOptionsV1> for LineBreakOptions {
283     fn from(other: ffi::ICU4XLineBreakOptionsV1) -> Self {
284         let mut options = LineBreakOptions::default();
285         options.strictness = other.strictness.into();
286         options.word_option = other.word_option.into();
287         options.ja_zh = other.ja_zh;
288         options
289     }