Bug 1856013 [wpt PR 42249] - Add test for insertion of new <details name> elements...
[gecko.git] / intl / lwbrk / WordBreaker.h
blob88ccf1a380280f0c00c6d695e0a127991b8ea5ec
1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
5 #ifndef mozilla_intl_WordBreaker_h__
6 #define mozilla_intl_WordBreaker_h__
8 #include "nsStringFwd.h"
9 #include <cstdint>
// Sentinel returned by WordBreaker::Next() when aPos is already at the end of
// the text or beyond (aPos >= aLen).
11 #define NS_WORDBREAKER_NEED_MORE_TEXT -1
13 namespace mozilla {
14 namespace intl {
// Offsets delimiting a word inside a text, as returned by
// WordBreaker::FindWord().
16 struct WordRange {
// Offset of the first character of the word.
17 uint32_t mBegin;
// Offset of the last character of the word plus 1.
18 uint32_t mEnd;
// Utility for finding word boundaries in text. Exposes only static methods.
21 class WordBreaker final {
22 public:
23 // WordBreaker is a utility class with only static methods. No need to
24 // instantiate it.
25 WordBreaker() = delete;
26 ~WordBreaker() = delete;
28 // Find the word boundary by scanning forward and backward from aPos.
30 // @return WordRange where mBegin equals the offset of the first character in
31 // the word and mEnd equals the offset of the last character plus 1. mEnd
32 // can be aText.Length() if the desired word is at the end of aText.
34 // If aPos is already at the end of aText or beyond, both mBegin and mEnd
35 // equal aText.Length().
37 // If StopAtPunctuation is set, there will be break opportunities on characters
38 // with punctuation class even if the UAX#29 word segmenter rule is in use.
39 enum class FindWordOptions { None, StopAtPunctuation };
// aOptions defaults to FindWordOptions::None when the caller omits it.
41 static WordRange FindWord(
42 const nsAString& aText, uint32_t aPos,
43 const FindWordOptions aOptions = FindWordOptions::None);
45 // Find the next word break opportunity starting from aPos + 1. It can return
46 // aLen if there's no break opportunity between [aPos + 1, aLen - 1].
48 // If aPos is already at the end of aText or beyond, i.e. aPos >= aLen, return
49 // NS_WORDBREAKER_NEED_MORE_TEXT.
// The return type is signed so that the negative sentinel
// NS_WORDBREAKER_NEED_MORE_TEXT (-1) can be told apart from a valid offset.
51 // DEPRECATED: Use WordBreakIteratorUtf16 instead.
52 static int32_t Next(const char16_t* aText, uint32_t aLen, uint32_t aPos);
54 private:
// Character classes returned by GetClass(). Stored as uint8_t.
// NOTE(review): presumably used by the implementation to decide where words
// break; the usage is not visible in this header -- confirm in WordBreaker.cpp.
55 enum WordBreakClass : uint8_t {
56 kWbClassSpace = 0,
57 kWbClassAlphaLetter,
58 kWbClassPunct,
59 kWbClassHanLetter,
60 kWbClassKatakanaLetter,
61 kWbClassHiraganaLetter,
62 kWbClassHWKatakanaLetter,
63 kWbClassScriptioContinua
// Returns the WordBreakClass for the single char16_t aChar.
// NOTE(review): the character-to-class mapping is defined in the
// implementation file and is not visible here.
66 static WordBreakClass GetClass(char16_t aChar);
69 } // namespace intl
70 } // namespace mozilla
72 #endif /* mozilla_intl_WordBreaker_h__ */