/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
5 #ifndef mozilla_intl_WordBreaker_h__
6 #define mozilla_intl_WordBreaker_h__
8 #include "nsStringFwd.h"
// Sentinel returned by WordBreaker::Next() when aPos >= aLen.
// The value is parenthesized so the negative literal always expands as a
// single operand, whatever surrounds it at the use site.
#define NS_WORDBREAKER_NEED_MORE_TEXT (-1)
21 class WordBreaker final
{
23 // WordBreaker is a utility class with only static methods. No need to
25 WordBreaker() = delete;
26 ~WordBreaker() = delete;
// Find the word boundary by scanning forward and backward from aPos.
//
// @return WordRange where mBegin is the offset of the first character in the
// word and mEnd is the offset of the last character plus 1. mEnd can be
// aText.Length() if the desired word is at the end of aText.
//
// If aPos is already at the end of aText or beyond, both mBegin and mEnd
// equal aText.Length().
//
// If StopAtPunctuation is set, there will be break opportunities on
// characters with punctuation class even when the UAX#29 word segmenter
// rule is in use.
enum class FindWordOptions { None, StopAtPunctuation };
41 static WordRange
FindWord(
42 const nsAString
& aText
, uint32_t aPos
,
43 const FindWordOptions aOptions
= FindWordOptions::None
);
// Find the next word break opportunity starting from aPos + 1. Returns aLen
// if there is no break opportunity in [aPos + 1, aLen - 1].
//
// If aPos is already at the end of aText or beyond, i.e. aPos >= aLen,
// returns NS_WORDBREAKER_NEED_MORE_TEXT.
//
// DEPRECATED: Use WordBreakIteratorUtf16 instead.
static int32_t Next(const char16_t* aText, uint32_t aLen, uint32_t aPos);
// Word-break classes returned by GetClass(); the underlying type is uint8_t.
// NOTE(review): source lines appear to have been dropped between the enum
// header and kWbClassKatakanaLetter. Confirm against the canonical header
// that no leading enumerators are missing before relying on the numeric
// values of these constants.
enum WordBreakClass : uint8_t {
  kWbClassKatakanaLetter,
  kWbClassHiraganaLetter,
  kWbClassHWKatakanaLetter,
  kWbClassScriptioContinua
};

// Returns the WordBreakClass assigned to the char16_t code unit aChar.
static WordBreakClass GetClass(char16_t aChar);
70 } // namespace mozilla
72 #endif /* mozilla_intl_WordBreaker_h__ */