Bug 1589531 [wpt PR 19775] - [LayoutNG] Reland flex-item alignment, a=testonly
[gecko.git] / intl / lwbrk / WordBreaker.cpp
blob0b7a51870b8f3802d3cd1629b321dd0cde6b8b27
1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 #include "mozilla/intl/WordBreaker.h"
7 #include "mozilla/Preferences.h"
9 using mozilla::intl::WordBreakClass;
10 using mozilla::intl::WordBreaker;
11 using mozilla::intl::WordRange;
13 /*static*/
14 already_AddRefed<WordBreaker> WordBreaker::Create() {
15 return RefPtr<WordBreaker>(new WordBreaker()).forget();
18 bool WordBreaker::BreakInBetween(const char16_t* aText1, uint32_t aTextLen1,
19 const char16_t* aText2, uint32_t aTextLen2) {
20 MOZ_ASSERT(nullptr != aText1, "null ptr");
21 MOZ_ASSERT(nullptr != aText2, "null ptr");
23 if (!aText1 || !aText2 || (0 == aTextLen1) || (0 == aTextLen2)) return false;
25 return GetClass(aText1[aTextLen1 - 1]) != GetClass(aText2[0]);
// Character classification helpers used by WordBreaker::GetClass().
// Every replacement list is fully parenthesized so the macros stay correct
// when combined with other operators (e.g. `!IS_HAN(c)` or `IS_HAN(c) && x`).

// True when no bit above the low 7 is set, i.e. c is in U+0000..U+007F.
#define IS_ASCII(c) (0 == (0xFF80 & (c)))
#define ASCII_IS_ALPHA(c) \
  ((('a' <= (c)) && ((c) <= 'z')) || (('A' <= (c)) && ((c) <= 'Z')))
#define ASCII_IS_DIGIT(c) (('0' <= (c)) && ((c) <= '9'))
#define ASCII_IS_SPACE(c) \
  ((' ' == (c)) || ('\t' == (c)) || ('\r' == (c)) || ('\n' == (c)))
// Everything below U+2E80 is handled by the "alphabetical script" branch.
#define IS_ALPHABETICAL_SCRIPT(c) ((c) < 0x2E80)

// We changed the beginning of IS_HAN from 0x4e00 to 0x3400 to reflect
// Unicode 3.0.
#define IS_HAN(c)                              \
  (((0x3400 <= (c)) && ((c) <= 0x9fff)) ||     \
   ((0xf900 <= (c)) && ((c) <= 0xfaff)))
#define IS_KATAKANA(c) ((0x30A0 <= (c)) && ((c) <= 0x30FF))
#define IS_HIRAGANA(c) ((0x3040 <= (c)) && ((c) <= 0x309F))
#define IS_HALFWIDTHKATAKANA(c) ((0xFF60 <= (c)) && ((c) <= 0xFF9F))
// Look at the highest 9 bits: matches U+0E00..U+0E7F.
#define IS_THAI(c) (0x0E00 == (0xFF80 & (c)))
45 /* static */
46 WordBreakClass WordBreaker::GetClass(char16_t c) {
47 // The pref is cached on first call; changes will require a browser restart.
48 static bool sStopAtUnderscore =
49 Preferences::GetBool("layout.word_select.stop_at_underscore", false);
51 // begin of the hack
53 if (IS_ALPHABETICAL_SCRIPT(c)) {
54 if (IS_ASCII(c)) {
55 if (ASCII_IS_SPACE(c)) {
56 return kWbClassSpace;
57 } else if (ASCII_IS_ALPHA(c) || ASCII_IS_DIGIT(c) ||
58 (c == '_' && !sStopAtUnderscore)) {
59 return kWbClassAlphaLetter;
60 } else {
61 return kWbClassPunct;
63 } else if (IS_THAI(c)) {
64 return kWbClassThaiLetter;
65 } else if (c == 0x00A0 /*NBSP*/) {
66 return kWbClassSpace;
67 } else {
68 return kWbClassAlphaLetter;
70 } else {
71 if (IS_HAN(c)) {
72 return kWbClassHanLetter;
73 } else if (IS_KATAKANA(c)) {
74 return kWbClassKatakanaLetter;
75 } else if (IS_HIRAGANA(c)) {
76 return kWbClassHiraganaLetter;
77 } else if (IS_HALFWIDTHKATAKANA(c)) {
78 return kWbClassHWKatakanaLetter;
79 } else {
80 return kWbClassAlphaLetter;
83 return static_cast<WordBreakClass>(0);
86 WordRange WordBreaker::FindWord(const char16_t* aText, uint32_t aTextLen,
87 uint32_t aOffset) {
88 WordRange range;
89 MOZ_ASSERT(nullptr != aText, "null ptr");
90 MOZ_ASSERT(0 != aTextLen, "len = 0");
91 MOZ_ASSERT(aOffset <= aTextLen, "aOffset > aTextLen");
93 range.mBegin = aTextLen + 1;
94 range.mEnd = aTextLen + 1;
96 if (!aText || aOffset > aTextLen) return range;
98 WordBreakClass c = GetClass(aText[aOffset]);
99 uint32_t i;
100 // Scan forward
101 range.mEnd--;
102 for (i = aOffset + 1; i <= aTextLen; i++) {
103 if (c != GetClass(aText[i])) {
104 range.mEnd = i;
105 break;
109 // Scan backward
110 range.mBegin = 0;
111 for (i = aOffset; i > 0; i--) {
112 if (c != GetClass(aText[i - 1])) {
113 range.mBegin = i;
114 break;
117 if (kWbClassThaiLetter == c) {
118 // need to call Thai word breaker from here
119 // we should pass the whole Thai segment to the thai word breaker to find a
120 // shorter answer
122 return range;
125 int32_t WordBreaker::NextWord(const char16_t* aText, uint32_t aLen,
126 uint32_t aPos) {
127 WordBreakClass c1, c2;
128 uint32_t cur = aPos;
129 if (cur == aLen) return NS_WORDBREAKER_NEED_MORE_TEXT;
130 c1 = GetClass(aText[cur]);
132 for (cur++; cur < aLen; cur++) {
133 c2 = GetClass(aText[cur]);
134 if (c2 != c1) break;
136 if (kWbClassThaiLetter == c1) {
137 // need to call Thai word breaker from here
138 // we should pass the whole Thai segment to the thai word breaker to find a
139 // shorter answer
141 if (cur == aLen) return NS_WORDBREAKER_NEED_MORE_TEXT;
142 return cur;