Bumping manifests a=b2g-bump
[gecko.git] / intl / unicharutil / util / GreekCasing.cpp
blob5c805c275662c6f8a4d198ae0611d4de019ef63d
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 #include "GreekCasing.h"
7 #include "nsUnicharUtils.h"
// Custom uppercase mapping for Greek; see bug 307039 for details

// Code points consumed by GreekCasing::UpperCase below.  For each accented
// vowel two precomposed spellings are listed: the *_TONOS form (U+03xx) and
// the *_OXIA form (U+1Fxx).

// Lowercase vowels: plain, with tonos, with oxia.
#define GREEK_LOWER_ALPHA                    0x03B1
#define GREEK_LOWER_ALPHA_TONOS              0x03AC
#define GREEK_LOWER_ALPHA_OXIA               0x1F71
#define GREEK_LOWER_EPSILON                  0x03B5
#define GREEK_LOWER_EPSILON_TONOS            0x03AD
#define GREEK_LOWER_EPSILON_OXIA             0x1F73
#define GREEK_LOWER_ETA                      0x03B7
#define GREEK_LOWER_ETA_TONOS                0x03AE
#define GREEK_LOWER_ETA_OXIA                 0x1F75
#define GREEK_LOWER_IOTA                     0x03B9
#define GREEK_LOWER_IOTA_TONOS               0x03AF
#define GREEK_LOWER_IOTA_OXIA                0x1F77
#define GREEK_LOWER_IOTA_DIALYTIKA           0x03CA
#define GREEK_LOWER_IOTA_DIALYTIKA_TONOS     0x0390
#define GREEK_LOWER_IOTA_DIALYTIKA_OXIA      0x1FD3
#define GREEK_LOWER_OMICRON                  0x03BF
#define GREEK_LOWER_OMICRON_TONOS            0x03CC
#define GREEK_LOWER_OMICRON_OXIA             0x1F79
#define GREEK_LOWER_UPSILON                  0x03C5
#define GREEK_LOWER_UPSILON_TONOS            0x03CD
#define GREEK_LOWER_UPSILON_OXIA             0x1F7B
#define GREEK_LOWER_UPSILON_DIALYTIKA        0x03CB
#define GREEK_LOWER_UPSILON_DIALYTIKA_TONOS  0x03B0
#define GREEK_LOWER_UPSILON_DIALYTIKA_OXIA   0x1FE3
#define GREEK_LOWER_OMEGA                    0x03C9
#define GREEK_LOWER_OMEGA_TONOS              0x03CE
#define GREEK_LOWER_OMEGA_OXIA               0x1F7D

// Uppercase vowels without an accent (iota/upsilon also with dialytika).
#define GREEK_UPPER_ALPHA                    0x0391
#define GREEK_UPPER_EPSILON                  0x0395
#define GREEK_UPPER_ETA                      0x0397
#define GREEK_UPPER_IOTA                     0x0399
#define GREEK_UPPER_IOTA_DIALYTIKA           0x03AA
#define GREEK_UPPER_OMICRON                  0x039F
#define GREEK_UPPER_UPSILON                  0x03A5
#define GREEK_UPPER_UPSILON_DIALYTIKA        0x03AB
#define GREEK_UPPER_OMEGA                    0x03A9

// Uppercase vowels with tonos / oxia.
#define GREEK_UPPER_ALPHA_TONOS              0x0386
#define GREEK_UPPER_ALPHA_OXIA               0x1FBB
#define GREEK_UPPER_EPSILON_TONOS            0x0388
#define GREEK_UPPER_EPSILON_OXIA             0x1FC9
#define GREEK_UPPER_ETA_TONOS                0x0389
#define GREEK_UPPER_ETA_OXIA                 0x1FCB
#define GREEK_UPPER_IOTA_TONOS               0x038A
#define GREEK_UPPER_IOTA_OXIA                0x1FDB
#define GREEK_UPPER_OMICRON_TONOS            0x038C
#define GREEK_UPPER_OMICRON_OXIA             0x1FF9
#define GREEK_UPPER_UPSILON_TONOS            0x038E
#define GREEK_UPPER_UPSILON_OXIA             0x1FEB
#define GREEK_UPPER_OMEGA_TONOS              0x038F
#define GREEK_UPPER_OMEGA_OXIA               0x1FFB

// Combining marks that receive special treatment.
#define COMBINING_ACUTE_ACCENT               0x0301
#define COMBINING_DIAERESIS                  0x0308
#define COMBINING_ACUTE_TONE_MARK            0x0341
#define COMBINING_GREEK_DIALYTIKA_TONOS      0x0344
65 namespace mozilla {
67 uint32_t
68 GreekCasing::UpperCase(uint32_t aCh, GreekCasing::State& aState)
70 switch (aCh) {
71 case GREEK_UPPER_ALPHA:
72 case GREEK_LOWER_ALPHA:
73 aState = kAlpha;
74 return GREEK_UPPER_ALPHA;
76 case GREEK_UPPER_EPSILON:
77 case GREEK_LOWER_EPSILON:
78 aState = kEpsilon;
79 return GREEK_UPPER_EPSILON;
81 case GREEK_UPPER_ETA:
82 case GREEK_LOWER_ETA:
83 aState = kEta;
84 return GREEK_UPPER_ETA;
86 case GREEK_UPPER_IOTA:
87 aState = kIota;
88 return GREEK_UPPER_IOTA;
90 case GREEK_UPPER_OMICRON:
91 case GREEK_LOWER_OMICRON:
92 aState = kOmicron;
93 return GREEK_UPPER_OMICRON;
95 case GREEK_UPPER_UPSILON:
96 switch (aState) {
97 case kOmicron:
98 aState = kOmicronUpsilon;
99 break;
100 default:
101 aState = kUpsilon;
102 break;
104 return GREEK_UPPER_UPSILON;
106 case GREEK_UPPER_OMEGA:
107 case GREEK_LOWER_OMEGA:
108 aState = kOmega;
109 return GREEK_UPPER_OMEGA;
111 // iota and upsilon may be the second vowel of a diphthong
112 case GREEK_LOWER_IOTA:
113 switch (aState) {
114 case kAlphaAcc:
115 case kEpsilonAcc:
116 case kOmicronAcc:
117 case kUpsilonAcc:
118 aState = kStart;
119 return GREEK_UPPER_IOTA_DIALYTIKA;
120 default:
121 break;
123 aState = kIota;
124 return GREEK_UPPER_IOTA;
126 case GREEK_LOWER_UPSILON:
127 switch (aState) {
128 case kAlphaAcc:
129 case kEpsilonAcc:
130 case kEtaAcc:
131 case kOmicronAcc:
132 aState = kStart;
133 return GREEK_UPPER_UPSILON_DIALYTIKA;
134 case kOmicron:
135 aState = kOmicronUpsilon;
136 break;
137 default:
138 aState = kUpsilon;
139 break;
141 return GREEK_UPPER_UPSILON;
143 case GREEK_UPPER_IOTA_DIALYTIKA:
144 case GREEK_LOWER_IOTA_DIALYTIKA:
145 case GREEK_UPPER_UPSILON_DIALYTIKA:
146 case GREEK_LOWER_UPSILON_DIALYTIKA:
147 case COMBINING_DIAERESIS:
148 aState = kDiaeresis;
149 return ToUpperCase(aCh);
151 // remove accent if it follows a vowel or diaeresis,
152 // and set appropriate state for diphthong detection
153 case COMBINING_ACUTE_ACCENT:
154 case COMBINING_ACUTE_TONE_MARK:
155 switch (aState) {
156 case kAlpha:
157 aState = kAlphaAcc;
158 return uint32_t(-1); // omit this char from result string
159 case kEpsilon:
160 aState = kEpsilonAcc;
161 return uint32_t(-1);
162 case kEta:
163 aState = kEtaAcc;
164 return uint32_t(-1);
165 case kIota:
166 aState = kIotaAcc;
167 return uint32_t(-1);
168 case kOmicron:
169 aState = kOmicronAcc;
170 return uint32_t(-1);
171 case kUpsilon:
172 aState = kUpsilonAcc;
173 return uint32_t(-1);
174 case kOmicronUpsilon:
175 aState = kStart; // this completed a diphthong
176 return uint32_t(-1);
177 case kOmega:
178 aState = kOmegaAcc;
179 return uint32_t(-1);
180 case kDiaeresis:
181 aState = kStart;
182 return uint32_t(-1);
183 default:
184 break;
186 break;
188 // combinations with dieresis+accent just strip the accent,
189 // and reset to start state (don't form diphthong with following vowel)
190 case GREEK_LOWER_IOTA_DIALYTIKA_TONOS:
191 case GREEK_LOWER_IOTA_DIALYTIKA_OXIA:
192 aState = kStart;
193 return GREEK_UPPER_IOTA_DIALYTIKA;
195 case GREEK_LOWER_UPSILON_DIALYTIKA_TONOS:
196 case GREEK_LOWER_UPSILON_DIALYTIKA_OXIA:
197 aState = kStart;
198 return GREEK_UPPER_UPSILON_DIALYTIKA;
200 case COMBINING_GREEK_DIALYTIKA_TONOS:
201 aState = kStart;
202 return COMBINING_DIAERESIS;
204 // strip accents from vowels, and note the vowel seen so that we can detect
205 // diphthongs where diaeresis needs to be added
206 case GREEK_LOWER_ALPHA_TONOS:
207 case GREEK_LOWER_ALPHA_OXIA:
208 case GREEK_UPPER_ALPHA_TONOS:
209 case GREEK_UPPER_ALPHA_OXIA:
210 aState = kAlphaAcc;
211 return GREEK_UPPER_ALPHA;
213 case GREEK_LOWER_EPSILON_TONOS:
214 case GREEK_LOWER_EPSILON_OXIA:
215 case GREEK_UPPER_EPSILON_TONOS:
216 case GREEK_UPPER_EPSILON_OXIA:
217 aState = kEpsilonAcc;
218 return GREEK_UPPER_EPSILON;
220 case GREEK_LOWER_ETA_TONOS:
221 case GREEK_LOWER_ETA_OXIA:
222 case GREEK_UPPER_ETA_TONOS:
223 case GREEK_UPPER_ETA_OXIA:
224 aState = kEtaAcc;
225 return GREEK_UPPER_ETA;
227 case GREEK_LOWER_IOTA_TONOS:
228 case GREEK_LOWER_IOTA_OXIA:
229 case GREEK_UPPER_IOTA_TONOS:
230 case GREEK_UPPER_IOTA_OXIA:
231 aState = kIotaAcc;
232 return GREEK_UPPER_IOTA;
234 case GREEK_LOWER_OMICRON_TONOS:
235 case GREEK_LOWER_OMICRON_OXIA:
236 case GREEK_UPPER_OMICRON_TONOS:
237 case GREEK_UPPER_OMICRON_OXIA:
238 aState = kOmicronAcc;
239 return GREEK_UPPER_OMICRON;
241 case GREEK_LOWER_UPSILON_TONOS:
242 case GREEK_LOWER_UPSILON_OXIA:
243 case GREEK_UPPER_UPSILON_TONOS:
244 case GREEK_UPPER_UPSILON_OXIA:
245 switch (aState) {
246 case kOmicron:
247 aState = kStart; // this completed a diphthong
248 break;
249 default:
250 aState = kUpsilonAcc;
251 break;
253 return GREEK_UPPER_UPSILON;
255 case GREEK_LOWER_OMEGA_TONOS:
256 case GREEK_LOWER_OMEGA_OXIA:
257 case GREEK_UPPER_OMEGA_TONOS:
258 case GREEK_UPPER_OMEGA_OXIA:
259 aState = kOmegaAcc;
260 return GREEK_UPPER_OMEGA;
263 // all other characters just reset the state, and use standard mappings
264 aState = kStart;
265 return ToUpperCase(aCh);
268 } // namespace mozilla