1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 #include "GreekCasing.h"
7 #include "nsUnicharUtils.h"
9 // Custom uppercase mapping for Greek; see bug 307039 for details
// Unicode code points for the Greek vowels and combining marks that
// GreekCasing::UpperCase refers to (as case labels and return values).
//
// Naming: *_TONOS values lie in the Greek and Coptic block (U+0370..U+03FF);
// *_OXIA values lie in the Greek Extended block (U+1F00..U+1FFF);
// *_DIALYTIKA marks the letters that carry a diaeresis.

// Lowercase vowels: the plain letter followed by its accented forms.
10 #define GREEK_LOWER_ALPHA 0x03B1
11 #define GREEK_LOWER_ALPHA_TONOS 0x03AC
12 #define GREEK_LOWER_ALPHA_OXIA 0x1F71
13 #define GREEK_LOWER_EPSILON 0x03B5
14 #define GREEK_LOWER_EPSILON_TONOS 0x03AD
15 #define GREEK_LOWER_EPSILON_OXIA 0x1F73
16 #define GREEK_LOWER_ETA 0x03B7
17 #define GREEK_LOWER_ETA_TONOS 0x03AE
18 #define GREEK_LOWER_ETA_OXIA 0x1F75
// Iota and upsilon additionally have dialytika and dialytika+accent forms.
19 #define GREEK_LOWER_IOTA 0x03B9
20 #define GREEK_LOWER_IOTA_TONOS 0x03AF
21 #define GREEK_LOWER_IOTA_OXIA 0x1F77
22 #define GREEK_LOWER_IOTA_DIALYTIKA 0x03CA
23 #define GREEK_LOWER_IOTA_DIALYTIKA_TONOS 0x0390
24 #define GREEK_LOWER_IOTA_DIALYTIKA_OXIA 0x1FD3
25 #define GREEK_LOWER_OMICRON 0x03BF
26 #define GREEK_LOWER_OMICRON_TONOS 0x03CC
27 #define GREEK_LOWER_OMICRON_OXIA 0x1F79
28 #define GREEK_LOWER_UPSILON 0x03C5
29 #define GREEK_LOWER_UPSILON_TONOS 0x03CD
30 #define GREEK_LOWER_UPSILON_OXIA 0x1F7B
31 #define GREEK_LOWER_UPSILON_DIALYTIKA 0x03CB
32 #define GREEK_LOWER_UPSILON_DIALYTIKA_TONOS 0x03B0
33 #define GREEK_LOWER_UPSILON_DIALYTIKA_OXIA 0x1FE3
34 #define GREEK_LOWER_OMEGA 0x03C9
35 #define GREEK_LOWER_OMEGA_TONOS 0x03CE
36 #define GREEK_LOWER_OMEGA_OXIA 0x1F7D
// Uppercase vowels without an accent (plain, or with dialytika for
// iota and upsilon).
37 #define GREEK_UPPER_ALPHA 0x0391
38 #define GREEK_UPPER_EPSILON 0x0395
39 #define GREEK_UPPER_ETA 0x0397
40 #define GREEK_UPPER_IOTA 0x0399
41 #define GREEK_UPPER_IOTA_DIALYTIKA 0x03AA
42 #define GREEK_UPPER_OMICRON 0x039F
43 #define GREEK_UPPER_UPSILON 0x03A5
44 #define GREEK_UPPER_UPSILON_DIALYTIKA 0x03AB
45 #define GREEK_UPPER_OMEGA 0x03A9
// Uppercase vowels with tonos / oxia.
46 #define GREEK_UPPER_ALPHA_TONOS 0x0386
47 #define GREEK_UPPER_ALPHA_OXIA 0x1FBB
48 #define GREEK_UPPER_EPSILON_TONOS 0x0388
49 #define GREEK_UPPER_EPSILON_OXIA 0x1FC9
50 #define GREEK_UPPER_ETA_TONOS 0x0389
51 #define GREEK_UPPER_ETA_OXIA 0x1FCB
52 #define GREEK_UPPER_IOTA_TONOS 0x038A
53 #define GREEK_UPPER_IOTA_OXIA 0x1FDB
54 #define GREEK_UPPER_OMICRON_TONOS 0x038C
55 #define GREEK_UPPER_OMICRON_OXIA 0x1FF9
56 #define GREEK_UPPER_UPSILON_TONOS 0x038E
57 #define GREEK_UPPER_UPSILON_OXIA 0x1FEB
58 #define GREEK_UPPER_OMEGA_TONOS 0x038F
59 #define GREEK_UPPER_OMEGA_OXIA 0x1FFB
// Combining marks (Combining Diacritical Marks block, U+0300..U+036F).
60 #define COMBINING_ACUTE_ACCENT 0x0301
61 #define COMBINING_DIAERESIS 0x0308
62 #define COMBINING_ACUTE_TONE_MARK 0x0341
63 #define COMBINING_GREEK_DIALYTIKA_TONOS 0x0344
68 GreekCasing::UpperCase(uint32_t aCh
, GreekCasing::State
& aState
)
71 case GREEK_UPPER_ALPHA
:
72 case GREEK_LOWER_ALPHA
:
74 return GREEK_UPPER_ALPHA
;
76 case GREEK_UPPER_EPSILON
:
77 case GREEK_LOWER_EPSILON
:
79 return GREEK_UPPER_EPSILON
;
84 return GREEK_UPPER_ETA
;
86 case GREEK_UPPER_IOTA
:
88 return GREEK_UPPER_IOTA
;
90 case GREEK_UPPER_OMICRON
:
91 case GREEK_LOWER_OMICRON
:
93 return GREEK_UPPER_OMICRON
;
95 case GREEK_UPPER_UPSILON
:
98 aState
= kOmicronUpsilon
;
104 return GREEK_UPPER_UPSILON
;
106 case GREEK_UPPER_OMEGA
:
107 case GREEK_LOWER_OMEGA
:
109 return GREEK_UPPER_OMEGA
;
111 // iota and upsilon may be the second vowel of a diphthong
112 case GREEK_LOWER_IOTA
:
119 return GREEK_UPPER_IOTA_DIALYTIKA
;
124 return GREEK_UPPER_IOTA
;
126 case GREEK_LOWER_UPSILON
:
133 return GREEK_UPPER_UPSILON_DIALYTIKA
;
135 aState
= kOmicronUpsilon
;
141 return GREEK_UPPER_UPSILON
;
143 case GREEK_UPPER_IOTA_DIALYTIKA
:
144 case GREEK_LOWER_IOTA_DIALYTIKA
:
145 case GREEK_UPPER_UPSILON_DIALYTIKA
:
146 case GREEK_LOWER_UPSILON_DIALYTIKA
:
147 case COMBINING_DIAERESIS
:
149 return ToUpperCase(aCh
);
151 // remove accent if it follows a vowel or diaeresis,
152 // and set appropriate state for diphthong detection
153 case COMBINING_ACUTE_ACCENT
:
154 case COMBINING_ACUTE_TONE_MARK
:
158 return uint32_t(-1); // omit this char from result string
160 aState
= kEpsilonAcc
;
169 aState
= kOmicronAcc
;
172 aState
= kUpsilonAcc
;
174 case kOmicronUpsilon
:
175 aState
= kStart
; // this completed a diphthong
188 // combinations with diaeresis+accent just strip the accent,
189 // and reset to start state (don't form diphthong with following vowel)
190 case GREEK_LOWER_IOTA_DIALYTIKA_TONOS
:
191 case GREEK_LOWER_IOTA_DIALYTIKA_OXIA
:
193 return GREEK_UPPER_IOTA_DIALYTIKA
;
195 case GREEK_LOWER_UPSILON_DIALYTIKA_TONOS
:
196 case GREEK_LOWER_UPSILON_DIALYTIKA_OXIA
:
198 return GREEK_UPPER_UPSILON_DIALYTIKA
;
200 case COMBINING_GREEK_DIALYTIKA_TONOS
:
202 return COMBINING_DIAERESIS
;
204 // strip accents from vowels, and note the vowel seen so that we can detect
205 // diphthongs where diaeresis needs to be added
206 case GREEK_LOWER_ALPHA_TONOS
:
207 case GREEK_LOWER_ALPHA_OXIA
:
208 case GREEK_UPPER_ALPHA_TONOS
:
209 case GREEK_UPPER_ALPHA_OXIA
:
211 return GREEK_UPPER_ALPHA
;
213 case GREEK_LOWER_EPSILON_TONOS
:
214 case GREEK_LOWER_EPSILON_OXIA
:
215 case GREEK_UPPER_EPSILON_TONOS
:
216 case GREEK_UPPER_EPSILON_OXIA
:
217 aState
= kEpsilonAcc
;
218 return GREEK_UPPER_EPSILON
;
220 case GREEK_LOWER_ETA_TONOS
:
221 case GREEK_LOWER_ETA_OXIA
:
222 case GREEK_UPPER_ETA_TONOS
:
223 case GREEK_UPPER_ETA_OXIA
:
225 return GREEK_UPPER_ETA
;
227 case GREEK_LOWER_IOTA_TONOS
:
228 case GREEK_LOWER_IOTA_OXIA
:
229 case GREEK_UPPER_IOTA_TONOS
:
230 case GREEK_UPPER_IOTA_OXIA
:
232 return GREEK_UPPER_IOTA
;
234 case GREEK_LOWER_OMICRON_TONOS
:
235 case GREEK_LOWER_OMICRON_OXIA
:
236 case GREEK_UPPER_OMICRON_TONOS
:
237 case GREEK_UPPER_OMICRON_OXIA
:
238 aState
= kOmicronAcc
;
239 return GREEK_UPPER_OMICRON
;
241 case GREEK_LOWER_UPSILON_TONOS
:
242 case GREEK_LOWER_UPSILON_OXIA
:
243 case GREEK_UPPER_UPSILON_TONOS
:
244 case GREEK_UPPER_UPSILON_OXIA
:
247 aState
= kStart
; // this completed a diphthong
250 aState
= kUpsilonAcc
;
253 return GREEK_UPPER_UPSILON
;
255 case GREEK_LOWER_OMEGA_TONOS
:
256 case GREEK_LOWER_OMEGA_OXIA
:
257 case GREEK_UPPER_OMEGA_TONOS
:
258 case GREEK_UPPER_OMEGA_OXIA
:
260 return GREEK_UPPER_OMEGA
;
263 // all other characters just reset the state, and use standard mappings
265 return ToUpperCase(aCh
);
268 } // namespace mozilla