1 % ----------------------------------------------------------------------------
2 % This "ucharclasses" package sets up XeTeX character classes based on which
3 % unicode block a character is found in. It then allows transition rules to be defined
4 % when entering or leaving particular unicode blocks, the code of which gets inserted
5 % automatically when a transition from a character from one unicode block to a
6 % character from another unicode block is encountered by XeTeX
8 % Current compatibility should be Unicode 15.0
11 % v2.5-2.6: Werner Lemberg
12 % v2.4: Werner Lemberg, Shreeshrii
13 % v2.1-2.3: Qing Lee, Werner Lemberg
14 % v2.0: Enrico Gregorio
15 % v1.0: Mike "Pomax" Kamermans
17 % Significant updates:
18 % v2.6: Unicode 15 support
19 % v2.5: Unicode 14 support
20 % v2.4: Unicode 13 support
21 % v2.3: Unicode 10 support
22 % v2.2: Unicode 8.0 and LaTeX2e support
23 % v2.1: Uplift for the intercharclass updates introduced in XeTeX 0.99994
% v2.0: Rewritten to vastly improve performance.
25 % v1.0: Unicode block switching using XeTeX intercharclasses.
27 % License: public domain (https://www.ctan.org/license/pd)
29 % ----------------------------------------------------------------------------
% Identify the package to LaTeX.
% Both arguments are written without internal line breaks: a line end inside
% an argument becomes a space token, which would turn the package name into
% `ucharclasses ' (mismatching the file name) and break the YYYY/MM/DD date
% that LaTeX compares when a minimum package date is requested.
\ProvidesPackage{ucharclasses}%
  [2022/10/20 v2.6.0 Unicode block character classes for XeLaTeX]
% Package option `verbose'.
% \if@ucharclassverbose starts out false; giving the option sets it to true.
% Code further down in this file tests the flag before writing \typeout
% progress lines and package warnings.
% The option name must not contain a line break, otherwise the option would
% be declared as `verbose ' (with a trailing space) and never match.
\newif\if@ucharclassverbose
\DeclareOption{verbose}{\@ucharclassverbosetrue}
36 % ----------------------------------------------------------------------------
37 % The package options allow you to selectively enable certain unicode blocks
38 % ----------------------------------------------------------------------------
40 % We first define all blocks in a list together with their start and end
43 % Starting with XeTeX version 3.14159265-2.6-0.99994, the number of
44 % \XeTeXcharclass registers was extended from 256 to 4096 entries; some not
45 % so important blocks are thus provided only for this and newer versions.
46 % The boundary of character classes was changed from 255 to 4095
47 % correspondingly. The primitive \XeTeXinterwordspaceshaping was introduced
48 % by XeTeX 0.99994; we use it as a flag to identify this version.
% However, earlier versions of LaTeX2e (before 2016/04/22 v2.0q) didn't provide
% support for 4096 entries; we thus have to override the hard-coded limit.
% Store the number of the boundary class in \@ucharclass@boundary.
% Exactly one of three branches must run, so the two conditionals are nested
% and both are closed here:
%   1. The kernel defines \e@alloc@intercharclass@top: use that value.
%   2. Otherwise, if the engine has \XeTeXinterwordspaceshaping (the flag for
%      XeTeX 0.99994 and newer): use 4095 and redefine
%      \newXeTeXintercharclass so that the allocator accepts classes up to
%      that bound instead of the kernel's hard-coded limit.
%   3. Otherwise: use 255 (\@cclv).
\ifdefined\e@alloc@intercharclass@top
  \chardef\@ucharclass@boundary=\e@alloc@intercharclass@top
\else
  \ifdefined\XeTeXinterwordspaceshaping
    % The space before the comment sign terminates the number 4095.
    \chardef\@ucharclass@boundary=4095 %
    \def\newXeTeXintercharclass{%
      \e@alloc\XeTeXcharclass\chardef
        \xe@alloc@intercharclass\m@ne\@ucharclass@boundary}
  \else
    \chardef\@ucharclass@boundary=\@cclv
  \fi
\fi
66 % Unicode 5.1 block definitions
67 \do{AegeanNumbers
}{"
010100}{"
01013F
}
68 \do{AlphabeticPresentationForms
}{"
0FB00
}{"
0FB4F
}
69 \do{AncientGreekMusicalNotation
}{"
01D200
}{"
01D24F
}
70 \do{AncientGreekNumbers
}{"
010140}{"
01018F
}
71 \do{AncientSymbols
}{"
010190}{"
0101CF
}
72 \do{Arabic
}{"
0600}{"
06FF
}
73 \do{ArabicPresentationFormsA
}{"
0FB50
}{"
0FDFF
}
74 \do{ArabicPresentationFormsB
}{"
0FE70
}{"
0FEFF
}
75 \do{ArabicSupplement
}{"
0750}{"
077F
}
76 \do{Armenian
}{"
0530}{"
058F
}
77 \do{Arrows
}{"
02190}{"
021FF
}
78 \do{Balinese
}{"
01B00
}{"
01B7F
}
79 \do{BasicLatin
}{"
0020}{"
007F
} % 0000..007F in Unicode standard
80 \do{Bengali
}{"
0980}{"
09FF
}
81 \do{BlockElements
}{"
02580}{"
0259F
}
82 \do{Bopomofo
}{"
03100}{"
0312F
}
83 \do{BopomofoExtended
}{"
031A0
}{"
031BF
}
84 \do{BoxDrawing
}{"
02500}{"
0257F
}
85 \do{BraillePatterns
}{"
02800}{"
028FF
}
86 \do{Buginese
}{"
01A00
}{"
01A1F
}
87 \do{Buhid
}{"
01740}{"
0175F
}
88 \do{ByzantineMusicalSymbols
}{"
01D000
}{"
01D0FF
}
90 \do{Cham
}{"
0AA00
}{"
0AA5F
}
91 \do{Cherokee
}{"
013A0
}{"
013FF
}
92 \do{CJKCompatibility
}{"
03300}{"
033FF
}
93 \do{CJKCompatibilityForms
}{"
0FE30
}{"
0FE4F
}
94 \do{CJKCompatibilityIdeographs
}{"
0F900
}{"
0FAFF
}
95 \do{CJKCompatibilityIdeographsSupplement
}{"
02F800
}{"
02FA1F
}
96 \do{CJKRadicalsSupplement
}{"
02E80
}{"
02EFF
}
97 \do{CJKStrokes
}{"
031C0
}{"
031EF
}
98 \do{CJKSymbolsAndPunctuation
}{"
03000}{"
0303F
}
99 \do{CJKUnifiedIdeographs
}{"
04E00
}{"
09FFF
}
100 \do{CJKUnifiedIdeographsExtensionA
}{"
03400}{"
04DBF
}
101 \do{CJKUnifiedIdeographsExtensionB
}{"
020000}{"
02A6DF
}
102 \do{CombiningDiacriticalMarks
}{"
0300}{"
036F
}
103 \do{CombiningDiacriticalMarksForSymbols
}{"
020D0
}{"
020FF
}
104 \do{CombiningDiacriticalMarksSupplement
}{"
01DC0
}{"
01DFF
}
105 \do{CombiningHalfMarks
}{"
0FE20
}{"
0FE2F
}
106 \do{ControlPictures
}{"
02400}{"
0243F
}
107 \do{Coptic
}{"
02C80
}{"
02CFF
}
108 \do{CountingRodNumerals
}{"
01D360
}{"
01D37F
}
109 \do{Cuneiform
}{"
012000}{"
0123FF
}
110 \do{CuneiformNumbersAndPunctuation
}{"
012400}{"
01247F
}
111 \do{CurrencySymbols
}{"
020A0
}{"
020CF
}
112 \do{CypriotSyllabary
}{"
010800}{"
01083F
}
113 \do{Cyrillic
}{"
0400}{"
04FF
}
114 \do{CyrillicExtendedA
}{"
02DE0
}{"
02DFF
}
115 \do{CyrillicExtendedB
}{"
0A640
}{"
0A69F
}
116 \do{CyrillicSupplement
}{"
0500}{"
052F
}
117 \do{Deseret
}{"
010400}{"
01044F
}
118 \do{DevanagariDanDa
}{"
0964}{"
0965}
119 \do{DevanagariMarks
}{"
0951}{"
0954}
120 \do{DevanagariPostDanDa
}{"
0966}{"
097F
}
121 \do{DevanagariPostMarks
}{"
0955}{"
0963}
122 \do{DevanagariPreMarks
}{"
0900}{"
0950}
123 \do{Dingbats
}{"
02700}{"
027BF
}
124 \do{DominoTiles
}{"
01F030
}{"
01F09F
}
125 \do{EnclosedAlphanumerics
}{"
02460}{"
024FF
}
126 \do{EnclosedCJKLettersAndMonths
}{"
03200}{"
032FF
}
127 \do{Ethiopic
}{"
01200}{"
0137F
}
128 \do{EthiopicExtended
}{"
02D80
}{"
02DDF
}
129 \do{EthiopicSupplement
}{"
01380}{"
0139F
}
130 \do{GeneralPunctuation
}{"
02000}{"
0206F
}
131 \do{GeometricShapes
}{"
025A0
}{"
025FF
}
132 \do{Georgian
}{"
010A0
}{"
010FF
}
133 \do{GeorgianSupplement
}{"
02D00
}{"
02D2F
}
134 \do{Glagolitic
}{"
02C00
}{"
02C5F
}
136 \do{GreekAndCoptic
}{"
0370}{"
03FF
}
137 \do{GreekExtended
}{"
01F00
}{"
01FFF
}
138 \do{Gujarati
}{"
0A80
}{"
0AFF
}
139 \do{Gurmukhi
}{"
0A00
}{"
0A7F
}
140 \do{HalfwidthAndFullwidthForms
}{"
0FF00
}{"
0FFEF
}
141 \do{HangulCompatibilityJamo
}{"
03130}{"
0318F
}
142 \do{HangulJamo
}{"
01100}{"
011FF
}
143 \do{HangulSyllables
}{"
0AC00
}{"
0D7AF
}
144 \do{Hanunoo
}{"
01720}{"
0173F
}
145 \do{Hebrew
}{"
0590}{"
05FF
}
146 \do{Hiragana
}{"
03040}{"
0309F
}
147 \do{IdeographicDescriptionCharacters
}{"
02FF0
}{"
02FFF
}
148 \do{IPAExtensions
}{"
0250}{"
02AF
}
149 \do{Kanbun
}{"
03190}{"
0319F
}
150 \do{KangxiRadicals
}{"
02F00
}{"
02FDF
}
151 \do{Kannada
}{"
0C80
}{"
0CFF
}
152 \do{Katakana
}{"
030A0
}{"
030FF
}
153 \do{KatakanaPhoneticExtensions
}{"
031F0
}{"
031FF
}
154 \do{KayahLi
}{"
0A900
}{"
0A92F
}
155 % Kharoshthi (see below)
156 \do{Khmer
}{"
01780}{"
017FF
}
157 \do{KhmerSymbols
}{"
019E0
}{"
019FF
}
158 \do{Lao
}{"
0E80
}{"
0EFF
}
159 \do{LatinExtendedAdditional
}{"
01E00
}{"
01EFF
}
160 \do{LatinExtendedA
}{"
0100}{"
017F
}
161 \do{LatinExtendedB
}{"
0180}{"
024F
}
162 \do{LatinExtendedC
}{"
02C60
}{"
02C7F
}
163 \do{LatinExtendedD
}{"
0A720
}{"
0A7FF
}
164 \do{LatinSupplement
}{"
0080}{"
00FF
}
165 \do{Lepcha
}{"
01C00
}{"
01C4F
}
166 \do{LetterlikeSymbols
}{"
02100}{"
0214F
}
167 \do{Limbu
}{"
01900}{"
0194F
}
168 \do{LinearBIdeograms
}{"
010080}{"
0100FF
}
169 \do{LinearBSyllabary
}{"
010000}{"
01007F
}
172 \do{MahjongTiles
}{"
01F000
}{"
01F02F
}
173 \do{Malayalam
}{"
0D00
}{"
0D7F
}
174 \do{MathematicalAlphanumericSymbols
}{"
01D400
}{"
01D7FF
}
175 \do{MathematicalOperators
}{"
02200}{"
022FF
}
176 \do{MiscellaneousMathematicalSymbolsA
}{"
027C0
}{"
027EF
}
177 \do{MiscellaneousMathematicalSymbolsB
}{"
02980}{"
029FF
}
178 \do{MiscellaneousSymbols
}{"
02600}{"
026FF
}
179 \do{MiscellaneousSymbolsAndArrows
}{"
02B00
}{"
02BFF
}
180 \do{MiscellaneousTechnical
}{"
02300}{"
023FF
}
181 \do{ModifierToneLetters
}{"
0A700
}{"
0A71F
}
182 \do{Mongolian
}{"
01800}{"
018AF
}
183 \do{MusicalSymbols
}{"
01D100
}{"
01D1FF
}
184 \do{Myanmar
}{"
01000}{"
0109F
}
185 \do{NewTaiLue
}{"
01980}{"
019DF
}
186 \do{NKo
}{"
07C0
}{"
07FF
}
187 \do{NumberForms
}{"
02150}{"
0218F
}
188 \do{Ogham
}{"
01680}{"
0169F
}
189 \do{OlChiki
}{"
01C50
}{"
01C7F
}
190 % OldItalic (see below)
191 \do{OldPersian
}{"
0103A0
}{"
0103DF
}
192 \do{OpticalCharacterRecognition
}{"
02440}{"
0245F
}
193 \do{Oriya
}{"
0B00
}{"
0B7F
}
194 \do{Osmanya
}{"
010480}{"
0104AF
}
195 \do{PhagsPa
}{"
0A840
}{"
0A87F
}
196 % PhaistosDisc (see below)
197 \do{Phoenician
}{"
010900}{"
01091F
}
198 \do{PhoneticExtensions
}{"
01D00
}{"
01D7F
}
199 \do{PhoneticExtensionsSupplement
}{"
01D80
}{"
01DBF
}
200 \do{PrivateUseArea
}{"
0E000
}{"
0F8FF
}
201 \do{Rejang
}{"
0A930
}{"
0A95F
}
202 \do{Runic
}{"
016A0
}{"
016FF
}
203 \do{Saurashtra
}{"
0A880
}{"
0A8DF
}
204 \do{Shavian
}{"
010450}{"
01047F
}
205 \do{Sinhala
}{"
0D80
}{"
0DFF
}
206 \do{SmallFormVariants
}{"
0FE50
}{"
0FE6F
}
207 \do{SpacingModifierLetters
}{"
02B0
}{"
02FF
}
208 \do{Sundanese
}{"
01B80
}{"
01BBF
}
209 \do{SuperscriptsAndSubscripts
}{"
02070}{"
0209F
}
210 \do{SupplementalArrowsA
}{"
027F0
}{"
027FF
}
211 \do{SupplementalArrowsB
}{"
02900}{"
0297F
}
212 \do{SupplementalMathematicalOperators
}{"
02A00
}{"
02AFF
}
213 \do{SupplementalPunctuation
}{"
02E00
}{"
02E7F
}
214 % SupplementaryPrivateUseAreaA (see below)
215 % SupplementaryPrivateUseAreaB (see below)
216 \do{SylotiNagri
}{"
0A800
}{"
0A82F
}
217 \do{Syriac
}{"
0700}{"
074F
}
218 \do{Tagalog
}{"
01700}{"
0171F
}
219 \do{Tagbanwa
}{"
01760}{"
0177F
}
220 \do{Tags
}{"
0E0000
}{"
0E007F
}
221 \do{TaiLe
}{"
01950}{"
0197F
}
222 \do{TaiXuanJingSymbols
}{"
01D300
}{"
01D35F
}
223 \do{Tamil
}{"
0B80
}{"
0BFF
}
224 \do{Telugu
}{"
0C00
}{"
0C7F
}
225 \do{Thaana
}{"
0780}{"
07BF
}
226 \do{Thai
}{"
0E00
}{"
0E7F
}
227 \do{Tibetan
}{"
0F00
}{"
0FFF
}
228 \do{Tifinagh
}{"
02D30
}{"
02D7F
}
229 \do{Ugaritic
}{"
010380}{"
01039F
}
230 \do{UnifiedCanadianAboriginalSyllabics
}{"
01400}{"
0167F
}
231 \do{Vai
}{"
0A500
}{"
0A63F
}
232 \do{VerticalForms
}{"
0FE10
}{"
0FE1F
}
233 \do{YiRadicals
}{"
0A490
}{"
0A4CF
}
234 \do{YiSyllables
}{"
0A000
}{"
0A48F
}
235 \do{YijingHexagramSymbols
}{"
04DC0
}{"
04DFF
}
236 % Unicode 5.2 additions
237 \do{Avestan
}{"
010B00
}{"
010B3F
}
238 \do{Bamum
}{"
0A6A0
}{"
0A6FF
}
239 \do{CJKUnifiedIdeographsExtensionC
}{"
02A700
}{"
02B73F
}
240 \do{CommonIndicNumberForms
}{"
0A830
}{"
0A83F
}
241 \do{DevanagariExtended
}{"
0A8E0
}{"
0A8FF
}
242 \do{EgyptianHieroglyphs
}{"
013000}{"
01342F
}
243 \do{EnclosedAlphanumericSupplement
}{"
01F100
}{"
01F1FF
}
244 \do{EnclosedIdeographicSupplement
}{"
01F200
}{"
01F2FF
}
245 \do{HangulJamoExtendedA
}{"
0A960
}{"
0A97F
}
246 \do{HangulJamoExtendedB
}{"
0D7B0
}{"
0D7FF
}
247 \do{ImperialAramaic
}{"
010840}{"
01085F
}
248 \do{InscriptionalPahlavi
}{"
010B60
}{"
010B7F
}
249 \do{InscriptionalParthian
}{"
010B40
}{"
010B5F
}
250 \do{Javanese
}{"
0A980
}{"
0A9DF
}
251 \do{Kaithi
}{"
011080}{"
0110CF
}
252 \do{Lisu
}{"
0A4D0
}{"
0A4FF
}
253 \do{MeeteiMayek
}{"
0ABC0
}{"
0ABFF
}
254 \do{MyanmarExtendedA
}{"
0AA60
}{"
0AA7F
}
255 % OldSouthArabian (see below)
256 % OldTurkic (see below)
257 \do{RumiNumeralSymbols
}{"
010E60
}{"
010E7F
}
258 \do{Samaritan
}{"
0800}{"
083F
}
259 \do{TaiTham
}{"
01A20
}{"
01AAF
}
260 \do{TaiViet
}{"
0AA80
}{"
0AADF
}
261 \do{UnifiedCanadianAboriginalSyllabicsExtended
}{"
018B0
}{"
018FF
}
262 \do{VedicExtensions
}{"
01CD0
}{"
01CFF
}
263 % Unicode 6.0 additions
264 \do{AlchemicalSymbols
}{"
01F700
}{"
01F77F
}
265 \do{BamumSupplement
}{"
016800}{"
016A3F
}
266 \do{Batak
}{"
01BC0
}{"
01BFF
}
267 \do{Brahmi
}{"
011000}{"
01107F
}
268 \do{CJKUnifiedIdeographsExtensionD
}{"
02B740
}{"
02B81F
}
269 \do{Emoticons
}{"
01F600
}{"
01F64F
}
270 \do{EthiopicExtendedA
}{"
0AB00
}{"
0AB2F
}
271 \do{KanaSupplement
}{"
01B000
}{"
01B0FF
}
272 \do{Mandaic
}{"
0840}{"
085F
}
273 \do{MiscellaneousSymbolsAndPictographs
}{"
01F300
}{"
01F5FF
}
274 \do{PlayingCards
}{"
01F0A0
}{"
01F0FF
}
275 \do{TransportAndMapSymbols
}{"
01F680
}{"
01F6FF
}
276 % Unicode 6.1 additions
277 \do{ArabicExtendedA
}{"
08A0
}{"
08FF
}
278 \do{ArabicMathematicalAlphabeticSymbols
}{"
01EE00
}{"
01EEFF
}
279 \do{Chakma
}{"
011100}{"
01114F
}
280 \do{MeeteiMayekExtensions
}{"
0AAE0
}{"
0AAFF
}
281 \do{MeroiticCursive
}{"
0109A0
}{"
0109FF
}
282 \do{MeroiticHieroglyphs
}{"
010980}{"
01099F
}
283 \do{Miao
}{"
016F00
}{"
016F9F
}
284 \do{Sharada
}{"
011180}{"
0111DF
}
285 \do{SoraSompeng
}{"
0110D0
}{"
0110FF
}
286 \do{SundaneseSupplement
}{"
01CC0
}{"
01CCF
}
287 \do{Takri
}{"
011680}{"
0116CF
}
288 % Unicode 7.0 additions
289 \do{BassaVah
}{"
016AD0
}{"
016AFF
}
290 \do{CaucasianAlbanian
}{"
010530}{"
01056F
}
291 \do{CombiningDiacriticalMarksExtended
}{"
01AB0
}{"
01AFF
}
292 \do{CopticEpactNumbers
}{"
0102E0
}{"
0102FF
}
293 % Duployan (see below)
294 \do{Elbasan
}{"
010500}{"
01052F
}
295 \do{GeometricShapesExtended
}{"
01F780
}{"
01F7FF
}
296 \do{Grantha
}{"
011300}{"
01137F
}
297 \do{Khojki
}{"
011200}{"
01124F
}
298 \do{Khudawadi
}{"
0112B0
}{"
0112FF
}
299 \do{LatinExtendedE
}{"
0AB30
}{"
0AB6F
}
300 \do{LinearA
}{"
010600}{"
01077F
}
301 \do{Mahajani
}{"
011150}{"
01117F
}
302 % Manichaean (see below)
303 \do{MendeKikakui
}{"
01E800
}{"
01E8DF
}
304 \do{Modi
}{"
011600}{"
01165F
}
305 \do{Mro
}{"
016A40
}{"
016A6F
}
306 \do{MyanmarExtendedB
}{"
0A9E0
}{"
0A9FF
}
307 % Nabataean (see below)
308 % OldNorthArabian (see below)
309 % OldPermic (see below)
310 \do{OrnamentalDingbats
}{"
01F650
}{"
01F67F
}
311 \do{PahawhHmong
}{"
016B00
}{"
016B8F
}
312 % Palmyrene (see below)
313 \do{PauCinHau
}{"
011AC0
}{"
011AFF
}
314 % PsalterPahlavi (see below)
315 % ShorthandFormatControls (see below)
316 \do{Siddham
}{"
011580}{"
0115FF
}
317 \do{SinhalaArchaicNumbers
}{"
0111E0
}{"
0111FF
}
318 \do{SupplementalArrowsC
}{"
01F800
}{"
01F8FF
}
319 \do{Tirhuta
}{"
011480}{"
0114DF
}
320 \do{WarangCiti
}{"
0118A0
}{"
0118FF
}
321 % Unicode 8.0 additions needed for classes
322 \do{CherokeeSupplement
}{"
0AB70
}{"
0ABBF
}
323 \do{CJKUnifiedIdeographsExtensionE
}{"
02B820
}{"
02CEAF
}
324 \do{SupplementalSymbolsAndPictographs
}{"
01F900
}{"
01F9FF
}
325 % Unicode 9.0 additions needed for classes
326 \do{CyrillicExtendedC
}{"
01C80
}{"
01C8F
}
327 \do{GlagoliticSupplement
}{"
01E000
}{"
01E02F
}
328 \do{IdeographicSymbolsAndPunctuation
}{"
016FE0
}{"
016FFF
}
329 \do{MongolianSupplement
}{"
011660}{"
01167F
}
330 % Unicode 10.0 additions needed for classes
331 \do{CJKUnifiedIdeographsExtensionF
}{"
02CEB0
}{"
02EBEF
}
332 \do{KanaExtendedA
}{"
01B100
}{"
01B12F
}
333 \do{SyriacSupplement
}{"
0860}{"
086F
}
334 % Unicode 11.0 additions needed for classes
335 \do{GeorgianExtended
}{"
01C90
}{"
01CBF
}
336 % Unicode 12.0 additions needed for classes
337 \do{SmallKanaExtension
}{"
01B130
}{"
01B16F
}
338 \do{SymbolsAndPictographsExtendedA
}{"
01FA70
}{"
01FAFF
}
339 % Unicode 13.0 additions needed for classes
340 \do{CJKUnifiedIdeographsExtensionG
}{"
030000}{"
03134F
}
341 % Unicode 14.0 additions needed for classes
342 \do{ArabicExtendedB
}{"
0870}{"
089F
}
343 \do{EthiopicExtendedB
}{"
01E7E0
}{"
01E7FF
}
344 \do{KanaExtendedB
}{"
01AFF0
}{"
01AFFF
}
345 \do{LatinExtendedF
}{"
010780}{"
0107BF
}
346 \do{LatinExtendedG
}{"
01DF00
}{"
01DFFF
}
347 \do{UnifiedCanadianAboriginalSyllabicsExtendedA
}{"
011AB0
}{"
011ABF
}
348 % Unicode 15.0 additions needed for classes
349 \do{ArabicExtendedC
}{"
010EC0
}{"
010EFF
}
350 \do{CJKUnifiedIdeographsExtensionH
}{"
031350}{"
0323AF
}
351 \do{CyrillicExtendedD
}{"
01E030
}{"
01E08F
}
352 \do{DevanagariExtendedA
}{"
011B00
}{"
011B5F
}
354 \ifdefined\XeTeXinterwordspaceshaping
355 % Unicode 5.1 block definitions
356 \do{Carian
}{"
0102A0
}{"
0102DF
}
357 \do{Gothic
}{"
010330}{"
01034F
}
358 \do{Kharoshthi
}{"
010A00
}{"
010A5F
}
359 \do{Lycian
}{"
010280}{"
01029F
}
360 \do{Lydian
}{"
010920}{"
01093F
}
361 \do{OldItalic
}{"
010300}{"
01032F
}
362 \do{PhaistosDisc
}{"
0101D0
}{"
0101FF
}
363 \do{SupplementaryPrivateUseAreaA
}{"
0F0000
}{"
0FFFFF
}
364 \do{SupplementaryPrivateUseAreaB
}{"
0100000}{"
010FFFF
}
365 % Unicode 5.2 additions
366 \do{OldSouthArabian
}{"
010A60
}{"
010A7F
}
367 \do{OldTurkic
}{"
010C00
}{"
010C4F
}
368 % Unicode 7.0 additions
369 \do{Duployan
}{"
01BC00
}{"
01BC9F
}
370 \do{Manichaean
}{"
010AC0
}{"
010AFF
}
371 \do{Nabataean
}{"
010880}{"
0108AF
}
372 \do{OldNorthArabian
}{"
010A80
}{"
010A9F
}
373 \do{OldPermic
}{"
010350}{"
01037F
}
374 \do{Palmyrene
}{"
010860}{"
01087F
}
375 \do{PsalterPahlavi
}{"
010B80
}{"
010BAF
}
376 \do{ShorthandFormatControls
}{"
01BCA0
}{"
01BCAF
}
377 % Unicode 8.0 additions
378 \do{Ahom
}{"
011700}{"
01174F
}
379 \do{AnatolianHieroglyphs
}{"
014400}{"
01467F
}
380 \do{EarlyDynasticCuneiform
}{"
012480}{"
01254F
}
381 \do{Hatran
}{"
0108E0
}{"
0108FF
}
382 \do{Multani
}{"
011280}{"
0112AF
}
383 \do{OldHungarian
}{"
010C80
}{"
010CFF
}
384 \do{SuttonSignWriting
}{"
01D800
}{"
01DAAF
}
385 % Unicode 9.0 additions
386 \do{Adlam
}{"
01E900
}{"
01E95F
}
387 \do{Bhaiksuki
}{"
011C00
}{"
011C6F
}
388 \do{Marchen
}{"
011C70
}{"
011CBF
}
389 \do{Newa
}{"
011400}{"
01147F
}
390 \do{Osage
}{"
0104B0
}{"
0104FF
}
391 \do{Tangut
}{"
017000}{"
0187FF
}
392 \do{TangutComponents
}{"
018800}{"
018AFF
}
393 % Unicode 10.0 additions
394 \do{MasaramGondi
}{"
011D00
}{"
011D5F
}
395 \do{Nushu
}{"
01B170
}{"
01B2FF
}
396 \do{Soyombo
}{"
011A50
}{"
011AAF
}
397 \do{ZanabazarSquare
}{"
011A00
}{"
011A4F
}
398 % Unicode 11.0 additions
399 \do{ChessSymbols
}{"
01FA00
}{"
01FA6F
}
400 \do{Dogra
}{"
011800}{"
01184F
}
401 \do{GunjalaGondi
}{"
011D60
}{"
011DAF
}
402 \do{HanifiRohingya
}{"
010D00
}{"
010D3F
}
403 \do{IndicSiyaqNumbers
}{"
01EC70
}{"
01ECBF
}
404 \do{Makasar
}{"
011EE0
}{"
011EFF
}
405 \do{MayanNumerals
}{"
01D2E0
}{"
01D2FF
}
406 \do{Medefaidrin
}{"
016E40
}{"
016E9F
}
407 \do{OldSogdian
}{"
010F00
}{"
010F2F
}
408 \do{Sogdian
}{"
010F30
}{"
010F6F
}
409 % Unicode 12.0 additions
410 % The range was extended in Unicode 15.0
411 \do{EgyptianHieroglyphFormatControls
}{"
013430}{"
01345F
}
412 \do{Elymaic
}{"
010FE0
}{"
010FFF
}
413 \do{Nandinagari
}{"
0119A0
}{"
0119FF
}
414 \do{NyiakengPuachueHmong
}{"
01E100
}{"
01E14F
}
415 \do{OttomanSiyaqNumbers
}{"
01ED00
}{"
01ED4F
}
416 \do{TamilSupplement
}{"
011FC0
}{"
011FFF
}
417 \do{Wancho
}{"
01E2C0
}{"
01E2FF
}
418 % Unicode 13.0 additions
419 \do{Chorasmian
}{"
010FB0
}{"
010FDF
}
420 \do{DivesAkuru
}{"
011900}{"
01195F
}
421 \do{KhitanSmallScript
}{"
018B00
}{"
018CFF
}
422 \do{LisuSupplement
}{"
011FB0
}{"
011FBF
}
423 \do{SymbolsForLegacyComputing
}{"
01FB00
}{"
01FBFF
}
424 \do{TangutSupplement
}{"
018D00
}{"
018D7F
}
425 \do{Yezidi
}{"
010E80
}{"
010EBF
}
426 % Unicode 14.0 additions
427 \do{CyproMinoan
}{"
012F90
}{"
012FFF
}
428 \do{OldUighur
}{"
010F70
}{"
010FAF
}
429 \do{Tangsa
}{"
016A70
}{"
016ACF
}
430 \do{Toto
}{"
01E290
}{"
01E2BF
}
431 \do{Vithkuqi
}{"
010570}{"
0105BF
}
432 \do{ZnamennyMusicalNotation
}{"
01CF00
}{"
01CFCF
}
433 % Unicode 15.0 additions
434 \do{KaktovikNumerals
}{"
01D2C0
}{"
01D2DF
}
435 \do{Kawi
}{"
011F00
}{"
011F5F
}
436 \do{NagMundari
}{"
01E4D0
}{"
01E4FF
}
440 % ----------------------------------------------------------------------------
441 % Option handling lets the user turn off "load all" and selectively enable
442 % only those blocks they are interested in
443 % ----------------------------------------------------------------------------
445 % Each option starts with \overrideClassLoading; so any specified
446 % option will set |\if@overrideClassLoading| to true; when one has
447 % been scanned it's not necessary to set the conditional again. Then
448 % for block X we let \enableX to \@empty so that later on we can check
% Flag that becomes true as soon as any block or group option is processed.
\newif\if@overrideClassLoading
% The first call raises the flag and then turns the command itself into
% \relax, so later options do not repeat the assignment.
\newcommand\overrideClassLoading{%
  \@overrideClassLoadingtrue
  \let\overrideClassLoading\relax
}
% Option-declaring meaning of \do for {name}{first}{last} list entries:
% declare a package option called <name> whose code calls
% \overrideClassLoading and lets \enable<name> to \@empty.
% The two range arguments (#2, #3) are not used by this definition.
% `enable#1' is kept on one line: a line break between `enable' and #1 would
% put a space into the control sequence name.
\def\do#1#2#3{%
  \DeclareOption{#1}{%
    \overrideClassLoading
    \expandafter\let\csname enable#1\endcsname\@empty
  }%
}
457 % We execute the list with this definition of \do
461 % We define lists also for these groups
465 \doclass{CanadianSyllabics
}
466 \doclass{CherokeeFull
}
472 \doclass{EthiopicFull
}
473 \doclass{GeorgianFull
}
478 \doclass{Mathematics
}
479 \doclass{MongolianFull
}
480 \doclass{MyanmarFull
}
482 \doclass{Punctuation
}
483 \doclass{SundaneseFull
}
496 \do{ArabicPresentationFormsA
}
497 \do{ArabicPresentationFormsB
}
498 \do{ArabicSupplement
}
501 \def\CanadianSyllabicsClasses{
502 \do{UnifiedCanadianAboriginalSyllabics
}
503 \do{UnifiedCanadianAboriginalSyllabicsExtended
}
504 \do{UnifiedCanadianAboriginalSyllabicsExtendedA
}
507 \def\CherokeeFullClasses{
509 \do{CherokeeSupplement
}
514 \do{BopomofoExtended
}
515 \do{CJKCompatibility
}
516 \do{CJKCompatibilityForms
}
517 \do{CJKCompatibilityIdeographs
}
518 \do{CJKCompatibilityIdeographsSupplement
}
519 \do{CJKRadicalsSupplement
}
521 \do{CJKSymbolsAndPunctuation
}
522 \do{CJKUnifiedIdeographs
}
523 \do{CJKUnifiedIdeographsExtensionA
}
524 \do{CJKUnifiedIdeographsExtensionB
}
525 \do{CJKUnifiedIdeographsExtensionC
}
526 \do{CJKUnifiedIdeographsExtensionD
}
527 \do{CJKUnifiedIdeographsExtensionE
}
528 \do{CJKUnifiedIdeographsExtensionF
}
529 \do{CJKUnifiedIdeographsExtensionG
}
530 \do{CJKUnifiedIdeographsExtensionH
}
531 \do{EnclosedCJKLettersAndMonths
}
532 \do{EnclosedIdeographicSupplement
}
533 \do{IdeographicDescriptionCharacters
}
534 \do{IdeographicSymbolsAndPunctuation
}
540 \do{BopomofoExtended
}
541 \do{CJKCompatibility
}
542 \do{CJKCompatibilityForms
}
543 \do{CJKCompatibilityIdeographs
}
544 \do{CJKCompatibilityIdeographsSupplement
}
545 \do{CJKRadicalsSupplement
}
547 \do{CJKSymbolsAndPunctuation
}
548 \do{CJKUnifiedIdeographs
}
549 \do{CJKUnifiedIdeographsExtensionA
}
550 \do{CJKUnifiedIdeographsExtensionB
}
551 \do{CJKUnifiedIdeographsExtensionC
}
552 \do{CJKUnifiedIdeographsExtensionD
}
553 \do{CJKUnifiedIdeographsExtensionE
}
554 \do{CJKUnifiedIdeographsExtensionF
}
555 \do{CJKUnifiedIdeographsExtensionG
}
556 \do{EnclosedCJKLettersAndMonths
}
557 \do{EnclosedIdeographicSupplement
}
558 \do{HalfwidthAndFullwidthForms
}
559 \do{HangulCompatibilityJamo
}
561 \do{HangulJamoExtendedA
}
562 \do{HangulJamoExtendedB
}
565 \do{IdeographicDescriptionCharacters
}
566 \do{IdeographicSymbolsAndPunctuation
}
573 \do{KatakanaPhoneticExtensions
}
574 \do{SmallKanaExtension
}
577 \def\CyrillicsClasses{
579 \do{CyrillicExtendedA
}
580 \do{CyrillicExtendedB
}
581 \do{CyrillicExtendedC
}
582 \do{CyrillicExtendedD
}
583 \do{CyrillicSupplement
}
584 \do{GlagoliticSupplement
}
588 \def\DevanagariClasses{
590 \do{DevanagariPostDanDa
}
591 \do{DevanagariPostMarks
}
592 \do{DevanagariPreMarks
}
593 \do{DevanagariExtendedA
}
596 \def\DiacriticsClasses{
597 \do{CombiningDiacriticalMarks
}
598 \do{CombiningDiacriticalMarksExtended
}
599 \do{CombiningDiacriticalMarksForSymbols
}
600 \do{CombiningDiacriticalMarksSupplement
}
601 \do{CombiningHalfMarks
}
602 \do{ModifierToneLetters
}
603 \do{SpacingModifierLetters
}
606 \def\EthiopicFullClasses{
608 \do{EthiopicExtended
}
609 \do{EthiopicExtendedA
}
610 \do{EthiopicExtendedB
}
611 \do{EthiopicSupplement
}
614 \def\GeorgianFullClasses{
616 \do{GeorgianExtended
}
617 \do{GeorgianSupplement
}
622 \do{CopticEpactNumbers
}
628 \do{HangulCompatibilityJamo
}
630 \do{HangulJamoExtendedA
}
631 \do{HangulJamoExtendedB
}
635 \def\JapaneseClasses{
636 \do{CJKUnifiedIdeographs
}
637 \do{HalfwidthAndFullwidthForms
}
644 \do{KatakanaPhoneticExtensions
}
648 \do{AlphabeticPresentationForms
}
650 \do{LatinExtendedAdditional
}
661 \def\MathematicsClasses{
662 \do{ArabicMathematicalAlphabeticSymbols
}
663 \do{MathematicalAlphanumericSymbols
}
664 \do{MathematicalOperators
}
665 \do{MiscellaneousMathematicalSymbolsA
}
666 \do{MiscellaneousMathematicalSymbolsB
}
667 \do{SupplementalMathematicalOperators
}
670 \def\MongolianFullClasses{
672 \do{MongolianSupplement
}
675 \def\MyanmarFullClasses{
677 \do{MyanmarExtendedA
}
678 \do{MyanmarExtendedB
}
681 \def\PhoneticsClasses{
683 \do{PhoneticExtensions
}
684 \do{PhoneticExtensionsSupplement
}
687 \def\PunctuationClasses{
688 \do{GeneralPunctuation
}
689 \do{SupplementalPunctuation
}
692 \def\SundaneseFullClasses{
694 \do{SundaneseSupplement
}
698 \do{AlchemicalSymbols
}
701 \do{ByzantineMusicalSymbols
}
707 \do{GeometricShapesExtended
}
708 \do{LetterlikeSymbols
}
709 \do{MiscellaneousSymbols
}
710 \do{MiscellaneousSymbolsAndArrows
}
711 \do{MiscellaneousSymbolsAndPictographs
}
712 \do{MiscellaneousTechnical
}
714 \do{OrnamentalDingbats
}
715 \do{SupplementalArrowsA
}
716 \do{SupplementalArrowsB
}
717 \do{SupplementalArrowsC
}
718 \do{SupplementalSymbolsAndPictographs
}
719 \do{SymbolsAndPictographsExtendedA
}
720 \do{TransportAndMapSymbols
}
723 \def\SyriacFullClasses{
725 \do{SyriacSupplement
}
728 \def\VedicMarksClasses{
729 \do{DevanagariExtended
}
739 % While adding scripts defined in more recent Unicode versions it was
740 % necessary to move some scripts into the block for XeTeX 0.99994 and newer;
741 % those are tagged with a 'see below' comment.
745 % AnatolianHieroglyphs (see below)
746 \do{AncientGreekMusicalNotation
}
747 \do{AncientGreekNumbers
}
764 \do{CaucasianAlbanian
}
766 \do{CommonIndicNumberForms
}
768 \do{CountingRodNumerals
}
770 \do{CuneiformNumbersAndPunctuation
}
771 \do{CypriotSyllabary
}
774 % Duployan (see below)
775 \do{EarlyDynasticCuneiform
}
776 \do{EgyptianHieroglyphs
}
778 \do{EnclosedAlphanumerics
}
779 \do{EnclosedAlphanumericSupplement
}
788 \do{InscriptionalPahlavi
}
789 \do{InscriptionalParthian
}
794 % Kharoshthi (see below)
803 \do{LinearBIdeograms
}
804 \do{LinearBSyllabary
}
812 % Manichaean (see below)
814 \do{MeeteiMayekExtensions
}
817 \do{MeroiticHieroglyphs
}
822 % Nabataean (see below)
827 % OldHungarian (see below)
828 % OldItalic (see below)
829 % OldNorthArabian (see below)
830 % OldPermic (see below)
832 % OldSouthArabian (see below)
833 % OldTurkic (see below)
834 \do{OpticalCharacterRecognition
}
838 % Palmyrene (see below)
841 % PhaistosDisc (see below)
845 % PsalterPahlavi (see below)
847 \do{RumiNumeralSymbols
}
853 % ShorthandFormatControls (see below)
856 \do{SinhalaArchaicNumbers
}
857 \do{SmallFormVariants
}
859 \do{SuperscriptsAndSubscripts
}
860 % SupplementaryPrivateUseAreaA (see below)
861 % SupplementaryPrivateUseAreaB (see below)
862 % SuttonSignWriting (see below)
870 \do{TaiXuanJingSymbols
}
883 \do{YijingHexagramSymbols
}
885 \ifdefined\XeTeXinterwordspaceshaping
888 \do{AnatolianHieroglyphs
}
897 \do{EgyptianHieroglyphFormatControls
}
899 \do{GeorgianExtended
}
903 \do{IndicSiyaqNumbers
}
904 \do{KaktovikNumerals
}
908 \do{KhitanSmallScript
}
923 \do{NyiakengPuachueHmong
}
933 \do{OttomanSiyaqNumbers
}
937 \do{ShorthandFormatControls
}
940 \do{SupplementaryPrivateUseAreaA
}
941 \do{SupplementaryPrivateUseAreaB
}
942 \do{SuttonSignWriting
}
943 \do{SymbolsForLegacyComputing
}
947 \do{TangutComponents
}
948 \do{TangutSupplement
}
954 \do{ZnamennyMusicalNotation
}
958 % For each class group Z we define the relative option
959 % \DeclareOption{Z}{\overrideClassLoading\enableX1\enableX2...\enableXn}
960 % where X1, X2, ..., Xn are the blocks belonging to class Z
963 \unexpanded{\expandafter\let\csname enable
#1\endcsname\@empty
}}
965 \begingroup\edef\x{\endgroup\noexpand\DeclareOption{#1}{%
966 \noexpand\overrideClassLoading\csname #1Classes
\endcsname}}\x}
970 \ProcessOptions\relax
% If no option has been given, \if@overrideClassLoading will still be
% false, and in this case we enable *all* blocks (again by defining
% \enableX equal to \@empty for each block X).
976 \if@overrideClassLoading
\else
977 \def\do#1#2#3{\expandafter\let\csname enable
#1\endcsname\@empty
}
981 % ----------------------------------------------------------------------------
982 % After dealing with the options, make sure we have the necessary packages available
983 % ----------------------------------------------------------------------------
% This package relies on XeTeX's intercharclass system, so the ifxetex
% package is loaded here.
\RequirePackage{ifxetex}
989 % ----------------------------------------------------------------------------
990 % This package heavily exploits XeTeX's intercharclass system!
991 % ----------------------------------------------------------------------------
993 % enable/disable commands
% User switches for the transition rules: they set
% \XeTeXinterchartokenstate to 0 (off) or 1 (on).
\newcommand\disableTransitionRules{\XeTeXinterchartokenstate=\z@}
\newcommand\enableTransitionRules{\XeTeXinterchartokenstate=\@ne}

% Short aliases for the two switches.
\let\uccoff\disableTransitionRules
\let\uccon\enableTransitionRules

% Switch the rules on at load time.
\enableTransitionRules
1004 % ----------------------------------------------------------------------------
1005 % And now, finally, we can start loading all the requested blocks
1006 % ----------------------------------------------------------------------------
1008 % \message{Package ucharclasses Message: Assigning character classes per
1009 % Unicode block (this may take a while)}
1011 %% We record the last allocated class before allocating ours;
1012 %% \newXeTeXintercharclass saves in the counter
1013 %% \xe@alloc@intercharclass the last allocated class number; initially
1014 %% it's 3, but some other code might have allocated interchar classes
1015 %% before loading this package; if \enableX is defined (to \@empty,
1016 %% but that's irrelevant), an intercharclass is allocated by using the
1017 %% list \AllClasses; two cases for block X:
1019 %% (1) \enableX is defined: then \do{X}{a}{b} will become
1020 %% \@defineUnicodeClass{X}{a}{b} which in turn will execute
1021 %% \newXeTeXintercharclass\XClass and start a loop assigning code
1022 %% points from a to b to this class
1024 %% (2) \enableX is not defined: then \do{X}{a}{b} will become
1025 %% \@gobblethree{X}{a}{b} and so nothing will be performed
% Snapshot of the allocation counter \xe@alloc@intercharclass taken before
% this package allocates any class of its own.
\chardef\@classstart=\xe@alloc@intercharclass

% Macro that swallows three arguments; only defined if not already present.
\providecommand{\@gobblethree}[3]{}
1031 \ifcsname enable
#1\endcsname
1032 \expandafter\@defineUnicodeClass
1034 \expandafter\@gobblethree
1037 \def\@defineUnicodeClass
#1#2#3{%
1038 \if@ucharclassverbose
\typeout{Defining
#1 Class
}\fi
1039 \expandafter\newXeTeXintercharclass\csname #1Class
\endcsname
1042 \if@ucharclassverbose
1043 \typeout{\XeTeXcharclass\number\count@=
1044 \expandafter\string\csname #1Class
\endcsname}%
1046 \XeTeXcharclass\count@=
\csname #1Class
\endcsname
% Second snapshot of the allocation counter, taken at this point to mark the
% end of our charclass range.
\chardef\@classend=\xe@alloc@intercharclass
1057 %%% Our assigned classes go from \@classstart (excluded) to \@classend (included)
1059 % ----------------------------------------------------------------------------
1060 % Use: \setTransitionsFor{block name}{when entering this block}{when leaving this block}
1061 % ----------------------------------------------------------------------------
1063 \def\setTransitionsFor#1#2#3{%
1064 \ifcsname enable
#1\endcsname
1065 \count@=\@classstart
1066 \loop\ifnum\count@<\@classend
1068 \ifnum\count@=
\csname #1Class
\endcsname\else
1069 \XeTeXinterchartoks\count@
\csname #1Class
\endcsname=
{#2}%
1070 \XeTeXinterchartoks\csname #1Class
\endcsname \count@=
{#3}%
1073 \XeTeXinterchartoks\@ucharclass@boundary
\csname #1Class
\endcsname=
{#2}%
1074 \XeTeXinterchartoks\csname #1Class
\endcsname\@ucharclass@boundary=
{#3}%
1076 \if@ucharclassverbose
1077 \PackageWarningNoLine{ucharclasses
}{Class
#1\MessageBreak
1083 % ----------------------------------------------------------------------------
1084 % Use: \setTransitionTo{block name}{what to do when entering this block}
1085 % ----------------------------------------------------------------------------
1087 \def\setTransitionTo#1#2{%
1088 \ifcsname enable
#1\endcsname
1089 \count@=\@classstart
1090 \loop\ifnum\count@<\@classend
1092 \ifnum\count@=
\csname #1Class
\endcsname\else
1093 \XeTeXinterchartoks\count@
\csname #1Class
\endcsname=
{#2}%
1096 \XeTeXinterchartoks\@ucharclass@boundary
\csname #1Class
\endcsname=
{#2}%
1098 \if@ucharclassverbose
1099 \PackageWarningNoLine{ucharclasses
}{Class
#1\MessageBreak
1105 % ----------------------------------------------------------------------------
% Use: \setTransitionFrom{block name}{what to do when leaving this block}
1107 % ----------------------------------------------------------------------------
1109 \def\setTransitionFrom#1#2{%
1110 \ifcsname enable
#1\endcsname
1111 \count@=\@classstart
1112 \loop\ifnum\count@<\@classend
1114 \ifnum\count@=
\csname #1Class
\endcsname\else
1115 \XeTeXinterchartoks\csname #1Class
\endcsname \count@=
{#2}%
1118 \XeTeXinterchartoks\csname #1Class
\endcsname\@ucharclass@boundary=
{#2}%
1120 \if@ucharclassverbose
1121 \PackageWarningNoLine{ucharclasses
}{Class
#1\MessageBreak
1127 % ----------------------------------------------------------------------------
1128 % Informal Block Rules - for these, to/from must always be defined
1130 % Available informal groups are:
1133 % - CanadianSyllabics
1135 % - Chinese (including bopomofo)
1136 % - CJK (Chinese/Japanese/Korean)
1143 % - Japanese (it is advised to set CJK first to a catch-all, then set
1144 % Japanese for specifics)
1145 % - Korean (=Hangul) (same comment as for Japanese)
1157 % - Other (I am not a fan of lump groups. I hope to un-lump most of it)
1159 % ----------------------------------------------------------------------------
%% For each class group Z we define \setTransitionsForZ as
1162 %% \newcommand\setTransitionsForZ[2]{%
1163 %% \setTransitionsFor{X1}{#1}{#2}
1164 %% \setTransitionsFor{X2}{#1}{#2}
1166 %% \setTransitionsFor{Xn}{#1}{#2}}
1167 %% where X1, X2, ..., Xn are the blocks in group Z
% Meaning of \do while the \setTransitionsFor<group> commands are built
% inside an \edef: \do{<block>} expands to
%   \setTransitionsFor{<block>}{#1}{#2}
% where #1/#2 are the parameters of the command being defined.
% The four hash marks must be contiguous: they are halved once when this
% \def is read and once more by the enclosing \edef.  Splitting them
% (`###' followed by `#1') is an illegal parameter reference.
\def\do#1{\noexpand\setTransitionsFor{#1}{####1}{####2}}
1171 \begingroup\edef\x{\endgroup
1172 \noexpand\newcommand
1173 \unexpanded\expandafter{\csname setTransitionsFor
#1\endcsname}[2]%
1174 {\csname #1Classes
\endcsname}}\x}
1178 % ----------------------------------------------------------------------------
1180 % based on the previous informal groups, we can define a catch-all transition
1183 % ----------------------------------------------------------------------------
%% The following is equivalent to defining
%% \newcommand{\setDefaultTransitions}[2]{
1187 %% \setTransitionsForArabic{#1}{#2}
1189 %% \setTransitionsForOther{#1}{#2}}
1192 \expandafter\noexpand\csname setTransitionsFor
#1\endcsname{###
#1}{###
#2}}
1193 \begingroup\edef\x{\endgroup
1194 \noexpand\newcommand\noexpand\setDefaultTransitions[2]{%
1197 % ----------------------------------------------------------------------------
% Undefine the two helper names again.
\let\do\@undefined
\let\doclass\@undefined
1202 % End of file `ucharclasses.sty'.