1 % ----------------------------------------------------------------------------
2 % This "ucharclasses" package sets up XeTeX character classes based on which
3 % Unicode block a character is found in. It then allows transition rules to be defined
4 % for entering or leaving particular Unicode blocks; their code gets inserted
5 % automatically whenever XeTeX encounters a transition from a character in one
6 % Unicode block to a character in another Unicode block.
8 % Current compatibility should be Unicode 13.0.
11 % v2.1-2.3: Qing Lee, Werner Lemberg
12 % v2.0: Enrico Gregorio
13 % v1.0: Mike "Pomax" Kamermans
15 % Significant updates:
16 % v2.4: Unicode 13 support
17 % v2.3: Unicode 10 support
18 % v2.2: Unicode 8.0 and LaTeX2e support
19 % v2.1: Uplift for the intercharclass updates introduced in XeTeX 0.99994
20 % v2.0: Rewritten to vastly improve performance.
21 % v1.0: Unicode block switching using XeTeX intercharclasses.
23 % License: public domain (https://www.ctan.org/license/pd)
25 % ----------------------------------------------------------------------------
% Package identification: package name, then the release date, version
% number and a one-line description in the optional argument.
27 \ProvidesPackage{ucharclasses
}[2021/
02/
16 v2.4
.0 Unicode block character classes for XeLaTeX
]
% Boolean switch for diagnostic output. \newif leaves it false; only the
% `verbose' package option sets it to true. Further down it guards the
% \typeout and \PackageWarningNoLine diagnostics.
29 \newif\if@ucharclassverbose
30 \DeclareOption{verbose
}{\@ucharclassverbosetrue
}
32 % ----------------------------------------------------------------------------
33 % The package options allow you to selectively enable certain unicode blocks
34 % ----------------------------------------------------------------------------
36 % We first define all blocks in a list together with their start and end code points.
39 % Starting with XeTeX version 3.14159265-2.6-0.99994, the number of
40 % \XeTeXcharclass registers was extended from 256 to 4096 entries; some not
41 % so important blocks are thus provided only for this and newer versions.
42 % The boundary of character class was changed from 255 to 4095 correspondingly.
43 % The primitive \XeTeXinterwordspaceshaping was introduced by XeTeX 0.99994;
44 % we use it as a flag to identify this version.
46 % However, earlier versions of LaTeX2e (before 2016/04/22 v2.0q) didn't provide
47 % support for 4096 entries; we thus have to override the hard-coded limit.
48 \ifdefined\e@alloc@intercharclass@top
49 \chardef\@ucharclass@boundary=
\e@alloc@intercharclass@top
51 \ifdefined\XeTeXinterwordspaceshaping
52 \chardef\@ucharclass@boundary=
4095 %
53 \def\newXeTeXintercharclass{%
54 \e@alloc
\XeTeXcharclass\chardef
55 \xe@alloc@intercharclass
\m@ne\@ucharclass@boundary
}
57 \chardef\@ucharclass@boundary=\@cclv
62 % Unicode 5.1 block definitions
63 \do{AegeanNumbers
}{"
010100}{"
01013F
}
64 \do{AlphabeticPresentationForms
}{"
0FB00
}{"
0FB4F
}
65 \do{AncientGreekMusicalNotation
}{"
01D200
}{"
01D24F
}
66 \do{AncientGreekNumbers
}{"
010140}{"
01018F
}
67 \do{AncientSymbols
}{"
010190}{"
0101CF
}
68 \do{Arabic
}{"
0600}{"
06FF
}
69 \do{ArabicPresentationFormsA
}{"
0FB50
}{"
0FDFF
}
70 \do{ArabicPresentationFormsB
}{"
0FE70
}{"
0FEFF
}
71 \do{ArabicSupplement
}{"
0750}{"
077F
}
72 \do{Armenian
}{"
0530}{"
058F
}
73 \do{Arrows
}{"
02190}{"
021FF
}
74 \do{Balinese
}{"
01B00
}{"
01B7F
}
75 \do{BasicLatin
}{"
0020}{"
007F
} % 0000..007F in Unicode standard
76 \do{Bengali
}{"
0980}{"
09FF
}
77 \do{BlockElements
}{"
02580}{"
0259F
}
78 \do{Bopomofo
}{"
03100}{"
0312F
}
79 \do{BopomofoExtended
}{"
031A0
}{"
031BF
}
80 \do{BoxDrawing
}{"
02500}{"
0257F
}
81 \do{BraillePatterns
}{"
02800}{"
028FF
}
82 \do{Buginese
}{"
01A00
}{"
01A1F
}
83 \do{Buhid
}{"
01740}{"
0175F
}
84 \do{ByzantineMusicalSymbols
}{"
01D000
}{"
01D0FF
}
86 \do{Cham
}{"
0AA00
}{"
0AA5F
}
87 \do{Cherokee
}{"
013A0
}{"
013FF
}
88 \do{CJKCompatibility
}{"
03300}{"
033FF
}
89 \do{CJKCompatibilityForms
}{"
0FE30
}{"
0FE4F
}
90 \do{CJKCompatibilityIdeographs
}{"
0F900
}{"
0FAFF
}
91 \do{CJKCompatibilityIdeographsSupplement
}{"
02F800
}{"
02FA1F
}
92 \do{CJKRadicalsSupplement
}{"
02E80
}{"
02EFF
}
93 \do{CJKStrokes
}{"
031C0
}{"
031EF
}
94 \do{CJKSymbolsAndPunctuation
}{"
03000}{"
0303F
}
95 \do{CJKUnifiedIdeographs
}{"
04E00
}{"
09FFF
}
96 \do{CJKUnifiedIdeographsExtensionA
}{"
03400}{"
04DBF
}
97 \do{CJKUnifiedIdeographsExtensionB
}{"
020000}{"
02A6DF
}
98 \do{CombiningDiacriticalMarks
}{"
0300}{"
036F
}
99 \do{CombiningDiacriticalMarksForSymbols
}{"
020D0
}{"
020FF
}
100 \do{CombiningDiacriticalMarksSupplement
}{"
01DC0
}{"
01DFF
}
101 \do{CombiningHalfMarks
}{"
0FE20
}{"
0FE2F
}
102 \do{ControlPictures
}{"
02400}{"
0243F
}
103 \do{Coptic
}{"
02C80
}{"
02CFF
}
104 \do{CountingRodNumerals
}{"
01D360
}{"
01D37F
}
105 \do{Cuneiform
}{"
012000}{"
0123FF
}
106 \do{CuneiformNumbersAndPunctuation
}{"
012400}{"
01247F
}
107 \do{CurrencySymbols
}{"
020A0
}{"
020CF
}
108 \do{CypriotSyllabary
}{"
010800}{"
01083F
}
109 \do{Cyrillic
}{"
0400}{"
04FF
}
110 \do{CyrillicExtendedA
}{"
02DE0
}{"
02DFF
}
111 \do{CyrillicExtendedB
}{"
0A640
}{"
0A69F
}
112 \do{CyrillicSupplement
}{"
0500}{"
052F
}
113 \do{Deseret
}{"
010400}{"
01044F
}
114 \do{DevanagariDanDa
}{"
0964}{"
0965}
115 \do{DevanagariMarks
}{"
0951}{"
0954}
116 \do{DevanagariPostDanDa
}{"
0966}{"
097F
}
117 \do{DevanagariPostMarks
}{"
0955}{"
0963}
118 \do{DevanagariPreMarks
}{"
0900}{"
0950}
119 \do{Dingbats
}{"
02700}{"
027BF
}
120 \do{DominoTiles
}{"
01F030
}{"
01F09F
}
121 \do{EnclosedAlphanumerics
}{"
02460}{"
024FF
}
122 \do{EnclosedCJKLettersAndMonths
}{"
03200}{"
032FF
}
123 \do{Ethiopic
}{"
01200}{"
0137F
}
124 \do{EthiopicExtended
}{"
02D80
}{"
02DDF
}
125 \do{EthiopicSupplement
}{"
01380}{"
0139F
}
126 \do{GeneralPunctuation
}{"
02000}{"
0206F
}
127 \do{GeometricShapes
}{"
025A0
}{"
025FF
}
128 \do{Georgian
}{"
010A0
}{"
010FF
}
129 \do{GeorgianSupplement
}{"
02D00
}{"
02D2F
}
130 \do{Glagolitic
}{"
02C00
}{"
02C5F
}
131 \do{Gothic
}{"
010330}{"
01034F
}
132 \do{GreekAndCoptic
}{"
0370}{"
03FF
}
133 \do{GreekExtended
}{"
01F00
}{"
01FFF
}
134 \do{Gujarati
}{"
0A80
}{"
0AFF
}
135 \do{Gurmukhi
}{"
0A00
}{"
0A7F
}
136 \do{HalfwidthAndFullwidthForms
}{"
0FF00
}{"
0FFEF
}
137 \do{HangulCompatibilityJamo
}{"
03130}{"
0318F
}
138 \do{HangulJamo
}{"
01100}{"
011FF
}
139 \do{HangulSyllables
}{"
0AC00
}{"
0D7AF
}
140 \do{Hanunoo
}{"
01720}{"
0173F
}
141 \do{Hebrew
}{"
0590}{"
05FF
}
142 \do{Hiragana
}{"
03040}{"
0309F
}
143 \do{IdeographicDescriptionCharacters
}{"
02FF0
}{"
02FFF
}
144 \do{IPAExtensions
}{"
0250}{"
02AF
}
145 \do{Kanbun
}{"
03190}{"
0319F
}
146 \do{KangxiRadicals
}{"
02F00
}{"
02FDF
}
147 \do{Kannada
}{"
0C80
}{"
0CFF
}
148 \do{Katakana
}{"
030A0
}{"
030FF
}
149 \do{KatakanaPhoneticExtensions
}{"
031F0
}{"
031FF
}
150 \do{KayahLi
}{"
0A900
}{"
0A92F
}
151 \do{Kharoshthi
}{"
010A00
}{"
010A5F
}
152 \do{Khmer
}{"
01780}{"
017FF
}
153 \do{KhmerSymbols
}{"
019E0
}{"
019FF
}
154 \do{Lao
}{"
0E80
}{"
0EFF
}
155 \do{LatinExtendedAdditional
}{"
01E00
}{"
01EFF
}
156 \do{LatinExtendedA
}{"
0100}{"
017F
}
157 \do{LatinExtendedB
}{"
0180}{"
024F
}
158 \do{LatinExtendedC
}{"
02C60
}{"
02C7F
}
159 \do{LatinExtendedD
}{"
0A720
}{"
0A7FF
}
160 \do{LatinSupplement
}{"
0080}{"
00FF
}
161 \do{Lepcha
}{"
01C00
}{"
01C4F
}
162 \do{LetterlikeSymbols
}{"
02100}{"
0214F
}
163 \do{Limbu
}{"
01900}{"
0194F
}
164 \do{LinearBIdeograms
}{"
010080}{"
0100FF
}
165 \do{LinearBSyllabary
}{"
010000}{"
01007F
}
166 \do{Lycian
}{"
010280}{"
01029F
}
167 \do{Lydian
}{"
010920}{"
01093F
}
168 \do{MahjongTiles
}{"
01F000
}{"
01F02F
}
169 \do{Malayalam
}{"
0D00
}{"
0D7F
}
170 \do{MathematicalAlphanumericSymbols
}{"
01D400
}{"
01D7FF
}
171 \do{MathematicalOperators
}{"
02200}{"
022FF
}
172 \do{MiscellaneousMathematicalSymbolsA
}{"
027C0
}{"
027EF
}
173 \do{MiscellaneousMathematicalSymbolsB
}{"
02980}{"
029FF
}
174 \do{MiscellaneousSymbols
}{"
02600}{"
026FF
}
175 \do{MiscellaneousSymbolsAndArrows
}{"
02B00
}{"
02BFF
}
176 \do{MiscellaneousTechnical
}{"
02300}{"
023FF
}
177 \do{ModifierToneLetters
}{"
0A700
}{"
0A71F
}
178 \do{Mongolian
}{"
01800}{"
018AF
}
179 \do{MusicalSymbols
}{"
01D100
}{"
01D1FF
}
180 \do{Myanmar
}{"
01000}{"
0109F
}
181 \do{NewTaiLue
}{"
01980}{"
019DF
}
182 \do{NKo
}{"
07C0
}{"
07FF
}
183 \do{NumberForms
}{"
02150}{"
0218F
}
184 \do{Ogham
}{"
01680}{"
0169F
}
185 \do{OlChiki
}{"
01C50
}{"
01C7F
}
186 % OldItalic (see below)
187 \do{OldPersian
}{"
0103A0
}{"
0103DF
}
188 \do{OpticalCharacterRecognition
}{"
02440}{"
0245F
}
189 \do{Oriya
}{"
0B00
}{"
0B7F
}
190 \do{Osmanya
}{"
010480}{"
0104AF
}
191 \do{PhagsPa
}{"
0A840
}{"
0A87F
}
192 % PhaistosDisc (see below)
193 \do{Phoenician
}{"
010900}{"
01091F
}
194 \do{PhoneticExtensions
}{"
01D00
}{"
01D7F
}
195 \do{PhoneticExtensionsSupplement
}{"
01D80
}{"
01DBF
}
196 \do{PrivateUseArea
}{"
0E000
}{"
0F8FF
}
197 \do{Rejang
}{"
0A930
}{"
0A95F
}
198 \do{Runic
}{"
016A0
}{"
016FF
}
199 \do{Saurashtra
}{"
0A880
}{"
0A8DF
}
200 \do{Shavian
}{"
010450}{"
01047F
}
201 \do{Sinhala
}{"
0D80
}{"
0DFF
}
202 \do{SmallFormVariants
}{"
0FE50
}{"
0FE6F
}
203 \do{SpacingModifierLetters
}{"
02B0
}{"
02FF
}
204 \do{Sundanese
}{"
01B80
}{"
01BBF
}
205 \do{SuperscriptsAndSubscripts
}{"
02070}{"
0209F
}
206 \do{SupplementalArrowsA
}{"
027F0
}{"
027FF
}
207 \do{SupplementalArrowsB
}{"
02900}{"
0297F
}
208 \do{SupplementalMathematicalOperators
}{"
02A00
}{"
02AFF
}
209 \do{SupplementalPunctuation
}{"
02E00
}{"
02E7F
}
210 % SupplementaryPrivateUseAreaA (see below)
211 % SupplementaryPrivateUseAreaB (see below)
212 \do{SylotiNagri
}{"
0A800
}{"
0A82F
}
213 \do{Syriac
}{"
0700}{"
074F
}
214 \do{Tagalog
}{"
01700}{"
0171F
}
215 \do{Tagbanwa
}{"
01760}{"
0177F
}
216 \do{Tags
}{"
0E0000
}{"
0E007F
}
217 \do{TaiLe
}{"
01950}{"
0197F
}
218 \do{TaiXuanJingSymbols
}{"
01D300
}{"
01D35F
}
219 \do{Tamil
}{"
0B80
}{"
0BFF
}
220 \do{Telugu
}{"
0C00
}{"
0C7F
}
221 \do{Thaana
}{"
0780}{"
07BF
}
222 \do{Thai
}{"
0E00
}{"
0E7F
}
223 \do{Tibetan
}{"
0F00
}{"
0FFF
}
224 \do{Tifinagh
}{"
02D30
}{"
02D7F
}
225 \do{Ugaritic
}{"
010380}{"
01039F
}
226 \do{UnifiedCanadianAboriginalSyllabics
}{"
01400}{"
0167F
}
227 \do{Vai
}{"
0A500
}{"
0A63F
}
228 \do{VerticalForms
}{"
0FE10
}{"
0FE1F
}
229 \do{YiRadicals
}{"
0A490
}{"
0A4CF
}
230 \do{YiSyllables
}{"
0A000
}{"
0A48F
}
231 \do{YijingHexagramSymbols
}{"
04DC0
}{"
04DFF
}
232 % Unicode 5.2 additions
233 \do{Avestan
}{"
010B00
}{"
010B3F
}
234 \do{Bamum
}{"
0A6A0
}{"
0A6FF
}
235 \do{CJKUnifiedIdeographsExtensionC
}{"
02A700
}{"
02B73F
}
236 \do{CommonIndicNumberForms
}{"
0A830
}{"
0A83F
}
237 \do{DevanagariExtended
}{"
0A8E0
}{"
0A8FF
}
238 \do{EgyptianHieroglyphs
}{"
013000}{"
01342F
}
239 \do{EnclosedAlphanumericSupplement
}{"
01F100
}{"
01F1FF
}
240 \do{EnclosedIdeographicSupplement
}{"
01F200
}{"
01F2FF
}
241 \do{HangulJamoExtendedA
}{"
0A960
}{"
0A97F
}
242 \do{HangulJamoExtendedB
}{"
0D7B0
}{"
0D7FF
}
243 \do{ImperialAramaic
}{"
010840}{"
01085F
}
244 \do{InscriptionalPahlavi
}{"
010B60
}{"
010B7F
}
245 \do{InscriptionalParthian
}{"
010B40
}{"
010B5F
}
246 \do{Javanese
}{"
0A980
}{"
0A9DF
}
247 \do{Kaithi
}{"
011080}{"
0110CF
}
248 \do{Lisu
}{"
0A4D0
}{"
0A4FF
}
249 \do{MeeteiMayek
}{"
0ABC0
}{"
0ABFF
}
250 \do{MyanmarExtendedA
}{"
0AA60
}{"
0AA7F
}
251 % OldSouthArabian (see below)
252 % OldTurkic (see below)
253 \do{RumiNumeralSymbols
}{"
010E60
}{"
010E7F
}
254 \do{Samaritan
}{"
0800}{"
083F
}
255 \do{TaiTham
}{"
01A20
}{"
01AAF
}
256 \do{TaiViet
}{"
0AA80
}{"
0AADF
}
257 \do{UnifiedCanadianAboriginalSyllabicsExtended
}{"
018B0
}{"
018FF
}
258 \do{VedicExtensions
}{"
01CD0
}{"
01CFF
}
259 % Unicode 6.0 additions
260 \do{AlchemicalSymbols
}{"
01F700
}{"
01F77F
}
261 \do{BamumSupplement
}{"
016800}{"
016A3F
}
262 \do{Batak
}{"
01BC0
}{"
01BFF
}
263 \do{Brahmi
}{"
011000}{"
01107F
}
264 \do{CJKUnifiedIdeographsExtensionD
}{"
02B740
}{"
02B81F
}
265 \do{Emoticons
}{"
01F600
}{"
01F64F
}
266 \do{EthiopicExtendedA
}{"
0AB00
}{"
0AB2F
}
267 \do{KanaSupplement
}{"
01B000
}{"
01B0FF
}
268 \do{Mandaic
}{"
0840}{"
085F
}
269 \do{MiscellaneousSymbolsAndPictographs
}{"
01F300
}{"
01F5FF
}
270 \do{PlayingCards
}{"
01F0A0
}{"
01F0FF
}
271 \do{TransportAndMapSymbols
}{"
01F680
}{"
01F6FF
}
272 % Unicode 6.1 additions
273 \do{ArabicExtendedA
}{"
08A0
}{"
08FF
}
274 \do{ArabicMathematicalAlphabeticSymbols
}{"
01EE00
}{"
01EEFF
}
275 \do{Chakma
}{"
011100}{"
01114F
}
276 \do{MeeteiMayekExtensions
}{"
0AAE0
}{"
0AAFF
}
277 \do{MeroiticCursive
}{"
0109A0
}{"
0109FF
}
278 \do{MeroiticHieroglyphs
}{"
010980}{"
01099F
}
279 \do{Miao
}{"
016F00
}{"
016F9F
}
280 \do{Sharada
}{"
011180}{"
0111DF
}
281 \do{SoraSompeng
}{"
0110D0
}{"
0110FF
}
282 \do{SundaneseSupplement
}{"
01CC0
}{"
01CCF
}
283 \do{Takri
}{"
011680}{"
0116CF
}
284 % Unicode 7.0 additions
285 \do{BassaVah
}{"
016AD0
}{"
016AFF
}
286 \do{CaucasianAlbanian
}{"
010530}{"
01056F
}
287 \do{CombiningDiacriticalMarksExtended
}{"
01AB0
}{"
01AFF
}
288 \do{CopticEpactNumbers
}{"
0102E0
}{"
0102FF
}
289 % Duployan (see below)
290 \do{Elbasan
}{"
010500}{"
01052F
}
291 \do{GeometricShapesExtended
}{"
01F780
}{"
01F7FF
}
292 \do{Grantha
}{"
011300}{"
01137F
}
293 \do{Khojki
}{"
011200}{"
01124F
}
294 \do{Khudawadi
}{"
0112B0
}{"
0112FF
}
295 \do{LatinExtendedE
}{"
0AB30
}{"
0AB6F
}
296 \do{LinearA
}{"
010600}{"
01077F
}
297 \do{Mahajani
}{"
011150}{"
01117F
}
298 \do{Manichaean
}{"
010AC0
}{"
010AFF
}
299 \do{MendeKikakui
}{"
01E800
}{"
01E8DF
}
300 \do{Modi
}{"
011600}{"
01165F
}
301 \do{Mro
}{"
016A40
}{"
016A6F
}
302 \do{MyanmarExtendedB
}{"
0A9E0
}{"
0A9FF
}
303 \do{Nabataean
}{"
010880}{"
0108AF
}
304 % OldNorthArabian (see below)
305 \do{OldPermic
}{"
010350}{"
01037F
}
306 \do{OrnamentalDingbats
}{"
01F650
}{"
01F67F
}
307 \do{PahawhHmong
}{"
016B00
}{"
016B8F
}
308 \do{Palmyrene
}{"
010860}{"
01087F
}
309 \do{PauCinHau
}{"
011AC0
}{"
011AFF
}
310 \do{PsalterPahlavi
}{"
010B80
}{"
010BAF
}
311 % ShorthandFormatControls (see below)
312 \do{Siddham
}{"
011580}{"
0115FF
}
313 \do{SinhalaArchaicNumbers
}{"
0111E0
}{"
0111FF
}
314 \do{SupplementalArrowsC
}{"
01F800
}{"
01F8FF
}
315 \do{Tirhuta
}{"
011480}{"
0114DF
}
316 \do{WarangCiti
}{"
0118A0
}{"
0118FF
}
317 % Unicode 8.0 additions
319 % AnatolianHieroglyphs (see below)
320 \do{CherokeeSupplement
}{"
0AB70
}{"
0ABBF
}
321 \do{CJKUnifiedIdeographsExtensionE
}{"
02B820
}{"
02CEAF
}
322 % EarlyDynasticCuneiform (see below)
324 % Multani (see below)
325 \do{OldHungarian
}{"
010C80
}{"
010CFF
}
326 \do{SupplementalSymbolsAndPictographs
}{"
01F900
}{"
01F9FF
}
327 % SuttonSignWriting (see below)
328 % Unicode 9.0 additions needed for classes
329 \do{CyrillicExtendedC
}{"
01C80
}{"
01C8F
}
330 \do{GlagoliticSupplement
}{"
01E000
}{"
01E02F
}
331 \do{IdeographicSymbolsAndPunctuation
}{"
016FE0
}{"
016FFF
}
332 \do{MongolianSupplement
}{"
011660}{"
01167F
}
333 % Unicode 10.0 additions needed for classes
334 \do{CJKUnifiedIdeographsExtensionF
}{"
02CEB0
}{"
02EBEF
}
335 \do{KanaExtendedA
}{"
01B100
}{"
01B12F
}
336 \do{SyriacSupplement
}{"
0860}{"
086F
}
337 % Unicode 11.0 additions needed for classes
338 \do{GeorgianExtended
}{"
01C90
}{"
01CBF
}
339 % Unicode 12.0 additions needed for classes
340 \do{SmallKanaExtension
}{"
01B130
}{"
01B16F
}
341 \do{SymbolsAndPictographsExtendedA
}{"
01FA70
}{"
01FAFF
}
342 % Unicode 13.0 additions needed for classes
343 \do{CJKUnifiedIdeographsExtensionG
}{"
030000}{"
03134F
}
345 \ifdefined\XeTeXinterwordspaceshaping
346 % Unicode 5.1 block definitions
347 \do{Carian
}{"
0102A0
}{"
0102DF
}
348 \do{OldItalic
}{"
010300}{"
01032F
}
349 \do{PhaistosDisc
}{"
0101D0
}{"
0101FF
}
350 \do{SupplementaryPrivateUseAreaA
}{"
0F0000
}{"
0FFFFF
}
351 \do{SupplementaryPrivateUseAreaB
}{"
0100000}{"
010FFFF
}
352 % Unicode 5.2 additions
353 \do{OldSouthArabian
}{"
010A60
}{"
010A7F
}
354 \do{OldTurkic
}{"
010C00
}{"
010C4F
}
355 % Unicode 7.0 additions
356 \do{Duployan
}{"
01BC00
}{"
01BC9F
}
357 \do{OldNorthArabian
}{"
010A80
}{"
010A9F
}
358 \do{ShorthandFormatControls
}{"
01BCA0
}{"
01BCAF
}
359 % Unicode 8.0 additions
360 \do{Ahom
}{"
011700}{"
01173F
}
361 \do{AnatolianHieroglyphs
}{"
014400}{"
01467F
}
362 \do{EarlyDynasticCuneiform
}{"
012480}{"
01254F
}
363 \do{Hatran
}{"
0108E0
}{"
0108FF
}
364 \do{Multani
}{"
011280}{"
0112AF
}
365 \do{SuttonSignWriting
}{"
01D800
}{"
01DAAF
}
366 % Unicode 9.0 additions
367 \do{Adlam
}{"
01E900
}{"
01E95F
}
368 \do{Bhaiksuki
}{"
011C00
}{"
011C6F
}
369 \do{Marchen
}{"
011C70
}{"
011CBF
}
370 \do{Newa
}{"
011400}{"
01147F
}
371 \do{Osage
}{"
0104B0
}{"
0104FF
}
372 \do{Tangut
}{"
017000}{"
0187FF
}
373 \do{TangutComponents
}{"
018800}{"
018AFF
}
374 % Unicode 10.0 additions
375 \do{MasaramGondi
}{"
011D00
}{"
011D5F
}
376 \do{Nushu
}{"
01B170
}{"
01B2FF
}
377 \do{Soyombo
}{"
011A50
}{"
011AAF
}
378 \do{ZanabazarSquare
}{"
011A00
}{"
011A4F
}
379 % Unicode 11.0 additions
380 \do{ChessSymbols
}{"
01FA00
}{"
01FA6F
}
381 \do{Dogra
}{"
011800}{"
01184F
}
382 \do{GunjalaGondi
}{"
011D60
}{"
011DAF
}
383 \do{HanifiRohingya
}{"
010D00
}{"
010D3F
}
384 \do{IndicSiyaqNumbers
}{"
01EC70
}{"
01ECBF
}
385 \do{Makasar
}{"
011EE0
}{"
011EFF
}
386 \do{MayanNumerals
}{"
01D2E0
}{"
01D2FF
}
387 \do{Medefaidrin
}{"
016E40
}{"
016E9F
}
388 \do{OldSogdian
}{"
010F00
}{"
010F2F
}
389 \do{Sogdian
}{"
010F30
}{"
010F6F
}
390 % Unicode 12.0 additions
391 \do{EgyptianHieroglyphFormatControls
}{"
013430}{"
01343F
}
392 \do{Elymaic
}{"
010FE0
}{"
010FFF
}
393 \do{Nandinagari
}{"
0119A0
}{"
0119FF
}
394 \do{NyiakengPuachueHmong
}{"
01E100
}{"
01E14F
}
395 \do{OttomanSiyaqNumbers
}{"
01ED00
}{"
01ED4F
}
396 \do{TamilSupplement
}{"
011FC0
}{"
011FFF
}
397 \do{Wancho
}{"
01E2C0
}{"
01E2FF
}
398 % Unicode 13.0 additions
399 \do{Chorasmian
}{"
010FB0
}{"
010FDF
}
400 \do{DivesAkuru
}{"
011900}{"
01195F
}
401 \do{KhitanSmallScript
}{"
018B00
}{"
018CFF
}
402 \do{LisuSupplement
}{"
011FB0
}{"
011FBF
}
403 \do{SymbolsForLegacyComputing
}{"
01FB00
}{"
01FBFF
}
404 \do{TangutSupplement
}{"
018D00
}{"
018D8F
}
405 \do{Yezidi
}{"
010E80
}{"
010EBF
}
409 % ----------------------------------------------------------------------------
410 % Option handling lets the user turn off "load all" and selectively enable only those blocks
411 % they are interested in
412 % ----------------------------------------------------------------------------
414 % Each option starts with \overrideClassLoading; so any specified
415 % option will set |\if@overrideClassLoading| to true; when one has
416 % been scanned it's not necessary to set the conditional again. Then
417 % for block X we let \enableX to \@empty so that later on we can check
% (with \ifcsname) whether block X has been enabled.
420 \newif\if@overrideClassLoading
% On its first use \overrideClassLoading sets the conditional to true and
% then redefines itself to \relax, so every later use does nothing.
421 \newcommand{\overrideClassLoading}{\@overrideClassLoadingtrue
422 \let\overrideClassLoading\relax}
% \do{<block>}{<first>}{<last>}: declare a package option named <block>
% whose code lets \enable<block> to \@empty. Only #1 is used by this
% definition; the two range arguments #2 and #3 are discarded.
424 \def\do#1#2#3{\DeclareOption{#1}%
425 {\overrideClassLoading\expandafter\let\csname enable
#1\endcsname\@empty
}}
426 % We execute the list with this definition of \do
430 % We define lists also for these groups
434 \doclass{CanadianSyllabics
}
435 \doclass{CherokeeFull
}
441 \doclass{EthiopicFull
}
442 \doclass{GeorgianFull
}
447 \doclass{Mathematics
}
448 \doclass{MongolianFull
}
449 \doclass{MyanmarFull
}
451 \doclass{Punctuation
}
452 \doclass{SundaneseFull
}
463 \do{ArabicPresentationFormsA
}
464 \do{ArabicPresentationFormsB
}
465 \do{ArabicSupplement
}
468 \def\CanadianSyllabicsClasses{
469 \do{UnifiedCanadianAboriginalSyllabics
}
470 \do{UnifiedCanadianAboriginalSyllabicsExtended
}
473 \def\CherokeeFullClasses{
475 \do{CherokeeSupplement
}
480 \do{BopomofoExtended
}
481 \do{CJKCompatibility
}
482 \do{CJKCompatibilityForms
}
483 \do{CJKCompatibilityIdeographs
}
484 \do{CJKCompatibilityIdeographsSupplement
}
485 \do{CJKRadicalsSupplement
}
487 \do{CJKSymbolsAndPunctuation
}
488 \do{CJKUnifiedIdeographs
}
489 \do{CJKUnifiedIdeographsExtensionA
}
490 \do{CJKUnifiedIdeographsExtensionB
}
491 \do{CJKUnifiedIdeographsExtensionC
}
492 \do{CJKUnifiedIdeographsExtensionD
}
493 \do{CJKUnifiedIdeographsExtensionE
}
494 \do{CJKUnifiedIdeographsExtensionF
}
495 \do{CJKUnifiedIdeographsExtensionG
}
496 \do{EnclosedCJKLettersAndMonths
}
497 \do{EnclosedIdeographicSupplement
}
498 \do{IdeographicDescriptionCharacters
}
499 \do{IdeographicSymbolsAndPunctuation
}
505 \do{BopomofoExtended
}
506 \do{CJKCompatibility
}
507 \do{CJKCompatibilityForms
}
508 \do{CJKCompatibilityIdeographs
}
509 \do{CJKCompatibilityIdeographsSupplement
}
510 \do{CJKRadicalsSupplement
}
512 \do{CJKSymbolsAndPunctuation
}
513 \do{CJKUnifiedIdeographs
}
514 \do{CJKUnifiedIdeographsExtensionA
}
515 \do{CJKUnifiedIdeographsExtensionB
}
516 \do{CJKUnifiedIdeographsExtensionC
}
517 \do{CJKUnifiedIdeographsExtensionD
}
518 \do{CJKUnifiedIdeographsExtensionE
}
519 \do{CJKUnifiedIdeographsExtensionF
}
520 \do{CJKUnifiedIdeographsExtensionG
}
521 \do{EnclosedCJKLettersAndMonths
}
522 \do{EnclosedIdeographicSupplement
}
523 \do{HalfwidthAndFullwidthForms
}
524 \do{HangulCompatibilityJamo
}
526 \do{HangulJamoExtendedA
}
527 \do{HangulJamoExtendedB
}
530 \do{IdeographicDescriptionCharacters
}
531 \do{IdeographicSymbolsAndPunctuation
}
537 \do{KatakanaPhoneticExtensions
}
538 \do{SmallKanaExtension
}
541 \def\CyrillicsClasses{
543 \do{CyrillicExtendedA
}
544 \do{CyrillicExtendedB
}
545 \do{CyrillicExtendedC
}
546 \do{CyrillicSupplement
}
547 \do{GlagoliticSupplement
}
551 \def\DevanagariClasses{
553 \do{DevanagariPostDanDa
}
554 \do{DevanagariPostMarks
}
555 \do{DevanagariPreMarks
}
558 \def\DiacriticsClasses{
559 \do{CombiningDiacriticalMarks
}
560 \do{CombiningDiacriticalMarksExtended
}
561 \do{CombiningDiacriticalMarksForSymbols
}
562 \do{CombiningDiacriticalMarksSupplement
}
563 \do{CombiningHalfMarks
}
564 \do{ModifierToneLetters
}
565 \do{SpacingModifierLetters
}
568 \def\EthiopicFullClasses{
570 \do{EthiopicExtended
}
571 \do{EthiopicExtendedA
}
572 \do{EthiopicSupplement
}
575 \def\GeorgianFullClasses{
577 \do{GeorgianExtended
}
578 \do{GeorgianSupplement
}
583 \do{CopticEpactNumbers
}
589 \do{HangulCompatibilityJamo
}
591 \do{HangulJamoExtendedA
}
592 \do{HangulJamoExtendedB
}
596 \def\JapaneseClasses{
597 \do{CJKUnifiedIdeographs
}
598 \do{HalfwidthAndFullwidthForms
}
605 \do{KatakanaPhoneticExtensions
}
609 \do{AlphabeticPresentationForms
}
611 \do{LatinExtendedAdditional
}
620 \def\MathematicsClasses{
621 \do{ArabicMathematicalAlphabeticSymbols
}
622 \do{MathematicalAlphanumericSymbols
}
623 \do{MathematicalOperators
}
624 \do{MiscellaneousMathematicalSymbolsA
}
625 \do{MiscellaneousMathematicalSymbolsB
}
626 \do{SupplementalMathematicalOperators
}
629 \def\MongolianFullClasses{
631 \do{MongolianSupplement
}
634 \def\MyanmarFullClasses{
636 \do{MyanmarExtendedA
}
637 \do{MyanmarExtendedB
}
640 \def\PhoneticsClasses{
642 \do{PhoneticExtensions
}
643 \do{PhoneticExtensionsSupplement
}
646 \def\PunctuationClasses{
647 \do{GeneralPunctuation
}
648 \do{SupplementalPunctuation
}
651 \def\SundaneseFullClasses{
653 \do{SundaneseSupplement
}
657 \do{AlchemicalSymbols
}
660 \do{ByzantineMusicalSymbols
}
666 \do{GeometricShapesExtended
}
667 \do{LetterlikeSymbols
}
668 \do{MiscellaneousSymbols
}
669 \do{MiscellaneousSymbolsAndArrows
}
670 \do{MiscellaneousSymbolsAndPictographs
}
671 \do{MiscellaneousTechnical
}
673 \do{OrnamentalDingbats
}
674 \do{SupplementalArrowsA
}
675 \do{SupplementalArrowsB
}
676 \do{SupplementalArrowsC
}
677 \do{SupplementalSymbolsAndPictographs
}
678 \do{SymbolsAndPictographsExtendedA
}
679 \do{TransportAndMapSymbols
}
682 \def\SyriacFullClasses{
684 \do{SyriacSupplement
}
687 \def\VedicMarksClasses{
688 \do{DevanagariExtended
}
701 % AnatolianHieroglyphs (see below)
702 \do{AncientGreekMusicalNotation
}
703 \do{AncientGreekNumbers
}
720 \do{CaucasianAlbanian
}
722 \do{CommonIndicNumberForms
}
724 \do{CountingRodNumerals
}
726 \do{CuneiformNumbersAndPunctuation
}
727 \do{CypriotSyllabary
}
730 % Duployan (see below)
731 \do{EarlyDynasticCuneiform
}
732 \do{EgyptianHieroglyphs
}
734 \do{EnclosedAlphanumerics
}
735 \do{EnclosedAlphanumericSupplement
}
744 \do{InscriptionalPahlavi
}
745 \do{InscriptionalParthian
}
759 \do{LinearBIdeograms
}
760 \do{LinearBSyllabary
}
770 \do{MeeteiMayekExtensions
}
773 \do{MeroiticHieroglyphs
}
784 % OldItalic (see below)
785 % OldNorthArabian (see below)
788 % OldSouthArabian (see below)
789 % OldTurkic (see below)
790 \do{OpticalCharacterRecognition
}
797 % PhaistosDisc (see below)
803 \do{RumiNumeralSymbols
}
809 % ShorthandFormatControls (see below)
812 \do{SinhalaArchaicNumbers
}
813 \do{SmallFormVariants
}
815 \do{SuperscriptsAndSubscripts
}
816 % SupplementaryPrivateUseAreaA (see below)
817 % SupplementaryPrivateUseAreaB (see below)
818 % SuttonSignWriting (see below)
826 \do{TaiXuanJingSymbols
}
839 \do{YijingHexagramSymbols
}
841 \ifdefined\XeTeXinterwordspaceshaping
844 \do{AnatolianHieroglyphs
}
852 \do{EgyptianHieroglyphFormatControls
}
854 \do{GeorgianExtended
}
857 \do{IndicSiyaqNumbers
}
859 \do{KhitanSmallScript
}
869 \do{NyiakengPuachueHmong
}
876 \do{OttomanSiyaqNumbers
}
878 \do{ShorthandFormatControls
}
881 \do{SupplementaryPrivateUseAreaA
}
882 \do{SupplementaryPrivateUseAreaB
}
883 \do{SuttonSignWriting
}
884 \do{SymbolsForLegacyComputing
}
887 \do{TangutComponents
}
888 \do{TangutSupplement
}
895 % For each class group Z we define the relative option
896 % \DeclareOption{Z}{\overrideClassLoading\enableX1\enableX2...\enableXn}
897 % where X1, X2, ..., Xn are the blocks belonging to class Z
900 \unexpanded{\expandafter\let\csname enable
#1\endcsname\@empty
}}
902 \begingroup\edef\x{\endgroup\noexpand\DeclareOption{#1}{%
903 \noexpand\overrideClassLoading\csname #1Classes
\endcsname}}\x}
907 \ProcessOptions\relax
909 % If no option has been given, \if@overrideClassLoading will still be
910 % false, and in this case we enable *all* blocks (again by defining
911 % \enableX equal to \@empty for each block X).
913 \if@overrideClassLoading
\else
914 \def\do#1#2#3{\expandafter\let\csname enable
#1\endcsname\@empty
}
918 % ----------------------------------------------------------------------------
919 % After dealing with the options, make sure we have the necessary packages available
920 % ----------------------------------------------------------------------------
922 % Because this package relies on XeTeX's intercharclass system, better require XeTeX
923 \RequirePackage{ifxetex
}
926 % ----------------------------------------------------------------------------
927 % This package heavily exploits XeTeX's intercharclass system!
928 % ----------------------------------------------------------------------------
930 % enable/disable commands
% \disableTransitionRules assigns 0 (\z@) to \XeTeXinterchartokenstate,
% i.e. it switches XeTeX's interchar token mechanism off.
931 \newcommand{\disableTransitionRules}{\XeTeXinterchartokenstate =
\z@
}
% \enableTransitionRules assigns 1 (\@ne) to \XeTeXinterchartokenstate,
% i.e. it switches the interchar token mechanism on.
932 \newcommand{\enableTransitionRules}{\XeTeXinterchartokenstate = \@ne
}
% Short aliases. \let copies the meanings as they stand at this point, so a
% later redefinition of the long names is not picked up by \uccoff/\uccon.
935 \let\uccoff\disableTransitionRules
936 \let\uccon\enableTransitionRules
938 % make sure it's turned on
939 \enableTransitionRules
941 % ----------------------------------------------------------------------------
942 % And now, finally, we can start loading all the requested blocks
943 % ----------------------------------------------------------------------------
945 % \message{Package ucharclasses Message: Assigning character classes per
946 % Unicode block (this may take a while)}
948 %% We record the last allocated class before allocating ours;
949 %% \newXeTeXintercharclass saves in the counter
950 %% \xe@alloc@intercharclass the last allocated class number; initially
951 %% it's 3, but some other code might have allocated interchar classes
952 %% before loading this package; if \enableX is defined (to \@empty,
953 %% but that's irrelevant), an intercharclass is allocated by using the
954 %% list \AllClasses; two cases for block X:
956 %% (1) \enableX is defined: then \do{X}{a}{b} will become
957 %% \@defineUnicodeClass{X}{a}{b} which in turn will execute
958 %% \newXeTeXintercharclass\XClass and start a loop assigning code
959 %% points from a to b to this class
961 %% (2) \enableX is not defined: then \do{X}{a}{b} will become
962 %% \@gobblethree{X}{a}{b} and so nothing will be performed
% Remember the allocator's current value, i.e. the last interchar class
% allocated before this package allocates any class of its own.
964 \chardef\@classstart=
\xe@alloc@intercharclass
% Helper that takes three arguments and expands to nothing. \providecommand
% defines it only if no \@gobblethree exists yet.
966 \providecommand\@gobblethree
[3]{}
968 \ifcsname enable
#1\endcsname
969 \expandafter\@defineUnicodeClass
971 \expandafter\@gobblethree
974 \def\@defineUnicodeClass
#1#2#3{%
975 \if@ucharclassverbose
\typeout{Defining
#1 Class
}\fi
976 \expandafter\newXeTeXintercharclass\csname #1Class
\endcsname
979 \if@ucharclassverbose
980 \typeout{\XeTeXcharclass\number\count@=
981 \expandafter\string\csname #1Class
\endcsname}%
983 \XeTeXcharclass\count@=
\csname #1Class
\endcsname
990 % finally, we record the end of our charclass range: the allocator's
% current value at this point is stored in \@classend
991 \chardef\@classend=
\xe@alloc@intercharclass
994 %%% Our assigned classes go from \@classstart (excluded) to \@classend (included)
996 % ----------------------------------------------------------------------------
997 % Use: \setTransitionsFor{block name}{when entering this block}{when leaving this block}
998 % ----------------------------------------------------------------------------
1000 \def\setTransitionsFor#1#2#3{%
1001 \ifcsname enable
#1\endcsname
1002 \count@=\@classstart
1003 \loop\ifnum\count@<\@classend
1005 \ifnum\count@=
\csname #1Class
\endcsname\else
1006 \XeTeXinterchartoks\count@
\csname #1Class
\endcsname=
{#2}%
1007 \XeTeXinterchartoks\csname #1Class
\endcsname \count@=
{#3}%
1010 \XeTeXinterchartoks\@ucharclass@boundary
\csname #1Class
\endcsname=
{#2}%
1011 \XeTeXinterchartoks\csname #1Class
\endcsname\@ucharclass@boundary=
{#3}%
1013 \if@ucharclassverbose
1014 \PackageWarningNoLine{ucharclasses
}{Class
#1\MessageBreak
1020 % ----------------------------------------------------------------------------
1021 % Use: \setTransitionTo{block name}{what to do when entering this block}
1022 % ----------------------------------------------------------------------------
1024 \def\setTransitionTo#1#2{%
1025 \ifcsname enable
#1\endcsname
1026 \count@=\@classstart
1027 \loop\ifnum\count@<\@classend
1029 \ifnum\count@=
\csname #1Class
\endcsname\else
1030 \XeTeXinterchartoks\count@
\csname #1Class
\endcsname=
{#2}%
1033 \XeTeXinterchartoks\@ucharclass@boundary
\csname #1Class
\endcsname=
{#2}%
1035 \if@ucharclassverbose
1036 \PackageWarningNoLine{ucharclasses
}{Class
#1\MessageBreak
1042 % ----------------------------------------------------------------------------
1043 % Use: \setTransitionFrom{block name}{what to do when leaving this block}
1044 % ----------------------------------------------------------------------------
1046 \def\setTransitionFrom#1#2{%
1047 \ifcsname enable
#1\endcsname
1048 \count@=\@classstart
1049 \loop\ifnum\count@<\@classend
1051 \ifnum\count@=
\csname #1Class
\endcsname\else
1052 \XeTeXinterchartoks\csname #1Class
\endcsname \count@=
{#2}%
1055 \XeTeXinterchartoks\csname #1Class
\endcsname\@ucharclass@boundary=
{#2}%
1057 \if@ucharclassverbose
1058 \PackageWarningNoLine{ucharclasses
}{Class
#1\MessageBreak
1064 % ----------------------------------------------------------------------------
1065 % Informal Block Rules - for these, to/from must always be defined
1067 % Available informal groups are:
1070 % - CanadianSyllabics
1072 % - Chinese (including bopomofo)
1073 % - CJK (Chinese/Japanese/Korean)
1080 % - Japanese (it is advised to set CJK first to a catch-all, then set
1081 % Japanese for specifics)
1082 % - Korean (=Hangul) (same comment as for Japanese)
1094 % - Other (I am not a fan of lump groups. I hope to un-lump most of it)
1096 % ----------------------------------------------------------------------------
1098 %% For each class group Z we define \setTransitionsForZ as
1099 %% \newcommand\setTransitionsForZ[2]{%
1100 %% \setTransitionsFor{X1}{#1}{#2}
1101 %% \setTransitionsFor{X2}{#1}{#2}
1103 %% \setTransitionsFor{Xn}{#1}{#2}}
1104 %% where X1, X2, ..., Xn are the blocks in group Z
1106 \def\do#1{\noexpand\setTransitionsFor{#1}{###
#1}{###
#2}}
1108 \begingroup\edef\x{\endgroup
1109 \noexpand\newcommand
1110 \unexpanded\expandafter{\csname setTransitionsFor
#1\endcsname}[2]%
1111 {\csname #1Classes
\endcsname}}\x}
1115 % ----------------------------------------------------------------------------
1117 % based on the previous informal groups, we can define a catch-all transition command
1119 % ----------------------------------------------------------------------------
1121 %% The following is equivalent to defining
1122 %% \newcommand{\setDefaultTransitions}[2]{
1123 %% \setTransitionsForArabic{#1}{#2}
1125 %% \setTransitionsForOther{#1}{#2}}
1128 \expandafter\noexpand\csname setTransitionsFor
#1\endcsname{###
#1}{###
#2}}
1129 \begingroup\edef\x{\endgroup
1130 \noexpand\newcommand\noexpand\setDefaultTransitions[2]{%
1133 % ----------------------------------------------------------------------------
% Clean-up: the list helpers \do and \doclass are no longer needed, so make
% them undefined again rather than leaving package-internal meanings behind.
1135 \let\do\@undefined
\let\doclass\@undefined
1138 % End of file `ucharclasses.sty'.