1 % ----------------------------------------------------------------------------
2 % This "ucharclasses" package sets up XeTeX character classes based on which
3 % unicode block a character is found in. It then allows transition rules to be defined
4 % when entering or leaving particular unicode blocks, the code of which gets inserted
5 % automatically when a transition from a character from one unicode block to a
6 % character from another unicode block is encountered by XeTeX
8 % Current compatibility should be Unicode 12.0.
11 % v2.1-2.3: Qing Lee, Werner Lemberg
12 % v2.0: Enrico Gregorio
13 % v1.0: Mike "Pomax" Kamermans
15 % Significant updates:
16 % v2.4: Unicode 12 support
17 % v2.3: Unicode 10 support
18 % v2.2: Unicode 8.0 and LaTeX2e support
19 % v2.1: Uplift for the intercharclass updates introduced in XeTeX 0.99994
20 % v2.0: Rewritten to vastly improve performance.
21 % v1.0: Unicode block switching using XeTeX intercharclasses.
23 % License: public domain (https://www.ctan.org/license/pd)
25 % ----------------------------------------------------------------------------
% Identify this file to LaTeX as the package `ucharclasses'; the optional
% argument carries the release date, the version and a one-line description.
27 \ProvidesPackage{ucharclasses
}[2019/
08/
20 v2.4
.0 Unicode block character classes for XeLaTeX
]
% Boolean switch \if@ucharclassverbose, turned on by the package option
% `verbose'. Later code tests it before writing \typeout diagnostics while
% classes are defined and before warning about transitions requested for a
% block that was not enabled.
29 \newif\if@ucharclassverbose
30 \DeclareOption{verbose
}{\@ucharclassverbosetrue
}
32 % ----------------------------------------------------------------------------
33 % The package options allow you to selectively enable certain unicode blocks
34 % ----------------------------------------------------------------------------
36 % We first define all blocks in a list together with their start and end
39 % Starting with XeTeX version 3.14159265-2.6-0.99994, the number of
40 % \XeTeXcharclass registers was extended from 256 to 4096 entries; some not
41 % so important blocks are thus provided only for this and newer versions.
42 % The boundary of character class was changed from 255 to 4095 correspondingly.
43 % The primitive \XeTeXinterwordspaceshaping was introduced by XeTeX 0.99994;
44 % we use it as a flag to identify this version.
46 % However, earlier versions of LaTeX2e (before 2016/04/22 v2.0q) didn't provide
47 % support for 4096 entries; we thus have to override the hard-coded limit.
% Choose \@ucharclass@boundary, the class number used further down as the
% "boundary" class in the \XeTeXinterchartoks assignments.
% NOTE(review): the \else/\fi lines delimiting the cases below are not
% visible in this excerpt; presumably the three assignments are mutually
% exclusive branches -- confirm against the complete file.
%
% Case 1: \e@alloc@intercharclass@top is defined, so its value is used.
48 \ifdefined\e@alloc@intercharclass@top
49 \chardef\@ucharclass@boundary=
\e@alloc@intercharclass@top
% Case 2: \XeTeXinterwordspaceshaping exists (the flag for XeTeX 0.99994 or
% newer, as explained above): the boundary is 4095 and
% \newXeTeXintercharclass is defined here to allocate through \e@alloc with
% \@ucharclass@boundary as its upper limit.
51 \ifdefined\XeTeXinterwordspaceshaping
52 \chardef\@ucharclass@boundary=
4095 %
53 \def\newXeTeXintercharclass{%
54 \e@alloc
\XeTeXcharclass\chardef
55 \xe@alloc@intercharclass
\m@ne\@ucharclass@boundary
}
% Case 3: fall back to \@cclv (255), the boundary of the 256-entry scheme.
57 \chardef\@ucharclass@boundary=\@cclv
62 % Unicode 5.1 block definitions
63 \do{AegeanNumbers
}{"
010100}{"
01013F
}
64 \do{AlphabeticPresentationForms
}{"
0FB00
}{"
0FB4F
}
65 \do{AncientGreekMusicalNotation
}{"
01D200
}{"
01D24F
}
66 \do{AncientGreekNumbers
}{"
010140}{"
01018F
}
67 \do{AncientSymbols
}{"
010190}{"
0101CF
}
68 \do{Arabic
}{"
0600}{"
06FF
}
69 \do{ArabicPresentationFormsA
}{"
0FB50
}{"
0FDFF
}
70 \do{ArabicPresentationFormsB
}{"
0FE70
}{"
0FEFF
}
71 \do{ArabicSupplement
}{"
0750}{"
077F
}
72 \do{Armenian
}{"
0530}{"
058F
}
73 \do{Arrows
}{"
02190}{"
021FF
}
74 \do{Balinese
}{"
01B00
}{"
01B7F
}
75 \do{BasicLatin
}{"
0020}{"
007F
} % 0000..007F in Unicode standard
76 \do{Bengali
}{"
0980}{"
09FF
}
77 \do{BlockElements
}{"
02580}{"
0259F
}
78 \do{Bopomofo
}{"
03100}{"
0312F
}
79 \do{BopomofoExtended
}{"
031A0
}{"
031BF
}
80 \do{BoxDrawing
}{"
02500}{"
0257F
}
81 \do{BraillePatterns
}{"
02800}{"
028FF
}
82 \do{Buginese
}{"
01A00
}{"
01A1F
}
83 \do{Buhid
}{"
01740}{"
0175F
}
84 \do{ByzantineMusicalSymbols
}{"
01D000
}{"
01D0FF
}
86 \do{Cham
}{"
0AA00
}{"
0AA5F
}
87 \do{Cherokee
}{"
013A0
}{"
013FF
}
88 \do{CJKCompatibility
}{"
03300}{"
033FF
}
89 \do{CJKCompatibilityForms
}{"
0FE30
}{"
0FE4F
}
90 \do{CJKCompatibilityIdeographs
}{"
0F900
}{"
0FAFF
}
91 \do{CJKCompatibilityIdeographsSupplement
}{"
02F800
}{"
02FA1F
}
92 \do{CJKRadicalsSupplement
}{"
02E80
}{"
02EFF
}
93 \do{CJKStrokes
}{"
031C0
}{"
031EF
}
94 \do{CJKSymbolsAndPunctuation
}{"
03000}{"
0303F
}
95 \do{CJKUnifiedIdeographs
}{"
04E00
}{"
09FFF
}
96 \do{CJKUnifiedIdeographsExtensionA
}{"
03400}{"
04DBF
}
97 \do{CJKUnifiedIdeographsExtensionB
}{"
020000}{"
02A6DF
}
98 \do{CombiningDiacriticalMarks
}{"
0300}{"
036F
}
99 \do{CombiningDiacriticalMarksForSymbols
}{"
020D0
}{"
020FF
}
100 \do{CombiningDiacriticalMarksSupplement
}{"
01DC0
}{"
01DFF
}
101 \do{CombiningHalfMarks
}{"
0FE20
}{"
0FE2F
}
102 \do{ControlPictures
}{"
02400}{"
0243F
}
103 \do{Coptic
}{"
02C80
}{"
02CFF
}
104 \do{CountingRodNumerals
}{"
01D360
}{"
01D37F
}
105 \do{Cuneiform
}{"
012000}{"
0123FF
}
106 \do{CuneiformNumbersAndPunctuation
}{"
012400}{"
01247F
}
107 \do{CurrencySymbols
}{"
020A0
}{"
020CF
}
108 \do{CypriotSyllabary
}{"
010800}{"
01083F
}
109 \do{Cyrillic
}{"
0400}{"
04FF
}
110 \do{CyrillicExtendedA
}{"
02DE0
}{"
02DFF
}
111 \do{CyrillicExtendedB
}{"
0A640
}{"
0A69F
}
112 \do{CyrillicSupplement
}{"
0500}{"
052F
}
113 \do{Deseret
}{"
010400}{"
01044F
}
114 \do{DevanagariDanDa
}{"
0964}{"
0965}
115 \do{DevanagariMarks
}{"
0951}{"
0954}
116 \do{DevanagariPostDanDa
}{"
0966}{"
097F
}
117 \do{DevanagariPostMarks
}{"
0955}{"
0963}
118 \do{DevanagariPreMarks
}{"
0900}{"
0950}
119 \do{Dingbats
}{"
02700}{"
027BF
}
120 \do{DominoTiles
}{"
01F030
}{"
01F09F
}
121 \do{EnclosedAlphanumerics
}{"
02460}{"
024FF
}
122 \do{EnclosedCJKLettersAndMonths
}{"
03200}{"
032FF
}
123 \do{Ethiopic
}{"
01200}{"
0137F
}
124 \do{EthiopicExtended
}{"
02D80
}{"
02DDF
}
125 \do{EthiopicSupplement
}{"
01380}{"
0139F
}
126 \do{GeneralPunctuation
}{"
02000}{"
0206F
}
127 \do{GeometricShapes
}{"
025A0
}{"
025FF
}
128 \do{Georgian
}{"
010A0
}{"
010FF
}
129 \do{GeorgianSupplement
}{"
02D00
}{"
02D2F
}
130 \do{Glagolitic
}{"
02C00
}{"
02C5F
}
131 \do{Gothic
}{"
010330}{"
01034F
}
132 \do{GreekAndCoptic
}{"
0370}{"
03FF
}
133 \do{GreekExtended
}{"
01F00
}{"
01FFF
}
134 \do{Gujarati
}{"
0A80
}{"
0AFF
}
135 \do{Gurmukhi
}{"
0A00
}{"
0A7F
}
136 \do{HalfwidthAndFullwidthForms
}{"
0FF00
}{"
0FFEF
}
137 \do{HangulCompatibilityJamo
}{"
03130}{"
0318F
}
138 \do{HangulJamo
}{"
01100}{"
011FF
}
139 \do{HangulSyllables
}{"
0AC00
}{"
0D7AF
}
140 \do{Hanunoo
}{"
01720}{"
0173F
}
141 \do{Hebrew
}{"
0590}{"
05FF
}
142 \do{Hiragana
}{"
03040}{"
0309F
}
143 \do{IdeographicDescriptionCharacters
}{"
02FF0
}{"
02FFF
}
144 \do{IPAExtensions
}{"
0250}{"
02AF
}
145 \do{Kanbun
}{"
03190}{"
0319F
}
146 \do{KangxiRadicals
}{"
02F00
}{"
02FDF
}
147 \do{Kannada
}{"
0C80
}{"
0CFF
}
148 \do{Katakana
}{"
030A0
}{"
030FF
}
149 \do{KatakanaPhoneticExtensions
}{"
031F0
}{"
031FF
}
150 \do{KayahLi
}{"
0A900
}{"
0A92F
}
151 \do{Kharoshthi
}{"
010A00
}{"
010A5F
}
152 \do{Khmer
}{"
01780}{"
017FF
}
153 \do{KhmerSymbols
}{"
019E0
}{"
019FF
}
154 \do{Lao
}{"
0E80
}{"
0EFF
}
155 \do{LatinExtendedAdditional
}{"
01E00
}{"
01EFF
}
156 \do{LatinExtendedA
}{"
0100}{"
017F
}
157 \do{LatinExtendedB
}{"
0180}{"
024F
}
158 \do{LatinExtendedC
}{"
02C60
}{"
02C7F
}
159 \do{LatinExtendedD
}{"
0A720
}{"
0A7FF
}
160 \do{LatinSupplement
}{"
0080}{"
00FF
}
161 \do{Lepcha
}{"
01C00
}{"
01C4F
}
162 \do{LetterlikeSymbols
}{"
02100}{"
0214F
}
163 \do{Limbu
}{"
01900}{"
0194F
}
164 \do{LinearBIdeograms
}{"
010080}{"
0100FF
}
165 \do{LinearBSyllabary
}{"
010000}{"
01007F
}
166 \do{Lycian
}{"
010280}{"
01029F
}
167 \do{Lydian
}{"
010920}{"
01093F
}
168 \do{MahjongTiles
}{"
01F000
}{"
01F02F
}
169 \do{Malayalam
}{"
0D00
}{"
0D7F
}
170 \do{MathematicalAlphanumericSymbols
}{"
01D400
}{"
01D7FF
}
171 \do{MathematicalOperators
}{"
02200}{"
022FF
}
172 \do{MiscellaneousMathematicalSymbolsA
}{"
027C0
}{"
027EF
}
173 \do{MiscellaneousMathematicalSymbolsB
}{"
02980}{"
029FF
}
174 \do{MiscellaneousSymbols
}{"
02600}{"
026FF
}
175 \do{MiscellaneousSymbolsAndArrows
}{"
02B00
}{"
02BFF
}
176 \do{MiscellaneousTechnical
}{"
02300}{"
023FF
}
177 \do{ModifierToneLetters
}{"
0A700
}{"
0A71F
}
178 \do{Mongolian
}{"
01800}{"
018AF
}
179 \do{MusicalSymbols
}{"
01D100
}{"
01D1FF
}
180 \do{Myanmar
}{"
01000}{"
0109F
}
181 \do{NewTaiLue
}{"
01980}{"
019DF
}
182 \do{NKo
}{"
07C0
}{"
07FF
}
183 \do{NumberForms
}{"
02150}{"
0218F
}
184 \do{Ogham
}{"
01680}{"
0169F
}
185 \do{OlChiki
}{"
01C50
}{"
01C7F
}
186 % OldItalic (see below)
187 \do{OldPersian
}{"
0103A0
}{"
0103DF
}
188 \do{OpticalCharacterRecognition
}{"
02440}{"
0245F
}
189 \do{Oriya
}{"
0B00
}{"
0B7F
}
190 \do{Osmanya
}{"
010480}{"
0104AF
}
191 \do{PhagsPa
}{"
0A840
}{"
0A87F
}
192 % PhaistosDisc (see below)
193 \do{Phoenician
}{"
010900}{"
01091F
}
194 \do{PhoneticExtensions
}{"
01D00
}{"
01D7F
}
195 \do{PhoneticExtensionsSupplement
}{"
01D80
}{"
01DBF
}
196 \do{PrivateUseArea
}{"
0E000
}{"
0F8FF
}
197 \do{Rejang
}{"
0A930
}{"
0A95F
}
198 \do{Runic
}{"
016A0
}{"
016FF
}
199 \do{Saurashtra
}{"
0A880
}{"
0A8DF
}
200 \do{Shavian
}{"
010450}{"
01047F
}
201 \do{Sinhala
}{"
0D80
}{"
0DFF
}
202 \do{SmallFormVariants
}{"
0FE50
}{"
0FE6F
}
203 \do{SpacingModifierLetters
}{"
02B0
}{"
02FF
}
204 \do{Sundanese
}{"
01B80
}{"
01BBF
}
205 \do{SuperscriptsAndSubscripts
}{"
02070}{"
0209F
}
206 \do{SupplementalArrowsA
}{"
027F0
}{"
027FF
}
207 \do{SupplementalArrowsB
}{"
02900}{"
0297F
}
208 \do{SupplementalMathematicalOperators
}{"
02A00
}{"
02AFF
}
209 \do{SupplementalPunctuation
}{"
02E00
}{"
02E7F
}
210 % SupplementaryPrivateUseAreaA (see below)
211 % SupplementaryPrivateUseAreaB (see below)
212 \do{SylotiNagri
}{"
0A800
}{"
0A82F
}
213 \do{Syriac
}{"
0700}{"
074F
}
214 \do{Tagalog
}{"
01700}{"
0171F
}
215 \do{Tagbanwa
}{"
01760}{"
0177F
}
216 \do{Tags
}{"
0E0000
}{"
0E007F
}
217 \do{TaiLe
}{"
01950}{"
0197F
}
218 \do{TaiXuanJingSymbols
}{"
01D300
}{"
01D35F
}
219 \do{Tamil
}{"
0B80
}{"
0BFF
}
220 \do{Telugu
}{"
0C00
}{"
0C7F
}
221 \do{Thaana
}{"
0780}{"
07BF
}
222 \do{Thai
}{"
0E00
}{"
0E7F
}
223 \do{Tibetan
}{"
0F00
}{"
0FFF
}
224 \do{Tifinagh
}{"
02D30
}{"
02D7F
}
225 \do{Ugaritic
}{"
010380}{"
01039F
}
226 \do{UnifiedCanadianAboriginalSyllabics
}{"
01400}{"
0167F
}
227 \do{Vai
}{"
0A500
}{"
0A63F
}
228 \do{VerticalForms
}{"
0FE10
}{"
0FE1F
}
229 \do{YiRadicals
}{"
0A490
}{"
0A4CF
}
230 \do{YiSyllables
}{"
0A000
}{"
0A48F
}
231 \do{YijingHexagramSymbols
}{"
04DC0
}{"
04DFF
}
232 % Unicode 5.2 additions
233 \do{Avestan
}{"
010B00
}{"
010B3F
}
234 \do{Bamum
}{"
0A6A0
}{"
0A6FF
}
235 \do{CJKUnifiedIdeographsExtensionC
}{"
02A700
}{"
02B73F
}
236 \do{CommonIndicNumberForms
}{"
0A830
}{"
0A83F
}
237 \do{DevanagariExtended
}{"
0A8E0
}{"
0A8FF
}
238 \do{EgyptianHieroglyphs
}{"
013000}{"
01342F
}
239 \do{EnclosedAlphanumericSupplement
}{"
01F100
}{"
01F1FF
}
240 \do{EnclosedIdeographicSupplement
}{"
01F200
}{"
01F2FF
}
241 \do{HangulJamoExtendedA
}{"
0A960
}{"
0A97F
}
242 \do{HangulJamoExtendedB
}{"
0D7B0
}{"
0D7FF
}
243 \do{ImperialAramaic
}{"
010840}{"
01085F
}
244 \do{InscriptionalPahlavi
}{"
010B60
}{"
010B7F
}
245 \do{InscriptionalParthian
}{"
010B40
}{"
010B5F
}
246 \do{Javanese
}{"
0A980
}{"
0A9DF
}
247 \do{Kaithi
}{"
011080}{"
0110CF
}
248 \do{Lisu
}{"
0A4D0
}{"
0A4FF
}
249 \do{MeeteiMayek
}{"
0ABC0
}{"
0ABFF
}
250 \do{MyanmarExtendedA
}{"
0AA60
}{"
0AA7F
}
251 % OldSouthArabian (see below)
252 % OldTurkic (see below)
253 \do{RumiNumeralSymbols
}{"
010E60
}{"
010E7F
}
254 \do{Samaritan
}{"
0800}{"
083F
}
255 \do{TaiTham
}{"
01A20
}{"
01AAF
}
256 \do{TaiViet
}{"
0AA80
}{"
0AADF
}
257 \do{UnifiedCanadianAboriginalSyllabicsExtended
}{"
018B0
}{"
018FF
}
258 \do{VedicExtensions
}{"
01CD0
}{"
01CFF
}
259 % Unicode 6.0 additions
260 \do{AlchemicalSymbols
}{"
01F700
}{"
01F77F
}
261 \do{BamumSupplement
}{"
016800}{"
016A3F
}
262 \do{Batak
}{"
01BC0
}{"
01BFF
}
263 \do{Brahmi
}{"
011000}{"
01107F
}
264 \do{CJKUnifiedIdeographsExtensionD
}{"
02B740
}{"
02B81F
}
265 \do{Emoticons
}{"
01F600
}{"
01F64F
}
266 \do{EthiopicExtendedA
}{"
0AB00
}{"
0AB2F
}
267 \do{KanaSupplement
}{"
01B000
}{"
01B0FF
}
268 \do{Mandaic
}{"
0840}{"
085F
}
269 \do{MiscellaneousSymbolsAndPictographs
}{"
01F300
}{"
01F5FF
}
270 \do{PlayingCards
}{"
01F0A0
}{"
01F0FF
}
271 \do{TransportAndMapSymbols
}{"
01F680
}{"
01F6FF
}
272 % Unicode 6.1 additions
273 \do{ArabicExtendedA
}{"
08A0
}{"
08FF
}
274 \do{ArabicMathematicalAlphabeticSymbols
}{"
01EE00
}{"
01EEFF
}
275 \do{Chakma
}{"
011100}{"
01114F
}
276 \do{MeeteiMayekExtensions
}{"
0AAE0
}{"
0AAFF
}
277 \do{MeroiticCursive
}{"
0109A0
}{"
0109FF
}
278 \do{MeroiticHieroglyphs
}{"
010980}{"
01099F
}
279 \do{Miao
}{"
016F00
}{"
016F9F
}
280 \do{Sharada
}{"
011180}{"
0111DF
}
281 \do{SoraSompeng
}{"
0110D0
}{"
0110FF
}
282 \do{SundaneseSupplement
}{"
01CC0
}{"
01CCF
}
283 \do{Takri
}{"
011680}{"
0116CF
}
284 % Unicode 7.0 additions
285 \do{BassaVah
}{"
016AD0
}{"
016AFF
}
286 \do{CaucasianAlbanian
}{"
010530}{"
01056F
}
287 \do{CombiningDiacriticalMarksExtended
}{"
01AB0
}{"
01AFF
}
288 \do{CopticEpactNumbers
}{"
0102E0
}{"
0102FF
}
289 % Duployan (see below)
290 \do{Elbasan
}{"
010500}{"
01052F
}
291 \do{GeometricShapesExtended
}{"
01F780
}{"
01F7FF
}
292 \do{Grantha
}{"
011300}{"
01137F
}
293 \do{Khojki
}{"
011200}{"
01124F
}
294 \do{Khudawadi
}{"
0112B0
}{"
0112FF
}
295 \do{LatinExtendedE
}{"
0AB30
}{"
0AB6F
}
296 \do{LinearA
}{"
010600}{"
01077F
}
297 \do{Mahajani
}{"
011150}{"
01117F
}
298 \do{Manichaean
}{"
010AC0
}{"
010AFF
}
299 \do{MendeKikakui
}{"
01E800
}{"
01E8DF
}
300 \do{Modi
}{"
011600}{"
01165F
}
301 \do{Mro
}{"
016A40
}{"
016A6F
}
302 \do{MyanmarExtendedB
}{"
0A9E0
}{"
0A9FF
}
303 \do{Nabataean
}{"
010880}{"
0108AF
}
304 % OldNorthArabian (see below)
305 \do{OldPermic
}{"
010350}{"
01037F
}
306 \do{OrnamentalDingbats
}{"
01F650
}{"
01F67F
}
307 \do{PahawhHmong
}{"
016B00
}{"
016B8F
}
308 \do{Palmyrene
}{"
010860}{"
01087F
}
309 \do{PauCinHau
}{"
011AC0
}{"
011AFF
}
310 \do{PsalterPahlavi
}{"
010B80
}{"
010BAF
}
311 % ShorthandFormatControls (see below)
312 \do{Siddham
}{"
011580}{"
0115FF
}
313 \do{SinhalaArchaicNumbers
}{"
0111E0
}{"
0111FF
}
314 \do{SupplementalArrowsC
}{"
01F800
}{"
01F8FF
}
315 \do{Tirhuta
}{"
011480}{"
0114DF
}
316 \do{WarangCiti
}{"
0118A0
}{"
0118FF
}
317 % Unicode 8.0 additions
318 \do{Ahom
}{"
011700}{"
01173F
}
319 % AnatolianHieroglyphs (see below)
320 \do{CherokeeSupplement
}{"
0AB70
}{"
0ABBF
}
321 \do{CJKUnifiedIdeographsExtensionE
}{"
02B820
}{"
02CEAF
}
322 % EarlyDynasticCuneiform (see below)
324 % Multani (see below)
325 \do{OldHungarian
}{"
010C80
}{"
010CFF
}
326 \do{SupplementalSymbolsAndPictographs
}{"
01F900
}{"
01F9FF
}
327 % SuttonSignWriting (see below)
328 % Unicode 9.0 additions needed for classes
329 \do{CyrillicExtendedC
}{"
01C80
}{"
01C8F
}
330 \do{GlagoliticSupplement
}{"
01E000
}{"
01E02F
}
331 \do{IdeographicSymbolsAndPunctuation
}{"
016FE0
}{"
016FFF
}
332 \do{MongolianSupplement
}{"
011660}{"
01167F
}
333 % Unicode 10.0 additions needed for classes
334 \do{CJKUnifiedIdeographsExtensionF
}{"
02CEB0
}{"
02EBEF
}
335 \do{KanaExtendedA
}{"
01B100
}{"
01B12F
}
336 \do{SyriacSupplement
}{"
0860}{"
086F
}
337 % Unicode 11.0 additions needed for classes
338 \do{GeorgianExtended
}{"
01C90
}{"
01CBF
}
339 % Unicode 12.0 additions needed for classes
340 \do{SmallKanaExtension
}{"
01B130
}{"
01B16F
}
341 \do{SymbolsAndPictographsExtendedA
}{"
01FA70
}{"
01FAFF
}
343 \ifdefined\XeTeXinterwordspaceshaping
344 \do{AnatolianHieroglyphs
}{"
014400}{"
01467F
}
345 \do{Carian
}{"
0102A0
}{"
0102DF
}
346 \do{Duployan
}{"
01BC00
}{"
01BC9F
}
347 \do{OldItalic
}{"
010300}{"
01032F
}
348 \do{OldNorthArabian
}{"
010A80
}{"
010A9F
}
349 \do{OldSouthArabian
}{"
010A60
}{"
010A7F
}
350 \do{OldTurkic
}{"
010C00
}{"
010C4F
}
351 \do{PhaistosDisc
}{"
0101D0
}{"
0101FF
}
352 \do{ShorthandFormatControls
}{"
01BCA0
}{"
01BCAF
}
353 \do{SupplementaryPrivateUseAreaA
}{"
0F0000
}{"
0FFFFF
}
354 \do{SupplementaryPrivateUseAreaB
}{"
0100000}{"
010FFFF
}
355 % Unicode 8.0 additions
356 \do{EarlyDynasticCuneiform
}{"
012480}{"
01254F
}
357 \do{Hatran
}{"
0108E0
}{"
0108FF
}
358 \do{Multani
}{"
011280}{"
0112AF
}
359 \do{SuttonSignWriting
}{"
01D800
}{"
01DAAF
}
360 % Unicode 9.0 additions
361 \do{Adlam
}{"
01E900
}{"
01E95F
}
362 \do{Bhaiksuki
}{"
011C00
}{"
011C6F
}
363 \do{Marchen
}{"
011C70
}{"
011CBF
}
364 \do{Newa
}{"
011400}{"
01147F
}
365 \do{Osage
}{"
0104B0
}{"
0104FF
}
366 \do{Tangut
}{"
017000}{"
0187FF
}
367 \do{TangutComponents
}{"
018800}{"
018AFF
}
368 % Unicode 10.0 additions
369 \do{MasaramGondi
}{"
011D00
}{"
011D5F
}
370 \do{Nushu
}{"
01B170
}{"
01B2FF
}
371 \do{Soyombo
}{"
011A50
}{"
011AAF
}
372 \do{ZanabazarSquare
}{"
011A00
}{"
011A4F
}
373 % Unicode 11.0 additions
374 \do{ChessSymbols
}{"
01FA00
}{"
01FA6F
}
375 \do{Dogra
}{"
011800}{"
01184F
}
376 \do{GunjalaGondi
}{"
011D60
}{"
011DAF
}
377 \do{HanifiRohingya
}{"
010D00
}{"
010D3F
}
378 \do{IndicSiyaqNumbers
}{"
01EC70
}{"
01ECBF
}
379 \do{Makasar
}{"
011EE0
}{"
011EFF
}
380 \do{MayanNumerals
}{"
01D2E0
}{"
01D2FF
}
381 \do{Medefaidrin
}{"
016E40
}{"
016E9F
}
382 \do{OldSogdian
}{"
010F00
}{"
010F2F
}
383 \do{Sogdian
}{"
010F30
}{"
010F6F
}
384 % Unicode 12.0 additions
385 \do{EgyptianHieroglyphFormatControls
}{"
013430}{"
01343F
}
386 \do{Elymaic
}{"
010FE0
}{"
010FFF
}
387 \do{Nandinagari
}{"
0119A0
}{"
0119FF
}
388 \do{NyiakengPuachueHmong
}{"
01E100
}{"
01E14F
}
389 \do{OttomanSiyaqNumbers
}{"
01ED00
}{"
01ED4F
}
390 \do{TamilSupplement
}{"
011FC0
}{"
011FFF
}
391 \do{Wancho
}{"
01E2C0
}{"
01E2FF
}
395 % ----------------------------------------------------------------------------
396 % Option handling lets the user turn off "load all" and selectively enable only those blocks
397 % they are interested in
398 % ----------------------------------------------------------------------------
400 % Each option starts with \overrideClassLoading; so any specified
401 % option will set |\if@overrideClassLoading| to true; when one has
402 % been scanned it's not necessary to set the conditional again. Then
403 % for block X we let \enableX to \@empty so that later on we can check
% \if@overrideClassLoading records whether at least one block or group
% option was passed to the package.
% \overrideClassLoading sets that switch to true and then \let's itself to
% \relax, so further calls from other options do nothing.
406 \newif\if@overrideClassLoading
407 \newcommand{\overrideClassLoading}{\@overrideClassLoadingtrue
408 \let\overrideClassLoading\relax}
% Option-declaring meaning of \do. For a list entry \do{X}{first}{last} it
% declares a package option named X (#1); selecting that option calls
% \overrideClassLoading and \let's \enableX to \@empty. The code-point
% arguments #2 and #3 are absorbed but not used by this definition.
410 \def\do#1#2#3{\DeclareOption{#1}%
411 {\overrideClassLoading\expandafter\let\csname enable
#1\endcsname\@empty
}}
412 % We execute the list with this definition of \do
416 % We define lists also for these groups
420 \doclass{CanadianSyllabics
}
421 \doclass{CherokeeFull
}
427 \doclass{EthiopicFull
}
428 \doclass{GeorgianFull
}
433 \doclass{Mathematics
}
434 \doclass{MongolianFull
}
435 \doclass{MyanmarFull
}
437 \doclass{Punctuation
}
438 \doclass{SundaneseFull
}
449 \do{ArabicPresentationFormsA
}
450 \do{ArabicPresentationFormsB
}
451 \do{ArabicSupplement
}
454 \def\CanadianSyllabicsClasses{
455 \do{UnifiedCanadianAboriginalSyllabics
}
456 \do{UnifiedCanadianAboriginalSyllabicsExtended
}
459 \def\CherokeeFullClasses{
461 \do{CherokeeSupplement
}
466 \do{BopomofoExtended
}
467 \do{CJKCompatibility
}
468 \do{CJKCompatibilityForms
}
469 \do{CJKCompatibilityIdeographs
}
470 \do{CJKCompatibilityIdeographsSupplement
}
471 \do{CJKRadicalsSupplement
}
473 \do{CJKSymbolsAndPunctuation
}
474 \do{CJKUnifiedIdeographs
}
475 \do{CJKUnifiedIdeographsExtensionA
}
476 \do{CJKUnifiedIdeographsExtensionB
}
477 \do{CJKUnifiedIdeographsExtensionC
}
478 \do{CJKUnifiedIdeographsExtensionD
}
479 \do{CJKUnifiedIdeographsExtensionE
}
480 \do{CJKUnifiedIdeographsExtensionF
}
481 \do{EnclosedCJKLettersAndMonths
}
482 \do{EnclosedIdeographicSupplement
}
483 \do{IdeographicDescriptionCharacters
}
484 \do{IdeographicSymbolsAndPunctuation
}
490 \do{BopomofoExtended
}
491 \do{CJKCompatibility
}
492 \do{CJKCompatibilityForms
}
493 \do{CJKCompatibilityIdeographs
}
494 \do{CJKCompatibilityIdeographsSupplement
}
495 \do{CJKRadicalsSupplement
}
497 \do{CJKSymbolsAndPunctuation
}
498 \do{CJKUnifiedIdeographs
}
499 \do{CJKUnifiedIdeographsExtensionA
}
500 \do{CJKUnifiedIdeographsExtensionB
}
501 \do{CJKUnifiedIdeographsExtensionC
}
502 \do{CJKUnifiedIdeographsExtensionD
}
503 \do{CJKUnifiedIdeographsExtensionE
}
504 \do{CJKUnifiedIdeographsExtensionF
}
505 \do{EnclosedCJKLettersAndMonths
}
506 \do{EnclosedIdeographicSupplement
}
507 \do{HalfwidthAndFullwidthForms
}
508 \do{HangulCompatibilityJamo
}
510 \do{HangulJamoExtendedA
}
511 \do{HangulJamoExtendedB
}
514 \do{IdeographicDescriptionCharacters
}
515 \do{IdeographicSymbolsAndPunctuation
}
521 \do{KatakanaPhoneticExtensions
}
522 \do{SmallKanaExtension
}
525 \def\CyrillicsClasses{
527 \do{CyrillicExtendedA
}
528 \do{CyrillicExtendedB
}
529 \do{CyrillicExtendedC
}
530 \do{CyrillicSupplement
}
531 \do{GlagoliticSupplement
}
535 \def\DevanagariClasses{
537 \do{DevanagariPostDanDa
}
538 \do{DevanagariPostMarks
}
539 \do{DevanagariPreMarks
}
542 \def\DiacriticsClasses{
543 \do{CombiningDiacriticalMarks
}
544 \do{CombiningDiacriticalMarksExtended
}
545 \do{CombiningDiacriticalMarksForSymbols
}
546 \do{CombiningDiacriticalMarksSupplement
}
547 \do{CombiningHalfMarks
}
548 \do{ModifierToneLetters
}
549 \do{SpacingModifierLetters
}
552 \def\EthiopicFullClasses{
554 \do{EthiopicExtended
}
555 \do{EthiopicExtendedA
}
556 \do{EthiopicSupplement
}
559 \def\GeorgianFullClasses{
561 \do{GeorgianExtended
}
562 \do{GeorgianSupplement
}
567 \do{CopticEpactNumbers
}
573 \do{HangulCompatibilityJamo
}
575 \do{HangulJamoExtendedA
}
576 \do{HangulJamoExtendedB
}
580 \def\JapaneseClasses{
581 \do{CJKUnifiedIdeographs
}
582 \do{HalfwidthAndFullwidthForms
}
589 \do{KatakanaPhoneticExtensions
}
593 \do{AlphabeticPresentationForms
}
595 \do{LatinExtendedAdditional
}
604 \def\MathematicsClasses{
605 \do{ArabicMathematicalAlphabeticSymbols
}
606 \do{MathematicalAlphanumericSymbols
}
607 \do{MathematicalOperators
}
608 \do{MiscellaneousMathematicalSymbolsA
}
609 \do{MiscellaneousMathematicalSymbolsB
}
610 \do{SupplementalMathematicalOperators
}
613 \def\MongolianFullClasses{
615 \do{MongolianSupplement
}
618 \def\MyanmarFullClasses{
620 \do{MyanmarExtendedA
}
621 \do{MyanmarExtendedB
}
624 \def\PhoneticsClasses{
626 \do{PhoneticExtensions
}
627 \do{PhoneticExtensionsSupplement
}
630 \def\PunctuationClasses{
631 \do{GeneralPunctuation
}
632 \do{SupplementalPunctuation
}
635 \def\SundaneseFullClasses{
637 \do{SundaneseSupplement
}
641 \do{AlchemicalSymbols
}
644 \do{ByzantineMusicalSymbols
}
650 \do{GeometricShapesExtended
}
651 \do{LetterlikeSymbols
}
652 \do{MiscellaneousSymbols
}
653 \do{MiscellaneousSymbolsAndArrows
}
654 \do{MiscellaneousSymbolsAndPictographs
}
655 \do{MiscellaneousTechnical
}
657 \do{OrnamentalDingbats
}
658 \do{SupplementalArrowsA
}
659 \do{SupplementalArrowsB
}
660 \do{SupplementalArrowsC
}
661 \do{SupplementalSymbolsAndPictographs
}
662 \do{SymbolsAndPictographsExtendedA
}
663 \do{TransportAndMapSymbols
}
666 \def\SyriacFullClasses{
668 \do{SyriacSupplement
}
671 \def\VedicMarksClasses{
672 \do{DevanagariExtended
}
685 % AnatolianHieroglyphs (see below)
686 \do{AncientGreekMusicalNotation
}
687 \do{AncientGreekNumbers
}
704 \do{CaucasianAlbanian
}
706 \do{CommonIndicNumberForms
}
708 \do{CountingRodNumerals
}
710 \do{CuneiformNumbersAndPunctuation
}
711 \do{CypriotSyllabary
}
714 % Duployan (see below)
715 \do{EarlyDynasticCuneiform
}
716 \do{EgyptianHieroglyphs
}
718 \do{EnclosedAlphanumerics
}
719 \do{EnclosedAlphanumericSupplement
}
728 \do{InscriptionalPahlavi
}
729 \do{InscriptionalParthian
}
743 \do{LinearBIdeograms
}
744 \do{LinearBSyllabary
}
754 \do{MeeteiMayekExtensions
}
757 \do{MeroiticHieroglyphs
}
768 % OldItalic (see below)
769 % OldNorthArabian (see below)
772 % OldSouthArabian (see below)
773 % OldTurkic (see below)
774 \do{OpticalCharacterRecognition
}
781 % PhaistosDisc (see below)
787 \do{RumiNumeralSymbols
}
793 % ShorthandFormatControls (see below)
796 \do{SinhalaArchaicNumbers
}
797 \do{SmallFormVariants
}
799 \do{SuperscriptsAndSubscripts
}
800 % SupplementaryPrivateUseAreaA (see below)
801 % SupplementaryPrivateUseAreaB (see below)
802 % SuttonSignWriting (see below)
810 \do{TaiXuanJingSymbols
}
823 \do{YijingHexagramSymbols
}
825 \ifdefined\XeTeXinterwordspaceshaping
827 \do{AnatolianHieroglyphs
}
833 \do{GeorgianExtended
}
836 \do{IndicSiyaqNumbers
}
852 \do{ShorthandFormatControls
}
855 \do{SupplementaryPrivateUseAreaA
}
856 \do{SupplementaryPrivateUseAreaB
}
857 \do{SuttonSignWriting
}
859 \do{TangutComponents
}
864 % For each class group Z we define the corresponding option
865 % \DeclareOption{Z}{\overrideClassLoading\let\enableX1\@empty...\let\enableXn\@empty}
866 % where X1, X2, ..., Xn are the blocks belonging to class Z
869 \unexpanded{\expandafter\let\csname enable
#1\endcsname\@empty
}}
871 \begingroup\edef\x{\endgroup\noexpand\DeclareOption{#1}{%
872 \noexpand\overrideClassLoading\csname #1Classes
\endcsname}}\x}
876 \ProcessOptions\relax
878 % If no option has been given, \if@overrideClassLoading will still be
879 % false, and in this case we enable *all* blocks (again by defining
880 % \enableX equal to \@empty for each block X)
882 \if@overrideClassLoading
\else
883 \def\do#1#2#3{\expandafter\let\csname enable
#1\endcsname\@empty
}
887 % ----------------------------------------------------------------------------
888 % After dealing with the options, make sure we have the necessary packages available
889 % ----------------------------------------------------------------------------
891 % because this package relies on XeTeX's intercharclass system, better require XeTeX
892 \RequirePackage{ifxetex
}
895 % ----------------------------------------------------------------------------
896 % This package heavily exploits XeTeX's intercharclass system!
897 % ----------------------------------------------------------------------------
899 % enable/disable commands
% User-level switches for the interchar token mechanism:
%   \disableTransitionRules  sets \XeTeXinterchartokenstate to 0 (\z@)
%   \enableTransitionRules   sets \XeTeXinterchartokenstate to 1 (\@ne)
900 \newcommand{\disableTransitionRules}{\XeTeXinterchartokenstate =
\z@
}
901 \newcommand{\enableTransitionRules}{\XeTeXinterchartokenstate = \@ne
}
% Short aliases; \let copies the meanings the two commands have at this point.
904 \let\uccoff\disableTransitionRules
905 \let\uccon\enableTransitionRules
907 % make sure it's turned on
908 \enableTransitionRules
910 % ----------------------------------------------------------------------------
911 % And now, finally, we can start loading all the requested blocks
912 % ----------------------------------------------------------------------------
914 % \message{Package ucharclasses Message: Assigning character classes per
915 % Unicode block (this may take a while)}
917 %% We record the last allocated class before allocating ours;
918 %% \newXeTeXintercharclass saves in the counter
919 %% \xe@alloc@intercharclass the last allocated class number; initially
920 %% it's 3, but some other code might have allocated interchar classes
921 %% before loading this package; if \enableX is defined (to \@empty,
922 %% but that's irrelevant), an intercharclass is allocated by using the
923 %% list \AllClasses; two cases for block X:
925 %% (1) \enableX is defined: then \do{X}{a}{b} will become
926 %% \@defineUnicodeClass{X}{a}{b} which in turn will execute
927 %% \newXeTeXintercharclass\XClass and start a loop assigning code
928 %% points from a to b to this class
930 %% (2) \enableX is not defined: then \do{X}{a}{b} will become
931 %% \@gobblethree{X}{a}{b} and so nothing will be performed
% Snapshot of \xe@alloc@intercharclass (the last class number allocated so
% far) taken before this package allocates its own classes; the classes
% assigned here therefore start just above \@classstart.
933 \chardef\@classstart=
\xe@alloc@intercharclass
% Three-argument macro with an empty replacement text, used to discard a
% \do{X}{a}{b} entry whose block is not enabled; \providecommand leaves an
% existing definition untouched.
935 \providecommand\@gobblethree
[3]{}
937 \ifcsname enable
#1\endcsname
938 \expandafter\@defineUnicodeClass
940 \expandafter\@gobblethree
943 \def\@defineUnicodeClass
#1#2#3{%
944 \if@ucharclassverbose
\typeout{Defining
#1 Class
}\fi
945 \expandafter\newXeTeXintercharclass\csname #1Class
\endcsname
948 \if@ucharclassverbose
949 \typeout{\XeTeXcharclass\number\count@=
950 \expandafter\string\csname #1Class
\endcsname}%
952 \XeTeXcharclass\count@=
\csname #1Class
\endcsname
959 % finally, we record the end of our charclass range
960 \chardef\@classend=
\xe@alloc@intercharclass
963 %%% Our assigned classes go from \@classstart (excluded) to \@classend (included)
965 % ----------------------------------------------------------------------------
966 % Use: \setTransitionsFor{block name}{when entering this block}{when leaving this block}
967 % ----------------------------------------------------------------------------
969 \def\setTransitionsFor#1#2#3{%
970 \ifcsname enable
#1\endcsname
972 \loop\ifnum\count@<\@classend
974 \ifnum\count@=
\csname #1Class
\endcsname\else
975 \XeTeXinterchartoks\count@
\csname #1Class
\endcsname=
{#2}%
976 \XeTeXinterchartoks\csname #1Class
\endcsname \count@=
{#3}%
979 \XeTeXinterchartoks\@ucharclass@boundary
\csname #1Class
\endcsname=
{#2}%
980 \XeTeXinterchartoks\csname #1Class
\endcsname\@ucharclass@boundary=
{#3}%
982 \if@ucharclassverbose
983 \PackageWarningNoLine{ucharclasses
}{Class
#1\MessageBreak
989 % ----------------------------------------------------------------------------
990 % Use: \setTransitionTo{block name}{what to do when entering this block}
991 % ----------------------------------------------------------------------------
993 \def\setTransitionTo#1#2{%
994 \ifcsname enable
#1\endcsname
996 \loop\ifnum\count@<\@classend
998 \ifnum\count@=
\csname #1Class
\endcsname\else
999 \XeTeXinterchartoks\count@
\csname #1Class
\endcsname=
{#2}%
1002 \XeTeXinterchartoks\@ucharclass@boundary
\csname #1Class
\endcsname=
{#2}%
1004 \if@ucharclassverbose
1005 \PackageWarningNoLine{ucharclasses
}{Class
#1\MessageBreak
1011 % ----------------------------------------------------------------------------
1012 % Use: \setTransitionFrom{block name}{what to do when leaving this block}
1013 % ----------------------------------------------------------------------------
1015 \def\setTransitionFrom#1#2{%
1016 \ifcsname enable
#1\endcsname
1017 \count@=\@classstart
1018 \loop\ifnum\count@<\@classend
1020 \ifnum\count@=
\csname #1Class
\endcsname\else
1021 \XeTeXinterchartoks\csname #1Class
\endcsname \count@=
{#2}%
1024 \XeTeXinterchartoks\csname #1Class
\endcsname\@ucharclass@boundary=
{#2}%
1026 \if@ucharclassverbose
1027 \PackageWarningNoLine{ucharclasses
}{Class
#1\MessageBreak
1033 % ----------------------------------------------------------------------------
1034 % Informal Block Rules - for these, to/from must always be defined
1036 % Available informal groups are:
1039 % - CanadianSyllabics
1041 % - Chinese (including bopomofo)
1042 % - CJK (Chinese/Japanese/Korean)
1049 % - Japanese (it is advised to set CJK first to a catch-all, then set
1050 % Japanese for specifics)
1051 % - Korean (=Hangul) (same comment as for Japanese)
1063 % - Other (I am not a fan of lump groups. I hope to un-lump most of it)
1065 % ----------------------------------------------------------------------------
1067 %% For each class group Z we define \setTransitionsForZ as
1068 %% \newcommand\setTransitionsForZ[2]{%
1069 %% \setTransitionsFor{X1}{#1}{#2}
1070 %% \setTransitionsFor{X2}{#1}{#2}
1072 %% \setTransitionsFor{Xn}{#1}{#2}}
1073 %% where X1, X2, ..., Xn are the blocks in group Z
1075 \def\do#1{\noexpand\setTransitionsFor{#1}{###
#1}{###
#2}}
1077 \begingroup\edef\x{\endgroup
1078 \noexpand\newcommand
1079 \unexpanded\expandafter{\csname setTransitionsFor
#1\endcsname}[2]%
1080 {\csname #1Classes
\endcsname}}\x}
1084 % ----------------------------------------------------------------------------
1086 % based on the previous informal groups, we can define a catch-all transition command
1088 % ----------------------------------------------------------------------------
1090 %% The following is equivalent to defining
1091 %% \newcommand{\setDefaultTransitions}[2]{
1092 %% \setTransitionsForArabic{#1}{#2}
1094 %% \setTransitionsForOther{#1}{#2}}
1097 \expandafter\noexpand\csname setTransitionsFor
#1\endcsname{###
#1}{###
#2}}
1098 \begingroup\edef\x{\endgroup
1099 \noexpand\newcommand\noexpand\setDefaultTransitions[2]{%
1102 % ----------------------------------------------------------------------------
% Clean-up: \do and \doclass were redefined several times above as list
% iterators; make both undefined again at the end of the package.
1104 \let\do\@undefined
\let\doclass\@undefined
1107 % End of file `ucharclasses.sty'.