1 % ----------------------------------------------------------------------------
2 % This "ucharclasses" package sets up XeTeX character classes based on which
3 % unicode block a character is found in. It then allows transition rules to be defined
4 % when entering or leaving particular unicode blocks, the code of which gets inserted
5 % automatically when a transition from a character from one unicode block to a
6 % character from another unicode block is encountered by XeTeX
8 % Current compatibility should be Unicode 10.0.
11 % v2.1-2.3: Qing Lee, Werner Lemberg
12 % v2.0: Enrico Gregorio
13 % v1.0: Mike "Pomax" Kamermans
15 % Significant updates:
16 % v2.3: Unicode 10 support
17 % v2.2: Unicode 8.0 and LaTeX2e support
18 % v2.1: Uplift for the intercharclass updates introduced in XeTeX 0.99994
19 % v2.0: Rewritten to vastly improve performance.
20 % v1.0: Unicode block switching using XeTeX intercharclasses.
22 % License: public domain (https://www.ctan.org/license/pd)
24 % ----------------------------------------------------------------------------
% Identify this file to LaTeX as the package `ucharclasses',
% release date 2017/08/10, version v2.3.0.
26 \ProvidesPackage{ucharclasses
}[2017/
08/
10 v2.3
.0 Unicode block character classes for XeLaTeX
]
% \if@ucharclassverbose is the switch behind the `verbose' package option.
% \newif creates it in the false state; the option handler declared here
% sets it to true.  Further down, the switch guards the \typeout and
% \PackageWarningNoLine diagnostics.
28 \newif\if@ucharclassverbose
29 \DeclareOption{verbose
}{\@ucharclassverbosetrue
}
31 % ----------------------------------------------------------------------------
32 % The package options allow you to selectively enable certain unicode blocks
33 % ----------------------------------------------------------------------------
35 % We first define all blocks in a list together with their start and end
38 % Starting with XeTeX version 3.14159265-2.6-0.99994, the number of
39 % \XeTeXcharclass registers was extended from 256 to 4096 entries; some not
40 % so important blocks are thus provided only for this and newer versions.
41 % The boundary character class was changed from 255 to 4095 correspondingly.
42 % The primitive \XeTeXinterwordspaceshaping was introduced by XeTeX 0.99994;
43 % we use it as a flag to identify this version.
45 % However, earlier versions of LaTeX2e (before 2016/04/22 v2.0q) didn't provide
46 % support for 4096 entries; we thus have to override the hard-coded limit.
47 \ifdefined\e@alloc@intercharclass@top
48 \chardef\@ucharclass@boundary=
\e@alloc@intercharclass@top
50 \ifdefined\XeTeXinterwordspaceshaping
51 \chardef\@ucharclass@boundary=
4095 %
% Redefine \newXeTeXintercharclass as a call to the kernel allocator
% \e@alloc, passing (in this order) \XeTeXcharclass, \chardef, the
% allocation counter \xe@alloc@intercharclass, \m@ne and
% \@ucharclass@boundary.  The control sequence to be allocated is not an
% argument of this macro; it is read from the input that follows the call.
% NOTE(review): presumably \@ucharclass@boundary takes the place of the
% kernel's hard-coded upper limit mentioned in the comment above --
% confirm against the \e@alloc definition in the LaTeX2e sources.
52 \def\newXeTeXintercharclass{%
53 \e@alloc
\XeTeXcharclass\chardef
54 \xe@alloc@intercharclass
\m@ne\@ucharclass@boundary
}
56 \chardef\@ucharclass@boundary=\@cclv
61 % Unicode 5.1 block definitions
62 \do{AegeanNumbers
}{"
010100}{"
01013F
}
63 \do{AlphabeticPresentationForms
}{"
0FB00
}{"
0FB4F
}
64 \do{AncientGreekMusicalNotation
}{"
01D200
}{"
01D24F
}
65 \do{AncientGreekNumbers
}{"
010140}{"
01018F
}
66 \do{AncientSymbols
}{"
010190}{"
0101CF
}
67 \do{Arabic
}{"
0600}{"
06FF
}
68 \do{ArabicPresentationFormsA
}{"
0FB50
}{"
0FDFF
}
69 \do{ArabicPresentationFormsB
}{"
0FE70
}{"
0FEFF
}
70 \do{ArabicSupplement
}{"
0750}{"
077F
}
71 \do{Armenian
}{"
0530}{"
058F
}
72 \do{Arrows
}{"
02190}{"
021FF
}
73 \do{Balinese
}{"
01B00
}{"
01B7F
}
74 \do{BasicLatin
}{"
0020}{"
007F
} % 0000..007F in Unicode standard
75 \do{Bengali
}{"
0980}{"
09FF
}
76 \do{BlockElements
}{"
02580}{"
0259F
}
77 \do{Bopomofo
}{"
03100}{"
0312F
}
78 \do{BopomofoExtended
}{"
031A0
}{"
031BF
}
79 \do{BoxDrawing
}{"
02500}{"
0257F
}
80 \do{BraillePatterns
}{"
02800}{"
028FF
}
81 \do{Buginese
}{"
01A00
}{"
01A1F
}
82 \do{Buhid
}{"
01740}{"
0175F
}
83 \do{ByzantineMusicalSymbols
}{"
01D000
}{"
01D0FF
}
85 \do{Cham
}{"
0AA00
}{"
0AA5F
}
86 \do{Cherokee
}{"
013A0
}{"
013FF
}
87 \do{CJKCompatibility
}{"
03300}{"
033FF
}
88 \do{CJKCompatibilityForms
}{"
0FE30
}{"
0FE4F
}
89 \do{CJKCompatibilityIdeographs
}{"
0F900
}{"
0FAFF
}
90 \do{CJKCompatibilityIdeographsSupplement
}{"
02F800
}{"
02FA1F
}
91 \do{CJKRadicalsSupplement
}{"
02E80
}{"
02EFF
}
92 \do{CJKStrokes
}{"
031C0
}{"
031EF
}
93 \do{CJKSymbolsAndPunctuation
}{"
03000}{"
0303F
}
94 \do{CJKUnifiedIdeographs
}{"
04E00
}{"
09FFF
}
95 \do{CJKUnifiedIdeographsExtensionA
}{"
03400}{"
04DBF
}
96 \do{CJKUnifiedIdeographsExtensionB
}{"
020000}{"
02A6DF
}
97 \do{CombiningDiacriticalMarks
}{"
0300}{"
036F
}
98 \do{CombiningDiacriticalMarksForSymbols
}{"
020D0
}{"
020FF
}
99 \do{CombiningDiacriticalMarksSupplement
}{"
01DC0
}{"
01DFF
}
100 \do{CombiningHalfMarks
}{"
0FE20
}{"
0FE2F
}
101 \do{ControlPictures
}{"
02400}{"
0243F
}
102 \do{Coptic
}{"
02C80
}{"
02CFF
}
103 \do{CountingRodNumerals
}{"
01D360
}{"
01D37F
}
104 \do{Cuneiform
}{"
012000}{"
0123FF
}
105 \do{CuneiformNumbersAndPunctuation
}{"
012400}{"
01247F
}
106 \do{CurrencySymbols
}{"
020A0
}{"
020CF
}
107 \do{CypriotSyllabary
}{"
010800}{"
01083F
}
108 \do{Cyrillic
}{"
0400}{"
04FF
}
109 \do{CyrillicExtendedA
}{"
02DE0
}{"
02DFF
}
110 \do{CyrillicExtendedB
}{"
0A640
}{"
0A69F
}
111 \do{CyrillicSupplement
}{"
0500}{"
052F
}
112 \do{Deseret
}{"
010400}{"
01044F
}
113 \do{Devanagari
}{"
0900}{"
097F
}
114 \do{Dingbats
}{"
02700}{"
027BF
}
115 \do{DominoTiles
}{"
01F030
}{"
01F09F
}
116 \do{EnclosedAlphanumerics
}{"
02460}{"
024FF
}
117 \do{EnclosedCJKLettersAndMonths
}{"
03200}{"
032FF
}
118 \do{Ethiopic
}{"
01200}{"
0137F
}
119 \do{EthiopicExtended
}{"
02D80
}{"
02DDF
}
120 \do{EthiopicSupplement
}{"
01380}{"
0139F
}
121 \do{GeneralPunctuation
}{"
02000}{"
0206F
}
122 \do{GeometricShapes
}{"
025A0
}{"
025FF
}
123 \do{Georgian
}{"
010A0
}{"
010FF
}
124 \do{GeorgianSupplement
}{"
02D00
}{"
02D2F
}
125 \do{Glagolitic
}{"
02C00
}{"
02C5F
}
126 \do{Gothic
}{"
010330}{"
01034F
}
127 \do{GreekAndCoptic
}{"
0370}{"
03FF
}
128 \do{GreekExtended
}{"
01F00
}{"
01FFF
}
129 \do{Gujarati
}{"
0A80
}{"
0AFF
}
130 \do{Gurmukhi
}{"
0A00
}{"
0A7F
}
131 \do{HalfwidthAndFullwidthForms
}{"
0FF00
}{"
0FFEF
}
132 \do{HangulCompatibilityJamo
}{"
03130}{"
0318F
}
133 \do{HangulJamo
}{"
01100}{"
011FF
}
134 \do{HangulSyllables
}{"
0AC00
}{"
0D7AF
}
135 \do{Hanunoo
}{"
01720}{"
0173F
}
136 \do{Hebrew
}{"
0590}{"
05FF
}
137 \do{Hiragana
}{"
03040}{"
0309F
}
138 \do{IdeographicDescriptionCharacters
}{"
02FF0
}{"
02FFF
}
139 \do{IPAExtensions
}{"
0250}{"
02AF
}
140 \do{Kanbun
}{"
03190}{"
0319F
}
141 \do{KangxiRadicals
}{"
02F00
}{"
02FDF
}
142 \do{Kannada
}{"
0C80
}{"
0CFF
}
143 \do{Katakana
}{"
030A0
}{"
030FF
}
144 \do{KatakanaPhoneticExtensions
}{"
031F0
}{"
031FF
}
145 \do{KayahLi
}{"
0A900
}{"
0A92F
}
146 \do{Kharoshthi
}{"
010A00
}{"
010A5F
}
147 \do{Khmer
}{"
01780}{"
017FF
}
148 \do{KhmerSymbols
}{"
019E0
}{"
019FF
}
149 \do{Lao
}{"
0E80
}{"
0EFF
}
150 \do{LatinExtendedAdditional
}{"
01E00
}{"
01EFF
}
151 \do{LatinExtendedA
}{"
0100}{"
017F
}
152 \do{LatinExtendedB
}{"
0180}{"
024F
}
153 \do{LatinExtendedC
}{"
02C60
}{"
02C7F
}
154 \do{LatinExtendedD
}{"
0A720
}{"
0A7FF
}
155 \do{LatinSupplement
}{"
0080}{"
00FF
}
156 \do{Lepcha
}{"
01C00
}{"
01C4F
}
157 \do{LetterlikeSymbols
}{"
02100}{"
0214F
}
158 \do{Limbu
}{"
01900}{"
0194F
}
159 \do{LinearBIdeograms
}{"
010080}{"
0100FF
}
160 \do{LinearBSyllabary
}{"
010000}{"
01007F
}
161 \do{Lycian
}{"
010280}{"
01029F
}
162 \do{Lydian
}{"
010920}{"
01093F
}
163 \do{MahjongTiles
}{"
01F000
}{"
01F02F
}
164 \do{Malayalam
}{"
0D00
}{"
0D7F
}
165 \do{MathematicalAlphanumericSymbols
}{"
01D400
}{"
01D7FF
}
166 \do{MathematicalOperators
}{"
02200}{"
022FF
}
167 \do{MiscellaneousMathematicalSymbolsA
}{"
027C0
}{"
027EF
}
168 \do{MiscellaneousMathematicalSymbolsB
}{"
02980}{"
029FF
}
169 \do{MiscellaneousSymbols
}{"
02600}{"
026FF
}
170 \do{MiscellaneousSymbolsAndArrows
}{"
02B00
}{"
02BFF
}
171 \do{MiscellaneousTechnical
}{"
02300}{"
023FF
}
172 \do{ModifierToneLetters
}{"
0A700
}{"
0A71F
}
173 \do{Mongolian
}{"
01800}{"
018AF
}
174 \do{MusicalSymbols
}{"
01D100
}{"
01D1FF
}
175 \do{Myanmar
}{"
01000}{"
0109F
}
176 \do{NewTaiLue
}{"
01980}{"
019DF
}
177 \do{NKo
}{"
07C0
}{"
07FF
}
178 \do{NumberForms
}{"
02150}{"
0218F
}
179 \do{Ogham
}{"
01680}{"
0169F
}
180 \do{OlChiki
}{"
01C50
}{"
01C7F
}
181 % OldItalic (see below)
182 \do{OldPersian
}{"
0103A0
}{"
0103DF
}
183 \do{OpticalCharacterRecognition
}{"
02440}{"
0245F
}
184 \do{Oriya
}{"
0B00
}{"
0B7F
}
185 \do{Osmanya
}{"
010480}{"
0104AF
}
186 \do{PhagsPa
}{"
0A840
}{"
0A87F
}
187 % PhaistosDisc (see below)
188 \do{Phoenician
}{"
010900}{"
01091F
}
189 \do{PhoneticExtensions
}{"
01D00
}{"
01D7F
}
190 \do{PhoneticExtensionsSupplement
}{"
01D80
}{"
01DBF
}
191 \do{PrivateUseArea
}{"
0E000
}{"
0F8FF
}
192 \do{Rejang
}{"
0A930
}{"
0A95F
}
193 \do{Runic
}{"
016A0
}{"
016FF
}
194 \do{Saurashtra
}{"
0A880
}{"
0A8DF
}
195 \do{Shavian
}{"
010450}{"
01047F
}
196 \do{Sinhala
}{"
0D80
}{"
0DFF
}
197 \do{SmallFormVariants
}{"
0FE50
}{"
0FE6F
}
198 \do{SpacingModifierLetters
}{"
02B0
}{"
02FF
}
199 \do{Sundanese
}{"
01B80
}{"
01BBF
}
200 \do{SuperscriptsAndSubscripts
}{"
02070}{"
0209F
}
201 \do{SupplementalArrowsA
}{"
027F0
}{"
027FF
}
202 \do{SupplementalArrowsB
}{"
02900}{"
0297F
}
203 \do{SupplementalMathematicalOperators
}{"
02A00
}{"
02AFF
}
204 \do{SupplementalPunctuation
}{"
02E00
}{"
02E7F
}
205 % SupplementaryPrivateUseAreaA (see below)
206 % SupplementaryPrivateUseAreaB (see below)
207 \do{SylotiNagri
}{"
0A800
}{"
0A82F
}
208 \do{Syriac
}{"
0700}{"
074F
}
209 \do{Tagalog
}{"
01700}{"
0171F
}
210 \do{Tagbanwa
}{"
01760}{"
0177F
}
211 \do{Tags
}{"
0E0000
}{"
0E007F
}
212 \do{TaiLe
}{"
01950}{"
0197F
}
213 \do{TaiXuanJingSymbols
}{"
01D300
}{"
01D35F
}
214 \do{Tamil
}{"
0B80
}{"
0BFF
}
215 \do{Telugu
}{"
0C00
}{"
0C7F
}
216 \do{Thaana
}{"
0780}{"
07BF
}
217 \do{Thai
}{"
0E00
}{"
0E7F
}
218 \do{Tibetan
}{"
0F00
}{"
0FFF
}
219 \do{Tifinagh
}{"
02D30
}{"
02D7F
}
220 \do{Ugaritic
}{"
010380}{"
01039F
}
221 \do{UnifiedCanadianAboriginalSyllabics
}{"
01400}{"
0167F
}
222 \do{Vai
}{"
0A500
}{"
0A63F
}
223 \do{VerticalForms
}{"
0FE10
}{"
0FE1F
}
224 \do{YiRadicals
}{"
0A490
}{"
0A4CF
}
225 \do{YiSyllables
}{"
0A000
}{"
0A48F
}
226 \do{YijingHexagramSymbols
}{"
04DC0
}{"
04DFF
}
227 % Unicode 5.2 additions
228 \do{Avestan
}{"
010B00
}{"
010B3F
}
229 \do{Bamum
}{"
0A6A0
}{"
0A6FF
}
230 \do{CJKUnifiedIdeographsExtensionC
}{"
02A700
}{"
02B73F
}
231 \do{CommonIndicNumberForms
}{"
0A830
}{"
0A83F
}
232 \do{DevanagariExtended
}{"
0A8E0
}{"
0A8FF
}
233 \do{EgyptianHieroglyphs
}{"
013000}{"
01342F
}
234 \do{EnclosedAlphanumericSupplement
}{"
01F100
}{"
01F1FF
}
235 \do{EnclosedIdeographicSupplement
}{"
01F200
}{"
01F2FF
}
236 \do{HangulJamoExtendedA
}{"
0A960
}{"
0A97F
}
237 \do{HangulJamoExtendedB
}{"
0D7B0
}{"
0D7FF
}
238 \do{ImperialAramaic
}{"
010840}{"
01085F
}
239 \do{InscriptionalPahlavi
}{"
010B60
}{"
010B7F
}
240 \do{InscriptionalParthian
}{"
010B40
}{"
010B5F
}
241 \do{Javanese
}{"
0A980
}{"
0A9DF
}
242 \do{Kaithi
}{"
011080}{"
0110CF
}
243 \do{Lisu
}{"
0A4D0
}{"
0A4FF
}
244 \do{MeeteiMayek
}{"
0ABC0
}{"
0ABFF
}
245 \do{MyanmarExtendedA
}{"
0AA60
}{"
0AA7F
}
246 % OldSouthArabian (see below)
247 % OldTurkic (see below)
248 \do{RumiNumeralSymbols
}{"
010E60
}{"
010E7F
}
249 \do{Samaritan
}{"
0800}{"
083F
}
250 \do{TaiTham
}{"
01A20
}{"
01AAF
}
251 \do{TaiViet
}{"
0AA80
}{"
0AADF
}
252 \do{UnifiedCanadianAboriginalSyllabicsExtended
}{"
018B0
}{"
018FF
}
253 \do{VedicExtensions
}{"
01CD0
}{"
01CFF
}
254 % Unicode 6.0 additions
255 \do{AlchemicalSymbols
}{"
01F700
}{"
01F77F
}
256 \do{BamumSupplement
}{"
016800}{"
016A3F
}
257 \do{Batak
}{"
01BC0
}{"
01BFF
}
258 \do{Brahmi
}{"
011000}{"
01107F
}
259 \do{CJKUnifiedIdeographsExtensionD
}{"
02B740
}{"
02B81F
}
260 \do{Emoticons
}{"
01F600
}{"
01F64F
}
261 \do{EthiopicExtendedA
}{"
0AB00
}{"
0AB2F
}
262 \do{KanaSupplement
}{"
01B000
}{"
01B0FF
}
263 \do{Mandaic
}{"
0840}{"
085F
}
264 \do{MiscellaneousSymbolsAndPictographs
}{"
01F300
}{"
01F5FF
}
265 \do{PlayingCards
}{"
01F0A0
}{"
01F0FF
}
266 \do{TransportAndMapSymbols
}{"
01F680
}{"
01F6FF
}
267 % Unicode 6.1 additions
268 \do{ArabicExtendedA
}{"
08A0
}{"
08FF
}
269 \do{ArabicMathematicalAlphabeticSymbols
}{"
01EE00
}{"
01EEFF
}
270 \do{Chakma
}{"
011100}{"
01114F
}
271 \do{MeeteiMayekExtensions
}{"
0AAE0
}{"
0AAFF
}
272 \do{MeroiticCursive
}{"
0109A0
}{"
0109FF
}
273 \do{MeroiticHieroglyphs
}{"
010980}{"
01099F
}
274 \do{Miao
}{"
016F00
}{"
016F9F
}
275 \do{Sharada
}{"
011180}{"
0111DF
}
276 \do{SoraSompeng
}{"
0110D0
}{"
0110FF
}
277 \do{SundaneseSupplement
}{"
01CC0
}{"
01CCF
}
278 \do{Takri
}{"
011680}{"
0116CF
}
279 % Unicode 7.0 additions
280 \do{BassaVah
}{"
016AD0
}{"
016AFF
}
281 \do{CaucasianAlbanian
}{"
010530}{"
01056F
}
282 \do{CombiningDiacriticalMarksExtended
}{"
01AB0
}{"
01AFF
}
283 \do{CopticEpactNumbers
}{"
0102E0
}{"
0102FF
}
284 % Duployan (see below)
285 \do{Elbasan
}{"
010500}{"
01052F
}
286 \do{GeometricShapesExtended
}{"
01F780
}{"
01F7FF
}
287 \do{Grantha
}{"
011300}{"
01137F
}
288 \do{Khojki
}{"
011200}{"
01124F
}
289 \do{Khudawadi
}{"
0112B0
}{"
0112FF
}
290 \do{LatinExtendedE
}{"
0AB30
}{"
0AB6F
}
291 \do{LinearA
}{"
010600}{"
01077F
}
292 \do{Mahajani
}{"
011150}{"
01117F
}
293 \do{Manichaean
}{"
010AC0
}{"
010AFF
}
294 \do{MendeKikakui
}{"
01E800
}{"
01E8DF
}
295 \do{Modi
}{"
011600}{"
01165F
}
296 \do{Mro
}{"
016A40
}{"
016A6F
}
297 \do{MyanmarExtendedB
}{"
0A9E0
}{"
0A9FF
}
298 \do{Nabataean
}{"
010880}{"
0108AF
}
299 % OldNorthArabian (see below)
300 \do{OldPermic
}{"
010350}{"
01037F
}
301 \do{OrnamentalDingbats
}{"
01F650
}{"
01F67F
}
302 \do{PahawhHmong
}{"
016B00
}{"
016B8F
}
303 \do{Palmyrene
}{"
010860}{"
01087F
}
304 \do{PauCinHau
}{"
011AC0
}{"
011AFF
}
305 \do{PsalterPahlavi
}{"
010B80
}{"
010BAF
}
306 % ShorthandFormatControls (see below)
307 \do{Siddham
}{"
011580}{"
0115FF
}
308 \do{SinhalaArchaicNumbers
}{"
0111E0
}{"
0111FF
}
309 \do{SupplementalArrowsC
}{"
01F800
}{"
01F8FF
}
310 \do{Tirhuta
}{"
011480}{"
0114DF
}
311 \do{WarangCiti
}{"
0118A0
}{"
0118FF
}
312 % Unicode 8.0 additions
313 \do{Ahom
}{"
011700}{"
01173F
}
314 % AnatolianHieroglyphs (see below)
315 \do{CherokeeSupplement
}{"
0AB70
}{"
0ABBF
}
316 \do{CJKUnifiedIdeographsExtensionE
}{"
02B820
}{"
02CEAF
}
317 \do{EarlyDynasticCuneiform
}{"
012480}{"
01254F
}
318 \do{Hatran
}{"
0108E0
}{"
0108FF
}
319 \do{Multani
}{"
011280}{"
0112AF
}
320 \do{OldHungarian
}{"
010C80
}{"
010CFF
}
321 \do{SupplementalSymbolsAndPictographs
}{"
01F900
}{"
01F9FF
}
322 % SuttonSignWriting (see below)
323 % Unicode 9.0 additions needed for classes
324 \do{CyrillicExtendedC
}{"
01C80
}{"
01C8F
}
325 \do{GlagoliticSupplement
}{"
01E000
}{"
01E02F
}
326 \do{IdeographicSymbolsAndPunctuation
}{"
016FE0
}{"
016FFF
}
327 \do{MongolianSupplement
}{"
011660}{"
01167F
}
328 % Unicode 10.0 additions needed for classes
329 \do{CJKUnifiedIdeographsExtensionF
}{"
02CEB0
}{"
02EBEF
}
330 \do{KanaExtendedA
}{"
01B100
}{"
01B12F
}
331 \do{SyriacSupplement
}{"
0860}{"
086F
}
333 \ifdefined\XeTeXinterwordspaceshaping
334 \do{AnatolianHieroglyphs
}{"
014400}{"
01467F
}
335 \do{Carian
}{"
0102A0
}{"
0102DF
}
336 \do{Duployan
}{"
01BC00
}{"
01BC9F
}
337 \do{OldItalic
}{"
010300}{"
01032F
}
338 \do{OldNorthArabian
}{"
010A80
}{"
010A9F
}
339 \do{OldSouthArabian
}{"
010A60
}{"
010A7F
}
340 \do{OldTurkic
}{"
010C00
}{"
010C4F
}
341 \do{PhaistosDisc
}{"
0101D0
}{"
0101FF
}
342 \do{ShorthandFormatControls
}{"
01BCA0
}{"
01BCAF
}
343 \do{SupplementaryPrivateUseAreaA
}{"
0F0000
}{"
0FFFFF
}
344 \do{SupplementaryPrivateUseAreaB
}{"
0100000}{"
010FFFF
}
345 \do{SuttonSignWriting
}{"
01D800
}{"
01DAAF
}
346 % Unicode 9.0 additions
347 \do{Adlam
}{"
01E900
}{"
01E95F
}
348 \do{Bhaiksuki
}{"
011C00
}{"
011C6F
}
349 \do{Marchen
}{"
011C70
}{"
011CBF
}
350 \do{Newa
}{"
011400}{"
01147F
}
351 \do{Osage
}{"
0104B0
}{"
0104FF
}
352 \do{Tangut
}{"
017000}{"
0187FF
}
353 \do{TangutComponents
}{"
018800}{"
018AFF
}
354 % Unicode 10.0 additions
355 \do{MasaramGondi
}{"
011D00
}{"
011D5F
}
356 \do{Nushu
}{"
01B170
}{"
01B2FF
}
357 \do{Soyombo
}{"
011A50
}{"
011AAF
}
358 \do{ZanabazarSquare
}{"
011A00
}{"
011A4F
}
362 % ----------------------------------------------------------------------------
363 % Option handling lets the user turn off "load all" and selectively enable only those blocks
364 % they are interested in
365 % ----------------------------------------------------------------------------
367 % Each option starts with \overrideClassLoading; so any specified
368 % option will set |\if@overrideClassLoading| to true; when one has
369 % been scanned it's not necessary to set the conditional again. Then
370 % for block X we let \enableX to \@empty so that later on we can check
% \if@overrideClassLoading starts out false (\newif) and becomes true as
% soon as any block or group option is used.
% \overrideClassLoading sets the switch and then \let's itself to \relax,
% so every later call in the same option run does nothing.
373 \newif\if@overrideClassLoading
374 \newcommand{\overrideClassLoading}{\@overrideClassLoadingtrue
375 \let\overrideClassLoading\relax}
% Option-declaring variant of \do.
%   #1  block name; becomes the name of a package option
%   #2  first code point of the block (not used by this variant)
%   #3  last code point of the block (not used by this variant)
% The declared option calls \overrideClassLoading and then \let's the
% control sequence \enable<#1> to \@empty, which marks the block as requested.
377 \def\do#1#2#3{\DeclareOption{#1}%
378 {\overrideClassLoading\expandafter\let\csname enable
#1\endcsname\@empty
}}
379 % We execute the list with this definition of \do
383 % We define lists also for these groups
387 \doclass{CanadianSyllabics
}
388 \doclass{CherokeeFull
}
393 \doclass{EthiopicFull
}
394 \doclass{GeorgianFull
}
399 \doclass{Mathematics
}
400 \doclass{MongolianFull
}
401 \doclass{MyanmarFull
}
403 \doclass{Punctuation
}
404 \doclass{SundaneseFull
}
414 \do{ArabicPresentationFormsA
}
415 \do{ArabicPresentationFormsB
}
416 \do{ArabicSupplement
}
419 \def\CanadianSyllabicsClasses{
420 \do{UnifiedCanadianAboriginalSyllabics
}
421 \do{UnifiedCanadianAboriginalSyllabicsExtended
}
424 \def\CherokeeFullClasses{
426 \do{CherokeeSupplement
}
431 \do{BopomofoExtended
}
432 \do{CJKCompatibility
}
433 \do{CJKCompatibilityForms
}
434 \do{CJKCompatibilityIdeographs
}
435 \do{CJKCompatibilityIdeographsSupplement
}
436 \do{CJKRadicalsSupplement
}
438 \do{CJKSymbolsAndPunctuation
}
439 \do{CJKUnifiedIdeographs
}
440 \do{CJKUnifiedIdeographsExtensionA
}
441 \do{CJKUnifiedIdeographsExtensionB
}
442 \do{CJKUnifiedIdeographsExtensionC
}
443 \do{CJKUnifiedIdeographsExtensionD
}
444 \do{CJKUnifiedIdeographsExtensionE
}
445 \do{CJKUnifiedIdeographsExtensionF
}
446 \do{EnclosedCJKLettersAndMonths
}
447 \do{EnclosedIdeographicSupplement
}
448 \do{IdeographicDescriptionCharacters
}
449 \do{IdeographicSymbolsAndPunctuation
}
455 \do{BopomofoExtended
}
456 \do{CJKCompatibility
}
457 \do{CJKCompatibilityForms
}
458 \do{CJKCompatibilityIdeographs
}
459 \do{CJKCompatibilityIdeographsSupplement
}
460 \do{CJKRadicalsSupplement
}
462 \do{CJKSymbolsAndPunctuation
}
463 \do{CJKUnifiedIdeographs
}
464 \do{CJKUnifiedIdeographsExtensionA
}
465 \do{CJKUnifiedIdeographsExtensionB
}
466 \do{CJKUnifiedIdeographsExtensionC
}
467 \do{CJKUnifiedIdeographsExtensionD
}
468 \do{CJKUnifiedIdeographsExtensionE
}
469 \do{CJKUnifiedIdeographsExtensionF
}
470 \do{EnclosedCJKLettersAndMonths
}
471 \do{EnclosedIdeographicSupplement
}
472 \do{HalfwidthAndFullwidthForms
}
473 \do{HangulCompatibilityJamo
}
475 \do{HangulJamoExtendedA
}
476 \do{HangulJamoExtendedB
}
479 \do{IdeographicDescriptionCharacters
}
480 \do{IdeographicSymbolsAndPunctuation
}
486 \do{KatakanaPhoneticExtensions
}
489 \def\CyrillicsClasses{
491 \do{CyrillicExtendedA
}
492 \do{CyrillicExtendedB
}
493 \do{CyrillicExtendedC
}
494 \do{CyrillicSupplement
}
495 \do{GlagoliticSupplement
}
499 \def\DiacriticsClasses{
500 \do{CombiningDiacriticalMarks
}
501 \do{CombiningDiacriticalMarksExtended
}
502 \do{CombiningDiacriticalMarksForSymbols
}
503 \do{CombiningDiacriticalMarksSupplement
}
504 \do{CombiningHalfMarks
}
505 \do{ModifierToneLetters
}
506 \do{SpacingModifierLetters
}
509 \def\EthiopicFullClasses{
511 \do{EthiopicExtended
}
512 \do{EthiopicExtendedA
}
513 \do{EthiopicSupplement
}
516 \def\GeorgianFullClasses{
518 \do{GeorgianSupplement
}
523 \do{CopticEpactNumbers
}
529 \do{HangulCompatibilityJamo
}
531 \do{HangulJamoExtendedA
}
532 \do{HangulJamoExtendedB
}
536 \def\JapaneseClasses{
537 \do{CJKUnifiedIdeographs
}
538 \do{HalfwidthAndFullwidthForms
}
545 \do{KatakanaPhoneticExtensions
}
549 \do{AlphabeticPresentationForms
}
551 \do{LatinExtendedAdditional
}
560 \def\MathematicsClasses{
561 \do{ArabicMathematicalAlphabeticSymbols
}
562 \do{MathematicalAlphanumericSymbols
}
563 \do{MathematicalOperators
}
564 \do{MiscellaneousMathematicalSymbolsA
}
565 \do{MiscellaneousMathematicalSymbolsB
}
566 \do{SupplementalMathematicalOperators
}
569 \def\MongolianFullClasses{
571 \do{MongolianSupplement
}
574 \def\MyanmarFullClasses{
576 \do{MyanmarExtendedA
}
577 \do{MyanmarExtendedB
}
580 \def\PhoneticsClasses{
582 \do{PhoneticExtensions
}
583 \do{PhoneticExtensionsSupplement
}
586 \def\PunctuationClasses{
587 \do{GeneralPunctuation
}
588 \do{SupplementalPunctuation
}
591 \def\SundaneseFullClasses{
593 \do{SundaneseSupplement
}
597 \do{AlchemicalSymbols
}
600 \do{ByzantineMusicalSymbols
}
606 \do{GeometricShapesExtended
}
607 \do{LetterlikeSymbols
}
608 \do{MiscellaneousSymbols
}
609 \do{MiscellaneousSymbolsAndArrows
}
610 \do{MiscellaneousSymbolsAndPictographs
}
611 \do{MiscellaneousTechnical
}
613 \do{OrnamentalDingbats
}
614 \do{SupplementalArrowsA
}
615 \do{SupplementalArrowsB
}
616 \do{SupplementalArrowsC
}
617 \do{SupplementalSymbolsAndPictographs
}
618 \do{TransportAndMapSymbols
}
621 \def\SyriacFullClasses{
623 \do{SyriacSupplement
}
634 % AnatolianHieroglyphs (see below)
635 \do{AncientGreekMusicalNotation
}
636 \do{AncientGreekNumbers
}
653 \do{CaucasianAlbanian
}
655 \do{CommonIndicNumberForms
}
657 \do{CountingRodNumerals
}
659 \do{CuneiformNumbersAndPunctuation
}
660 \do{CypriotSyllabary
}
664 % Duployan (see below)
665 \do{EarlyDynasticCuneiform
}
666 \do{EgyptianHieroglyphs
}
668 \do{EnclosedAlphanumerics
}
669 \do{EnclosedAlphanumericSupplement
}
678 \do{InscriptionalPahlavi
}
679 \do{InscriptionalParthian
}
693 \do{LinearBIdeograms
}
694 \do{LinearBSyllabary
}
704 \do{MeeteiMayekExtensions
}
707 \do{MeroiticHieroglyphs
}
719 % OldItalic (see below)
720 % OldNorthArabian (see below)
723 % OldSouthArabian (see below)
724 % OldTurkic (see below)
725 \do{OpticalCharacterRecognition
}
732 % PhaistosDisc (see below)
738 \do{RumiNumeralSymbols
}
744 % ShorthandFormatControls (see below)
747 \do{SinhalaArchaicNumbers
}
748 \do{SmallFormVariants
}
750 \do{SuperscriptsAndSubscripts
}
751 % SupplementaryPrivateUseAreaA (see below)
752 % SupplementaryPrivateUseAreaB (see below)
753 % SuttonSignWriting (see below)
761 \do{TaiXuanJingSymbols
}
775 \do{YijingHexagramSymbols
}
777 \ifdefined\XeTeXinterwordspaceshaping
779 \do{AnatolianHieroglyphs
}
793 \do{ShorthandFormatControls
}
795 \do{SupplementaryPrivateUseAreaA
}
796 \do{SupplementaryPrivateUseAreaB
}
797 \do{SuttonSignWriting
}
799 \do{TangutComponents
}
804 % For each class group Z we define the relative option
805 % \DeclareOption{Z}{\overrideClassLoading\enableX1\enableX2...\enableXn}
806 % where X1, X2, ..., Xn are the blocks belonging to class Z
809 \unexpanded{\expandafter\let\csname enable
#1\endcsname\@empty
}}
811 \begingroup\edef\x{\endgroup\noexpand\DeclareOption{#1}{%
812 \noexpand\overrideClassLoading\csname #1Classes
\endcsname}}\x}
% Run the handlers of the options passed to the package; the handlers
% are the ones registered with \DeclareOption above.
816 \ProcessOptions\relax
818 % If no option has been given, \if@overrideClassLoading will still be
819 % false, and in this case we enable *all* blocks (again by defining
820 % \enableX equal to \@empty for each block X)
822 \if@overrideClassLoading
\else
823 \def\do#1#2#3{\expandafter\let\csname enable
#1\endcsname\@empty
}
827 % ----------------------------------------------------------------------------
828 % After dealing with the options, make sure we have the necessary packages available
829 % ----------------------------------------------------------------------------
831 % because this package relies on XeTeX's intercharclass system, better require XeTeX
832 \RequirePackage{ifxetex
}
835 % ----------------------------------------------------------------------------
836 % This package heavily exploits XeTeX's intercharclass system!
837 % ----------------------------------------------------------------------------
839 % enable/disable commands
% User-level switches for the interchar token mechanism:
%   \disableTransitionRules  assigns \z@ (0) to \XeTeXinterchartokenstate
%   \enableTransitionRules   assigns \@ne (1) to \XeTeXinterchartokenstate
% Neither command takes an argument.
840 \newcommand{\disableTransitionRules}{\XeTeXinterchartokenstate =
\z@
}
841 \newcommand{\enableTransitionRules}{\XeTeXinterchartokenstate = \@ne
}
% Short aliases: \uccoff and \uccon are \let to the meanings the two
% commands above have at this point.
844 \let\uccoff\disableTransitionRules
845 \let\uccon\enableTransitionRules
847 % make sure it's turned on
848 \enableTransitionRules
850 % ----------------------------------------------------------------------------
851 % And now, finally, we can start loading all the requested blocks
852 % ----------------------------------------------------------------------------
854 % \message{Package ucharclasses Message: Assigning character classes per
855 % Unicode block (this may take a while)}
857 %% We record the last allocated class before allocating ours;
858 %% \newXeTeXintercharclass saves in the counter
859 %% \xe@alloc@intercharclass the last allocated class number; initially
860 %% it's 3, but some other code might have allocated interchar classes
861 %% before loading this package; if \enableX is defined (to \@empty,
862 %% but that's irrelevant), an intercharclass is allocated by using the
863 %% list \AllClasses; two cases for block X:
865 %% (1) \enableX is defined: then \do{X}{a}{b} will become
866 %% \@defineUnicodeClass{X}{a}{b} which in turn will execute
867 %% \newXeTeXintercharclass\XClass and start a loop assigning code
868 %% points from a to b to this class
870 %% (2) \enableX is not defined: then \do{X}{a}{b} will become
871 %% \@gobblethree{X}{a}{b} and so nothing will be performed
% \@classstart: snapshot of the allocation counter \xe@alloc@intercharclass
% taken before this package allocates any class of its own.
873 \chardef\@classstart=
\xe@alloc@intercharclass
% \@gobblethree: takes three arguments and expands to nothing.
% \providecommand leaves an existing definition untouched.
875 \providecommand\@gobblethree
[3]{}
877 \ifcsname enable
#1\endcsname
878 \expandafter\@defineUnicodeClass
880 \expandafter\@gobblethree
883 \def\@defineUnicodeClass
#1#2#3{%
884 \if@ucharclassverbose
\typeout{Defining
#1 Class
}\fi
885 \expandafter\newXeTeXintercharclass\csname #1Class
\endcsname
888 \if@ucharclassverbose
889 \typeout{\XeTeXcharclass\number\count@=
890 \expandafter\string\csname #1Class
\endcsname}%
892 \XeTeXcharclass\count@=
\csname #1Class
\endcsname
899 % finally, we record the end of our charclass range
% \@classend: snapshot of the allocation counter \xe@alloc@intercharclass
% at this point in the file.  The transition macros below use it as the
% loop bound (\ifnum\count@<\@classend).
900 \chardef\@classend=
\xe@alloc@intercharclass
903 %%% Our assigned classes go from \@classstart (excluded) to \@classend (included)
905 % ----------------------------------------------------------------------------
906 % Use: \setTransitionsFor{block name}{when entering this block}{when leaving this block}
907 % ----------------------------------------------------------------------------
909 \def\setTransitionsFor#1#2#3{%
910 \ifcsname enable
#1\endcsname
912 \loop\ifnum\count@<\@classend
914 \ifnum\count@=
\csname #1Class
\endcsname\else
915 \XeTeXinterchartoks\count@
\csname #1Class
\endcsname=
{#2}%
916 \XeTeXinterchartoks\csname #1Class
\endcsname \count@=
{#3}%
919 \XeTeXinterchartoks\@ucharclass@boundary
\csname #1Class
\endcsname=
{#2}%
920 \XeTeXinterchartoks\csname #1Class
\endcsname\@ucharclass@boundary=
{#3}%
922 \if@ucharclassverbose
923 \PackageWarningNoLine{ucharclasses
}{Class
#1\MessageBreak
929 % ----------------------------------------------------------------------------
930 % Use: \setTransitionTo{block name}{what to do when entering this block}
931 % ----------------------------------------------------------------------------
933 \def\setTransitionTo#1#2{%
934 \ifcsname enable
#1\endcsname
936 \loop\ifnum\count@<\@classend
938 \ifnum\count@=
\csname #1Class
\endcsname\else
939 \XeTeXinterchartoks\count@
\csname #1Class
\endcsname=
{#2}%
942 \XeTeXinterchartoks\@ucharclass@boundary
\csname #1Class
\endcsname=
{#2}%
944 \if@ucharclassverbose
945 \PackageWarningNoLine{ucharclasses
}{Class
#1\MessageBreak
951 % ----------------------------------------------------------------------------
952 % Use: \setTransitionFrom{block name}{what to do when leaving this block}
953 % ----------------------------------------------------------------------------
955 \def\setTransitionFrom#1#2{%
956 \ifcsname enable
#1\endcsname
958 \loop\ifnum\count@<\@classend
960 \ifnum\count@=
\csname #1Class
\endcsname\else
961 \XeTeXinterchartoks\csname #1Class
\endcsname \count@=
{#2}%
964 \XeTeXinterchartoks\csname #1Class
\endcsname\@ucharclass@boundary=
{#2}%
966 \if@ucharclassverbose
967 \PackageWarningNoLine{ucharclasses
}{Class
#1\MessageBreak
973 % ----------------------------------------------------------------------------
974 % Informal Block Rules - for these, to/from must always be defined
976 % Available informal groups are:
979 % - CanadianSyllabics
981 % - Chinese (including bopomofo)
982 % - CJK (Chinese/Japanese/Korean)
988 % - Japanese (it is advised to set CJK first to a catch-all, then set
989 % Japanese for specifics)
990 % - Korean (=Hangul) (same comment as for Japanese)
1001 % - Other (I am not a fan of lump groups. I hope to un-lump most of it)
1003 % ----------------------------------------------------------------------------
1005 %% For each class group Z we define \setTransitionsForZ as
1006 %% \newcommand\setTransitionsForZ[2]{%
1007 %% \setTransitionsFor{X1}{#1}{#2}
1008 %% \setTransitionsFor{X2}{#1}{#2}
1010 %% \setTransitionsFor{Xn}{#1}{#2}}
1011 %% where X1, X2, ..., Xn are the blocks in group Z
% Transition-generating variant of \do; it now takes a single argument.
%   #1  block name
% It produces \setTransitionsFor{<#1>}{..}{..}.  \noexpand keeps
% \setTransitionsFor unexpanded inside the \edef used below, and the
% quadrupled hash marks are there so that the last two arguments end up
% as the parameters #1 and #2 of the generated \setTransitionsFor<group>
% command (see the expansion sketched in the comment above).
1013 \def\do#1{\noexpand\setTransitionsFor{#1}{###
#1}{###
#2}}
1015 \begingroup\edef\x{\endgroup
1016 \noexpand\newcommand
1017 \unexpanded\expandafter{\csname setTransitionsFor
#1\endcsname}[2]%
1018 {\csname #1Classes
\endcsname}}\x}
1022 % ----------------------------------------------------------------------------
1024 % based on the previous informal groups, we can define a catch-all transition command
1026 % ----------------------------------------------------------------------------
1028 %% The following is equivalent to defining
1029 %% \newcommand{\setDefaultTransitions}[2]{
1030 %% \setTransitionsForArabic{#1}{#2}
1032 %% \setTransitionsForOther{#1}{#2}}
1035 \expandafter\noexpand\csname setTransitionsFor
#1\endcsname{###
#1}{###
#2}}
1036 \begingroup\edef\x{\endgroup
1037 \noexpand\newcommand\noexpand\setDefaultTransitions[2]{%
1040 % ----------------------------------------------------------------------------
% Clean-up: make the list helpers \do and \doclass undefined again.
1042 \let\do\@undefined
\let\doclass\@undefined
1045 % End of file `ucharclasses.sty'.