1 ;;; nxml-uchnm.el --- support for Unicode standard character names in nxml-mode
3 ;; Copyright (C) 2003, 2007-2013 Free Software Foundation, Inc.
8 ;; This file is part of GNU Emacs.
10 ;; GNU Emacs is free software: you can redistribute it and/or modify
11 ;; it under the terms of the GNU General Public License as published by
12 ;; the Free Software Foundation, either version 3 of the License, or
13 ;; (at your option) any later version.
15 ;; GNU Emacs is distributed in the hope that it will be useful,
16 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 ;; GNU General Public License for more details.
20 ;; You should have received a copy of the GNU General Public License
21 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
25 ;; This enables the use of the character names defined in the Unicode
26 ;; Standard. The use of the names can be controlled on a per-block
27 ;; basis, so as both to reduce memory usage and loading time,
28 ;; and to make completion work better.
(defconst nxml-unicode-blocks
  ;; One (NAME FIRST LAST) entry per block.  Entries that are commented
  ;; out are not part of the list.
  '(("Basic Latin" #x0000 #x007F)
    ("Latin-1 Supplement" #x0080 #x00FF)
    ("Latin Extended-A" #x0100 #x017F)
    ("Latin Extended-B" #x0180 #x024F)
    ("IPA Extensions" #x0250 #x02AF)
    ("Spacing Modifier Letters" #x02B0 #x02FF)
    ("Combining Diacritical Marks" #x0300 #x036F)
    ("Greek and Coptic" #x0370 #x03FF)
    ("Cyrillic" #x0400 #x04FF)
    ("Cyrillic Supplementary" #x0500 #x052F)
    ("Armenian" #x0530 #x058F)
    ("Hebrew" #x0590 #x05FF)
    ("Arabic" #x0600 #x06FF)
    ("Syriac" #x0700 #x074F)
    ("Thaana" #x0780 #x07BF)
    ("Devanagari" #x0900 #x097F)
    ("Bengali" #x0980 #x09FF)
    ("Gurmukhi" #x0A00 #x0A7F)
    ("Gujarati" #x0A80 #x0AFF)
    ("Oriya" #x0B00 #x0B7F)
    ("Tamil" #x0B80 #x0BFF)
    ("Telugu" #x0C00 #x0C7F)
    ("Kannada" #x0C80 #x0CFF)
    ("Malayalam" #x0D00 #x0D7F)
    ("Sinhala" #x0D80 #x0DFF)
    ("Thai" #x0E00 #x0E7F)
    ("Tibetan" #x0F00 #x0FFF)
    ("Myanmar" #x1000 #x109F)
    ("Georgian" #x10A0 #x10FF)
    ("Hangul Jamo" #x1100 #x11FF)
    ("Ethiopic" #x1200 #x137F)
    ("Cherokee" #x13A0 #x13FF)
    ("Unified Canadian Aboriginal Syllabics" #x1400 #x167F)
    ("Ogham" #x1680 #x169F)
    ("Runic" #x16A0 #x16FF)
    ("Tagalog" #x1700 #x171F)
    ("Hanunoo" #x1720 #x173F)
    ("Buhid" #x1740 #x175F)
    ("Tagbanwa" #x1760 #x177F)
    ("Khmer" #x1780 #x17FF)
    ("Mongolian" #x1800 #x18AF)
    ("Latin Extended Additional" #x1E00 #x1EFF)
    ("Greek Extended" #x1F00 #x1FFF)
    ("General Punctuation" #x2000 #x206F)
    ("Superscripts and Subscripts" #x2070 #x209F)
    ("Currency Symbols" #x20A0 #x20CF)
    ("Combining Diacritical Marks for Symbols" #x20D0 #x20FF)
    ("Letterlike Symbols" #x2100 #x214F)
    ("Number Forms" #x2150 #x218F)
    ("Arrows" #x2190 #x21FF)
    ("Mathematical Operators" #x2200 #x22FF)
    ("Miscellaneous Technical" #x2300 #x23FF)
    ("Control Pictures" #x2400 #x243F)
    ("Optical Character Recognition" #x2440 #x245F)
    ("Enclosed Alphanumerics" #x2460 #x24FF)
    ("Box Drawing" #x2500 #x257F)
    ("Block Elements" #x2580 #x259F)
    ("Geometric Shapes" #x25A0 #x25FF)
    ("Miscellaneous Symbols" #x2600 #x26FF)
    ("Dingbats" #x2700 #x27BF)
    ("Miscellaneous Mathematical Symbols-A" #x27C0 #x27EF)
    ("Supplemental Arrows-A" #x27F0 #x27FF)
    ("Braille Patterns" #x2800 #x28FF)
    ("Supplemental Arrows-B" #x2900 #x297F)
    ("Miscellaneous Mathematical Symbols-B" #x2980 #x29FF)
    ("Supplemental Mathematical Operators" #x2A00 #x2AFF)
    ("CJK Radicals Supplement" #x2E80 #x2EFF)
    ("Kangxi Radicals" #x2F00 #x2FDF)
    ("Ideographic Description Characters" #x2FF0 #x2FFF)
    ("CJK Symbols and Punctuation" #x3000 #x303F)
    ("Hiragana" #x3040 #x309F)
    ("Katakana" #x30A0 #x30FF)
    ("Bopomofo" #x3100 #x312F)
    ("Hangul Compatibility Jamo" #x3130 #x318F)
    ("Kanbun" #x3190 #x319F)
    ("Bopomofo Extended" #x31A0 #x31BF)
    ("Katakana Phonetic Extensions" #x31F0 #x31FF)
    ("Enclosed CJK Letters and Months" #x3200 #x32FF)
    ("CJK Compatibility" #x3300 #x33FF)
    ("CJK Unified Ideographs Extension A" #x3400 #x4DBF)
    ;;("CJK Unified Ideographs" #x4E00 #x9FFF)
    ("Yi Syllables" #xA000 #xA48F)
    ("Yi Radicals" #xA490 #xA4CF)
    ;;("Hangul Syllables" #xAC00 #xD7AF)
    ;;("High Surrogates" #xD800 #xDB7F)
    ;;("High Private Use Surrogates" #xDB80 #xDBFF)
    ;;("Low Surrogates" #xDC00 #xDFFF)
    ;;("Private Use Area" #xE000 #xF8FF)
    ;;("CJK Compatibility Ideographs" #xF900 #xFAFF)
    ("Alphabetic Presentation Forms" #xFB00 #xFB4F)
    ("Arabic Presentation Forms-A" #xFB50 #xFDFF)
    ("Variation Selectors" #xFE00 #xFE0F)
    ("Combining Half Marks" #xFE20 #xFE2F)
    ("CJK Compatibility Forms" #xFE30 #xFE4F)
    ("Small Form Variants" #xFE50 #xFE6F)
    ("Arabic Presentation Forms-B" #xFE70 #xFEFF)
    ("Halfwidth and Fullwidth Forms" #xFF00 #xFFEF)
    ("Specials" #xFFF0 #xFFFF)
    ("Old Italic" #x10300 #x1032F)
    ("Gothic" #x10330 #x1034F)
    ("Deseret" #x10400 #x1044F)
    ("Byzantine Musical Symbols" #x1D000 #x1D0FF)
    ("Musical Symbols" #x1D100 #x1D1FF)
    ("Mathematical Alphanumeric Symbols" #x1D400 #x1D7FF)
    ;;("CJK Unified Ideographs Extension B" #x20000 #x2A6DF)
    ;;("CJK Compatibility Ideographs Supplement" #x2F800 #x2FA1F)
    ("Tags" #xE0000 #xE007F)
    ;;("Supplementary Private Use Area-A" #xF0000 #xFFFFF)
    ;;("Supplementary Private Use Area-B" #x100000 #x10FFFF)
    )
  "List of Unicode blocks.
For each block there is a list (NAME FIRST LAST), where
NAME is a string giving the official name of the block,
FIRST is the first code-point and LAST is the last code-point.
Blocks containing only characters with algorithmic names or no names
are omitted.")
(defun nxml-unicode-block-char-name-set (name)
  "Return a symbol for a block whose official Unicode name is NAME.
The symbol is generated by downcasing and replacing each space
by a hyphen."
  ;; FIXEDCASE and LITERAL are both t so the hyphen is inserted verbatim,
  ;; with no case conversion and no special handling of backslashes.
  (intern (replace-regexp-in-string " " "-" (downcase name) t t)))
159 ;; This is intended to be a superset of the coverage
160 ;; of existing standard entity sets.
;; NOTE(review): only the block symbols visible at this point are listed;
;; confirm the list against the intended default coverage.
(defvar nxml-enabled-unicode-blocks-default
  '(spacing-modifier-letters
    combining-diacritical-marks
    superscripts-and-subscripts
    combining-diacritical-marks-for-symbols
    mathematical-operators
    miscellaneous-technical
    optical-character-recognition
    enclosed-alphanumerics
    miscellaneous-symbols
    miscellaneous-mathematical-symbols-a
    supplemental-arrows-a
    supplemental-arrows-b
    miscellaneous-mathematical-symbols-b
    supplemental-mathematical-operators
    cjk-symbols-and-punctuation
    alphabetic-presentation-forms
    mathematical-alphanumeric-symbols)
  "Default value for `nxml-enabled-unicode-blocks'.")
;; For each BLOCK passed to the lambda, call `nxml-autoload-char-name-set'
;; with the symbol derived from the block's name, i.e. (car BLOCK), and a
;; string built from the template "nxml/%05X-%05X".
;; NOTE(review): the arguments to `format' and the sequence being mapped
;; over are not visible here -- presumably the FIRST/LAST code points of
;; the entries in `nxml-unicode-blocks'; confirm.
201 (mapc (lambda (block)
202 (nxml-autoload-char-name-set
203 (nxml-unicode-block-char-name-set (car block
))
205 (format "nxml/%05X-%05X"
;; Internal flag to control whether customize reloads the character tables.
;; It is set to t by `nxml-enable-unicode-char-name-sets', and the `:set'
;; function of `nxml-enabled-unicode-blocks' checks it before calling that
;; function again.
(defvar nxml-internal-unicode-char-name-sets-enabled nil)
;; User option holding the list of enabled block symbols; its standard
;; value is `nxml-enabled-unicode-blocks-default'.
215 (defcustom nxml-enabled-unicode-blocks nxml-enabled-unicode-blocks-default
216 "List of Unicode blocks for which Unicode character names are enabled.
217 Each block is identified by a symbol derived from the name
218 of the block by downcasing and replacing each space by a hyphen."
;; The setter stores VALUE as the default value of the option and, when
;; `nxml-internal-unicode-char-name-sets-enabled' is non-nil, calls
;; `nxml-enable-unicode-char-name-sets'.
220 :set
(lambda (sym value
)
221 (set-default 'nxml-enabled-unicode-blocks value
)
222 (when nxml-internal-unicode-char-name-sets-enabled
223 (nxml-enable-unicode-char-name-sets)))
;; Builds one `(const :tag ...)' form per BLOCK, tagged with a string made
;; from the template "%s (%04X-%04X)".
;; NOTE(review): the `format' arguments and the argument passed to
;; `nxml-unicode-block-char-name-set' are not visible here -- presumably
;; NAME, FIRST and LAST of each entry of `nxml-unicode-blocks'; confirm.
225 (mapcar (lambda (block)
226 `(const :tag
,(format "%s (%04X-%04X)"
230 ,(nxml-unicode-block-char-name-set
232 nxml-unicode-blocks
)))
(defun nxml-enable-unicode-char-name-sets ()
  "Enable the use of Unicode standard names for characters.
The Unicode blocks for which names are enabled are controlled by
the variable `nxml-enabled-unicode-blocks'."
  ;; Record that the sets have been enabled; the `:set' function of
  ;; `nxml-enabled-unicode-blocks' checks this flag before calling us again.
  (setq nxml-internal-unicode-char-name-sets-enabled t)
  ;; First disable the char-name set of every block in the table.  Each
  ;; entry is a list whose car is the block name, so the table itself
  ;; (not the list of enabled symbols) must be the sequence mapped over.
  (mapc (lambda (block)
          (nxml-disable-char-name-set
           (nxml-unicode-block-char-name-set (car block))))
        nxml-unicode-blocks)
  ;; Then enable exactly the sets the user has selected.
  (mapc (lambda (nameset)
          (nxml-enable-char-name-set nameset))
        nxml-enabled-unicode-blocks))
;; Announce the `nxml-uchnm' feature so that `require' can find it.
(provide 'nxml-uchnm)
251 ;;; nxml-uchnm.el ends here