.gitignore tweaks
[emacs.git] / lisp / international / ucs-normalize.el
blob8839b00dfff54b77e0cd4a6abc503db4f304aaa3
1 ;;; ucs-normalize.el --- Unicode normalization NFC/NFD/NFKD/NFKC
3 ;; Copyright (C) 2009-2015 Free Software Foundation, Inc.
5 ;; Author: Taichi Kawabata <kawabata.taichi@gmail.com>
6 ;; Keywords: unicode, normalization
8 ;; This file is part of GNU Emacs.
10 ;; GNU Emacs is free software: you can redistribute it and/or modify
11 ;; it under the terms of the GNU General Public License as published by
12 ;; the Free Software Foundation, either version 3 of the License, or
13 ;; (at your option) any later version.
15 ;; GNU Emacs is distributed in the hope that it will be useful,
16 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 ;; GNU General Public License for more details.
20 ;; You should have received a copy of the GNU General Public License
21 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
23 ;;; Commentary:
25 ;; This program has passed the NormalizationTest-5.2.0.txt.
27 ;; References:
28 ;; http://www.unicode.org/reports/tr15/
29 ;; http://www.unicode.org/review/pr-29.html
31 ;; HFS-Normalization:
32 ;; Reference:
33 ;; http://developer.apple.com/technotes/tn/tn1150.html
35 ;; HFS Normalization excludes following area for decomposition.
37 ;; U+02000 .. U+02FFF :: Punctuation, symbols, dingbats, arrows, etc.
38 ;; (Characters in this region will be composed.)
39 ;; U+0F900 .. U+0FAFF :: CJK compatibility Ideographs.
40 ;; U+2F800 .. U+2FFFF :: CJK compatibility Ideographs.
42 ;; HFS-Normalization is useful for normalizing text involving CJK Ideographs.
44 ;;;
45 ;;; Implementation Notes on NFC/HFS-NFC.
46 ;;;
48 ;; <Stages> Decomposition Composition
49 ;; NFD: 'nfd nil
50 ;; NFC: 'nfd t
51 ;; NFKD: 'nfkd nil
52 ;; NFKC: 'nfkd t
53 ;; HFS-NFD: 'hfs-nfd 'hfs-nfd-comp-p
54 ;; HFS-NFC: 'hfs-nfd t
56 ;; Algorithm for Normalization
58 ;; Before normalization, following data will be prepared.
60 ;; 1. quick-check-list
62 ;; `quick-check-list' consists of characters that will be decomposed
63 ;; during normalization. It includes composition-exclusions,
64 ;; singletons, non-starter-decompositions and decomposable
65 ;; characters.
67 ;; `quick-check-regexp' will search the above characters plus
68 ;; combining characters.
70 ;; 2. decomposition-translation
72 ;; `decomposition-translation' is a translation table that will be
73 ;; used to decompose the characters.
76 ;; Normalization Process
78 ;; A. Searching (`ucs-normalize-region')
80 ;; Region is searched for `quick-check-regexp' to find possibly
81 ;; normalizable point.
83 ;; B. Identification of Normalization Block
85 ;; (1) start of the block
86 ;; If the searched character is a starter and not combining
87 ;; with previous character, then the beginning of the block is
88 ;; the searched character. If searched character is combining
89 ;; character, then previous character will be the target
90 ;; character
91 ;; (2) end of the block
92 ;; Block ends at non-composable starter character.
94 ;; C. Decomposition (`ucs-normalize-block')
96 ;; The entire block will be decomposed by
97 ;; `decomposition-translation' table.
99 ;; D. Sorting and Composition of Smaller Blocks (`ucs-normalize-block-compose-chars')
101 ;; The block will be split to multiple samller blocks by starter
102 ;; characters. Each block is sorted, and composed if necessary.
104 ;; E. Composition of Entire Block (`ucs-normalize-compose-chars')
106 ;; Composed blocks are collected and again composed.
108 ;;; Code:
110 (defconst ucs-normalize-version "1.2")
112 (eval-when-compile (require 'cl-lib))
114 (declare-function nfd "ucs-normalize" (char))
116 (eval-when-compile
118 (defconst ucs-normalize-composition-exclusions
119 '(#x0958 #x0959 #x095A #x095B #x095C #x095D #x095E #x095F
120 #x09DC #x09DD #x09DF #x0A33 #x0A36 #x0A59 #x0A5A #x0A5B
121 #x0A5E #x0B5C #x0B5D #x0F43 #x0F4D #x0F52 #x0F57 #x0F5C
122 #x0F69 #x0F76 #x0F78 #x0F93 #x0F9D #x0FA2 #x0FA7 #x0FAC
123 #x0FB9 #xFB1D #xFB1F #xFB2A #xFB2B #xFB2C #xFB2D #xFB2E
124 #xFB2F #xFB30 #xFB31 #xFB32 #xFB33 #xFB34 #xFB35 #xFB36
125 #xFB38 #xFB39 #xFB3A #xFB3B #xFB3C #xFB3E #xFB40 #xFB41
126 #xFB43 #xFB44 #xFB46 #xFB47 #xFB48 #xFB49 #xFB4A #xFB4B
127 #xFB4C #xFB4D #xFB4E #x2ADC #x1D15E #x1D15F #x1D160 #x1D161
128 #x1D162 #x1D163 #x1D164 #x1D1BB #x1D1BC #x1D1BD #x1D1BE
129 #x1D1BF #x1D1C0)
130 "Composition Exclusion List.
131 This list is taken from
132 http://www.unicode.org/Public/UNIDATA/5.2/CompositionExclusions.txt")
134 ;; Unicode ranges that decompositions & combining characters are defined.
135 (defvar check-range nil)
136 (setq check-range '((#x00a0 . #x3400) (#xA600 . #xAC00) (#xF900 . #x110ff) (#x1d000 . #x1dfff) (#x1f100 . #x1f2ff) (#x2f800 . #x2faff)))
138 ;; Basic normalization functions
139 (defun nfd (char)
140 (let ((decomposition
141 (get-char-code-property char 'decomposition)))
142 (if (and decomposition (numberp (car decomposition))
143 (or (> (length decomposition) 1)
144 (/= (car decomposition) char)))
145 decomposition)))
147 (defun nfkd (char)
148 (let ((decomposition
149 (get-char-code-property char 'decomposition)))
150 (if (symbolp (car decomposition)) (cdr decomposition)
151 (if (or (> (length decomposition) 1)
152 (/= (car decomposition) char)) decomposition))))
154 (defun hfs-nfd (char)
155 (when (or (and (>= char 0) (< char #x2000))
156 (and (>= char #x3000) (< char #xf900))
157 (and (>= char #xfb00) (< char #x2f800))
158 (>= char #x30000))
159 (nfd char))))
161 (eval-and-compile
162 (defun ucs-normalize-hfs-nfd-comp-p (char)
163 (and (>= char #x2000) (< char #x3000)))
165 (defsubst ucs-normalize-ccc (char)
166 (get-char-code-property char 'canonical-combining-class))
169 ;; Data common to all normalizations
171 (eval-when-compile
173 (defvar combining-chars nil)
174 (setq combining-chars nil)
175 (defvar decomposition-pair-to-composition nil)
176 (setq decomposition-pair-to-composition nil)
177 (defvar non-starter-decompositions nil)
178 (setq non-starter-decompositions nil)
179 (let ((char 0) ccc decomposition)
180 (mapc
181 (lambda (start-end)
182 (cl-do ((char (car start-end) (+ char 1))) ((> char (cdr start-end)))
183 (setq ccc (ucs-normalize-ccc char))
184 (setq decomposition (get-char-code-property
185 char 'decomposition))
186 (if (and (= (length decomposition) 1)
187 (= (car decomposition) char))
188 (setq decomposition nil))
189 (if (and ccc (/= 0 ccc)) (add-to-list 'combining-chars char))
190 (if (and (numberp (car decomposition))
191 (/= (ucs-normalize-ccc (car decomposition))
193 (add-to-list 'non-starter-decompositions char))
194 (when (numberp (car decomposition))
195 (if (and (= 2 (length decomposition))
196 (null (memq char ucs-normalize-composition-exclusions))
197 (null (memq char non-starter-decompositions)))
198 (setq decomposition-pair-to-composition
199 (cons (cons decomposition char)
200 decomposition-pair-to-composition)))
201 ;; If not singleton decomposition, second and later characters in
202 ;; decomposition will be the subject of combining characters.
203 (if (cdr decomposition)
204 (dolist (char (cdr decomposition))
205 (add-to-list 'combining-chars char))))))
206 check-range))
208 (setq combining-chars
209 (append combining-chars
210 '(?ᅡ ?ᅢ ?ᅣ ?ᅤ ?ᅥ ?ᅦ ?ᅧ ?ᅨ ?ᅩ ?ᅪ
211 ?ᅫ ?ᅬ ?ᅭ ?ᅮ ?ᅯ ?ᅰ ?ᅱ ?ᅲ ?ᅳ ?ᅴ ?ᅵ
212 ?ᆨ ?ᆩ ?ᆪ ?ᆫ ?ᆬ ?ᆭ ?ᆮ ?ᆯ ?ᆰ ?ᆱ ?ᆲ ?ᆳ ?ᆴ
213 ?ᆵ ?ᆶ ?ᆷ ?ᆸ ?ᆹ ?ᆺ ?ᆻ ?ᆼ ?ᆽ ?ᆾ ?ᆿ ?ᇀ ?ᇁ ?ᇂ)))
216 (eval-and-compile
217 (defun ucs-normalize-make-hash-table-from-alist (alist)
218 (let ((table (make-hash-table :test 'equal :size 2000)))
219 (mapc (lambda (x) (puthash (car x) (cdr x) table)) alist)
220 table))
222 (defvar ucs-normalize-decomposition-pair-to-primary-composite nil
223 "Hashtable of decomposed pair to primary composite.
224 Note that Hangul are excluded.")
225 (setq ucs-normalize-decomposition-pair-to-primary-composite
226 (ucs-normalize-make-hash-table-from-alist
227 (eval-when-compile decomposition-pair-to-composition)))
229 (defun ucs-normalize-primary-composite (decomposition-pair composition-predicate)
230 "Convert DECOMPOSITION-PAIR to primary composite using COMPOSITION-PREDICATE."
231 (let ((char (or (gethash decomposition-pair
232 ucs-normalize-decomposition-pair-to-primary-composite)
233 (and (<= #x1100 (car decomposition-pair))
234 (< (car decomposition-pair) #x1113)
235 (<= #x1161 (cadr decomposition-pair))
236 (< (car decomposition-pair) #x1176)
237 (let ((lindex (- (car decomposition-pair) #x1100))
238 (vindex (- (cadr decomposition-pair) #x1161)))
239 (+ #xAC00 (* (+ (* lindex 21) vindex) 28))))
240 (and (<= #xac00 (car decomposition-pair))
241 (< (car decomposition-pair) #xd7a4)
242 (<= #x11a7 (cadr decomposition-pair))
243 (< (cadr decomposition-pair) #x11c3)
244 (= 0 (% (- (car decomposition-pair) #xac00) 28))
245 (let ((tindex (- (cadr decomposition-pair) #x11a7)))
246 (+ (car decomposition-pair) tindex))))))
247 (if (and char
248 (functionp composition-predicate)
249 (null (funcall composition-predicate char)))
250 nil char)))
253 (defvar ucs-normalize-combining-chars nil)
254 (setq ucs-normalize-combining-chars (eval-when-compile combining-chars))
256 (defvar ucs-normalize-combining-chars-regexp nil
257 "Regular expression to match sequence of combining characters.")
258 (setq ucs-normalize-combining-chars-regexp
259 (eval-when-compile (concat (regexp-opt (mapcar 'char-to-string combining-chars)) "+")))
261 (declare-function decomposition-translation-alist "ucs-normalize"
262 (decomposition-function))
263 (declare-function decomposition-char-recursively "ucs-normalize"
264 (char decomposition-function))
265 (declare-function alist-list-to-vector "ucs-normalize" (alist))
267 (eval-when-compile
269 (defun decomposition-translation-alist (decomposition-function)
270 (let (decomposition alist)
271 (mapc
272 (lambda (start-end)
273 (cl-do ((char (car start-end) (+ char 1))) ((> char (cdr start-end)))
274 (setq decomposition (funcall decomposition-function char))
275 (if decomposition
276 (setq alist (cons (cons char
277 (apply 'append
278 (mapcar (lambda (x)
279 (decomposition-char-recursively
280 x decomposition-function))
281 decomposition)))
282 alist)))))
283 check-range)
284 alist))
286 (defun decomposition-char-recursively (char decomposition-function)
287 (let ((decomposition (funcall decomposition-function char)))
288 (if decomposition
289 (apply 'append
290 (mapcar (lambda (x)
291 (decomposition-char-recursively x decomposition-function))
292 decomposition))
293 (list char))))
295 (defun alist-list-to-vector (alist)
296 (mapcar (lambda (x) (cons (car x) (apply 'vector (cdr x)))) alist))
298 (defvar nfd-alist nil)
299 (setq nfd-alist (alist-list-to-vector (decomposition-translation-alist 'nfd)))
300 (defvar nfkd-alist nil)
301 (setq nfkd-alist (alist-list-to-vector (decomposition-translation-alist 'nfkd)))
302 (defvar hfs-nfd-alist nil)
303 (setq hfs-nfd-alist (alist-list-to-vector (decomposition-translation-alist 'hfs-nfd)))
306 (eval-and-compile
307 (defvar ucs-normalize-hangul-translation-alist nil)
308 (setq ucs-normalize-hangul-translation-alist
309 (let ((i 0) entries)
310 (while (< i 11172)
311 (setq entries
312 (cons (cons (+ #xac00 i)
313 (if (= 0 (% i 28))
314 (vector (+ #x1100 (/ i 588))
315 (+ #x1161 (/ (% i 588) 28)))
316 (vector (+ #x1100 (/ i 588))
317 (+ #x1161 (/ (% i 588) 28))
318 (+ #x11a7 (% i 28)))))
319 entries)
320 i (1+ i))) entries))
322 (defun ucs-normalize-make-translation-table-from-alist (alist)
323 (make-translation-table-from-alist
324 (append alist ucs-normalize-hangul-translation-alist)))
326 (define-translation-table 'ucs-normalize-nfd-table
327 (ucs-normalize-make-translation-table-from-alist (eval-when-compile nfd-alist)))
328 (define-translation-table 'ucs-normalize-nfkd-table
329 (ucs-normalize-make-translation-table-from-alist (eval-when-compile nfkd-alist)))
330 (define-translation-table 'ucs-normalize-hfs-nfd-table
331 (ucs-normalize-make-translation-table-from-alist (eval-when-compile hfs-nfd-alist)))
333 (defun ucs-normalize-sort (chars)
334 "Sort by canonical combining class of CHARS."
335 (sort chars
336 (lambda (ch1 ch2)
337 (< (ucs-normalize-ccc ch1) (ucs-normalize-ccc ch2)))))
339 (defun ucs-normalize-compose-chars (chars composition-predicate)
340 "Compose CHARS by COMPOSITION-PREDICATE.
341 CHARS must be sorted and normalized in starter-combining pairs."
342 (if composition-predicate
343 (let* ((starter (car chars))
344 remain result prev-ccc
345 (target-chars (cdr chars))
346 target target-ccc
347 primary-composite)
348 (while target-chars
349 (setq target (car target-chars)
350 target-ccc (ucs-normalize-ccc target))
351 (if (and (or (null prev-ccc)
352 (< prev-ccc target-ccc))
353 (setq primary-composite
354 (ucs-normalize-primary-composite (list starter target)
355 composition-predicate)))
356 ;; case 1: composable
357 (setq starter primary-composite
358 prev-ccc nil)
359 (if (= 0 target-ccc)
360 ;; case 2: move starter
361 (setq result (nconc result (cons starter (nreverse remain)))
362 starter target
363 remain nil)
364 ;; case 3: move target
365 (setq prev-ccc target-ccc
366 remain (cons target remain))))
367 (setq target-chars (cdr target-chars)))
368 (nconc result (cons starter (nreverse remain))))
369 chars))
371 (defun ucs-normalize-block-compose-chars (chars composition-predicate)
372 "Try composing CHARS by COMPOSITION-PREDICATE.
373 If COMPOSITION-PREDICATE is not given, then do nothing."
374 (let ((chars (ucs-normalize-sort chars)))
375 (if composition-predicate
376 (ucs-normalize-compose-chars chars composition-predicate)
377 chars)))
380 (declare-function quick-check-list "ucs-normalize"
381 (decomposition-translation &optional composition-predicate))
382 (declare-function quick-check-list-to-regexp "ucs-normalize" (quick-check-list))
384 (eval-when-compile
386 (defun quick-check-list (decomposition-translation
387 &optional composition-predicate)
388 "Quick-Check List for DECOMPOSITION-TRANSLATION and COMPOSITION-PREDICATE.
389 It includes Singletons, CompositionExclusions, and Non-Starter
390 decomposition."
391 (let (entries decomposition composition)
392 (mapc
393 (lambda (start-end)
394 (cl-do ((i (car start-end) (+ i 1))) ((> i (cdr start-end)))
395 (setq decomposition
396 (string-to-list
397 (with-temp-buffer
398 (insert i)
399 (translate-region 1 2 decomposition-translation)
400 (buffer-string))))
401 (setq composition
402 (ucs-normalize-block-compose-chars decomposition composition-predicate))
403 (when (not (equal composition (list i)))
404 (setq entries (cons i entries)))))
405 check-range)
406 ;;(remove-duplicates
407 (append entries
408 ucs-normalize-composition-exclusions
409 non-starter-decompositions)))
412 (defvar nfd-quick-check-list nil)
413 (setq nfd-quick-check-list (quick-check-list 'ucs-normalize-nfd-table ))
414 (defvar nfc-quick-check-list nil)
415 (setq nfc-quick-check-list (quick-check-list 'ucs-normalize-nfd-table t ))
416 (defvar nfkd-quick-check-list nil)
417 (setq nfkd-quick-check-list (quick-check-list 'ucs-normalize-nfkd-table ))
418 (defvar nfkc-quick-check-list nil)
419 (setq nfkc-quick-check-list (quick-check-list 'ucs-normalize-nfkd-table t ))
420 (defvar hfs-nfd-quick-check-list nil)
421 (setq hfs-nfd-quick-check-list (quick-check-list 'ucs-normalize-hfs-nfd-table
422 'ucs-normalize-hfs-nfd-comp-p))
423 (defvar hfs-nfc-quick-check-list nil)
424 (setq hfs-nfc-quick-check-list (quick-check-list 'ucs-normalize-hfs-nfd-table t ))
426 (defun quick-check-list-to-regexp (quick-check-list)
427 (regexp-opt (mapcar 'char-to-string (append quick-check-list combining-chars))))
429 (defun quick-check-decomposition-list-to-regexp (quick-check-list)
430 (concat (quick-check-list-to-regexp quick-check-list) "\\|[가-힣]"))
432 (defun quick-check-composition-list-to-regexp (quick-check-list)
433 (concat (quick-check-list-to-regexp quick-check-list) "\\|[ᅡ-ᅵᆨ-ᇂ]"))
437 ;; NFD/NFC
438 (defvar ucs-normalize-nfd-quick-check-regexp nil)
439 (setq ucs-normalize-nfd-quick-check-regexp
440 (eval-when-compile (quick-check-decomposition-list-to-regexp nfd-quick-check-list)))
441 (defvar ucs-normalize-nfc-quick-check-regexp nil)
442 (setq ucs-normalize-nfc-quick-check-regexp
443 (eval-when-compile (quick-check-composition-list-to-regexp nfc-quick-check-list)))
445 ;; NFKD/NFKC
446 (defvar ucs-normalize-nfkd-quick-check-regexp nil)
447 (setq ucs-normalize-nfkd-quick-check-regexp
448 (eval-when-compile (quick-check-decomposition-list-to-regexp nfkd-quick-check-list)))
449 (defvar ucs-normalize-nfkc-quick-check-regexp nil)
450 (setq ucs-normalize-nfkc-quick-check-regexp
451 (eval-when-compile (quick-check-composition-list-to-regexp nfkc-quick-check-list)))
453 ;; HFS-NFD/HFS-NFC
454 (defvar ucs-normalize-hfs-nfd-quick-check-regexp nil)
455 (setq ucs-normalize-hfs-nfd-quick-check-regexp
456 (eval-when-compile (concat (quick-check-decomposition-list-to-regexp hfs-nfd-quick-check-list))))
457 (defvar ucs-normalize-hfs-nfc-quick-check-regexp nil)
458 (setq ucs-normalize-hfs-nfc-quick-check-regexp
459 (eval-when-compile (quick-check-composition-list-to-regexp hfs-nfc-quick-check-list)))
461 ;;------------------------------------------------------------------------------------------
463 ;; Normalize local region.
465 (defun ucs-normalize-block
466 (from to &optional decomposition-translation-table composition-predicate)
467 "Normalize region FROM TO, by sorting the region with canonical-cc.
468 If DECOMPOSITION-TRANSLATION-TABLE is given, translate region
469 before sorting. If COMPOSITION-PREDICATE is given, then compose
470 the region by using it."
471 (save-restriction
472 (narrow-to-region from to)
473 (goto-char (point-min))
474 (if decomposition-translation-table
475 (translate-region from to decomposition-translation-table))
476 (goto-char (point-min))
477 (let ((start (point)) chars); ccc)
478 (while (not (eobp))
479 (forward-char)
480 (when (or (eobp)
481 (= 0 (ucs-normalize-ccc (char-after (point)))))
482 (setq chars
483 (nconc chars
484 (ucs-normalize-block-compose-chars
485 (string-to-list (buffer-substring start (point)))
486 composition-predicate))
487 start (point)))
488 ;;(unless ccc (error "Undefined character can not be normalized!"))
490 (delete-region (point-min) (point-max))
491 (apply 'insert
492 (ucs-normalize-compose-chars
493 chars composition-predicate)))))
495 (defun ucs-normalize-region
496 (from to quick-check-regexp translation-table composition-predicate)
497 "Normalize region from FROM to TO.
498 QUICK-CHECK-REGEXP is applied for searching the region.
499 TRANSLATION-TABLE will be used to decompose region.
500 COMPOSITION-PREDICATE will be used to compose region."
501 (save-excursion
502 (save-restriction
503 (narrow-to-region from to)
504 (goto-char (point-min))
505 (let (start-pos starter)
506 (while (re-search-forward quick-check-regexp nil t)
507 (setq starter (string-to-char (match-string 0)))
508 (setq start-pos (match-beginning 0))
509 (ucs-normalize-block
510 ;; from
511 (if (or (= start-pos (point-min))
512 (and (= 0 (ucs-normalize-ccc starter))
513 (not (memq starter ucs-normalize-combining-chars))))
514 start-pos (1- start-pos))
515 ;; to
516 (if (looking-at ucs-normalize-combining-chars-regexp)
517 (match-end 0) (1+ start-pos))
518 translation-table composition-predicate))))))
520 ;; --------------------------------------------------------------------------------
522 (defmacro ucs-normalize-string (ucs-normalize-region)
523 `(with-temp-buffer
524 (insert str)
525 (,ucs-normalize-region (point-min) (point-max))
526 (buffer-string)))
528 ;;;###autoload
529 (defun ucs-normalize-NFD-region (from to)
530 "Normalize the current region by the Unicode NFD."
531 (interactive "r")
532 (ucs-normalize-region from to
533 ucs-normalize-nfd-quick-check-regexp
534 'ucs-normalize-nfd-table nil))
535 ;;;###autoload
536 (defun ucs-normalize-NFD-string (str)
537 "Normalize the string STR by the Unicode NFD."
538 (ucs-normalize-string ucs-normalize-NFD-region))
540 ;;;###autoload
541 (defun ucs-normalize-NFC-region (from to)
542 "Normalize the current region by the Unicode NFC."
543 (interactive "r")
544 (ucs-normalize-region from to
545 ucs-normalize-nfc-quick-check-regexp
546 'ucs-normalize-nfd-table t))
547 ;;;###autoload
548 (defun ucs-normalize-NFC-string (str)
549 "Normalize the string STR by the Unicode NFC."
550 (ucs-normalize-string ucs-normalize-NFC-region))
552 ;;;###autoload
553 (defun ucs-normalize-NFKD-region (from to)
554 "Normalize the current region by the Unicode NFKD."
555 (interactive "r")
556 (ucs-normalize-region from to
557 ucs-normalize-nfkd-quick-check-regexp
558 'ucs-normalize-nfkd-table nil))
559 ;;;###autoload
560 (defun ucs-normalize-NFKD-string (str)
561 "Normalize the string STR by the Unicode NFKD."
562 (ucs-normalize-string ucs-normalize-NFKD-region))
564 ;;;###autoload
565 (defun ucs-normalize-NFKC-region (from to)
566 "Normalize the current region by the Unicode NFKC."
567 (interactive "r")
568 (ucs-normalize-region from to
569 ucs-normalize-nfkc-quick-check-regexp
570 'ucs-normalize-nfkd-table t))
571 ;;;###autoload
572 (defun ucs-normalize-NFKC-string (str)
573 "Normalize the string STR by the Unicode NFKC."
574 (ucs-normalize-string ucs-normalize-NFKC-region))
576 ;;;###autoload
577 (defun ucs-normalize-HFS-NFD-region (from to)
578 "Normalize the current region by the Unicode NFD and Mac OS's HFS Plus."
579 (interactive "r")
580 (ucs-normalize-region from to
581 ucs-normalize-hfs-nfd-quick-check-regexp
582 'ucs-normalize-hfs-nfd-table
583 'ucs-normalize-hfs-nfd-comp-p))
584 ;;;###autoload
585 (defun ucs-normalize-HFS-NFD-string (str)
586 "Normalize the string STR by the Unicode NFD and Mac OS's HFS Plus."
587 (ucs-normalize-string ucs-normalize-HFS-NFD-region))
588 ;;;###autoload
589 (defun ucs-normalize-HFS-NFC-region (from to)
590 "Normalize the current region by the Unicode NFC and Mac OS's HFS Plus."
591 (interactive "r")
592 (ucs-normalize-region from to
593 ucs-normalize-hfs-nfc-quick-check-regexp
594 'ucs-normalize-hfs-nfd-table t))
595 ;;;###autoload
596 (defun ucs-normalize-HFS-NFC-string (str)
597 "Normalize the string STR by the Unicode NFC and Mac OS's HFS Plus."
598 (ucs-normalize-string ucs-normalize-HFS-NFC-region))
600 ;; Post-read-conversion function for `utf-8-hfs'.
601 (defun ucs-normalize-hfs-nfd-post-read-conversion (len)
602 (save-excursion
603 (save-restriction
604 (narrow-to-region (point) (+ (point) len))
605 (ucs-normalize-HFS-NFC-region (point-min) (point-max))
606 (- (point-max) (point-min)))))
608 ;; Pre-write conversion for `utf-8-hfs'.
609 (defun ucs-normalize-hfs-nfd-pre-write-conversion (from to)
610 (let ((old-buf (current-buffer)))
611 (set-buffer (generate-new-buffer " *temp*"))
612 (if (stringp from)
613 (insert from)
614 (insert-buffer-substring old-buf from to))
615 (ucs-normalize-HFS-NFD-region (point-min) (point-max))
616 nil))
618 ;;; coding-system definition
619 (define-coding-system 'utf-8-hfs
620 "UTF-8 based coding system for MacOS HFS file names.
621 The singleton characters in HFS normalization exclusion will not
622 be decomposed."
623 :coding-type 'utf-8
624 :mnemonic ?U
625 :charset-list '(unicode)
626 :post-read-conversion 'ucs-normalize-hfs-nfd-post-read-conversion
627 :pre-write-conversion 'ucs-normalize-hfs-nfd-pre-write-conversion
630 (provide 'ucs-normalize)
632 ;; Local Variables:
633 ;; coding: utf-8
634 ;; End:
636 ;;; ucs-normalize.el ends here