An electric test is now passing
[emacs.git] / lisp / international / ucs-normalize.el
blob6f1e770c09c372c9a63edd33e1787cb95603a8bc
1 ;;; ucs-normalize.el --- Unicode normalization NFC/NFD/NFKD/NFKC
3 ;; Copyright (C) 2009-2019 Free Software Foundation, Inc.
5 ;; Author: Taichi Kawabata <kawabata.taichi@gmail.com>
6 ;; Keywords: unicode, normalization
8 ;; This file is part of GNU Emacs.
10 ;; GNU Emacs is free software: you can redistribute it and/or modify
11 ;; it under the terms of the GNU General Public License as published by
12 ;; the Free Software Foundation, either version 3 of the License, or
13 ;; (at your option) any later version.
15 ;; GNU Emacs is distributed in the hope that it will be useful,
16 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 ;; GNU General Public License for more details.
20 ;; You should have received a copy of the GNU General Public License
21 ;; along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>.
23 ;;; Commentary:
25 ;; This program has passed the NormalizationTest-5.2.0.txt.
27 ;; References:
28 ;; http://www.unicode.org/reports/tr15/
29 ;; http://www.unicode.org/review/pr-29.html
31 ;; HFS-Normalization:
32 ;; Reference:
33 ;; https://developer.apple.com/library/archive/technotes/tn/tn1150.html
35 ;; HFS Normalization excludes following area for decomposition.
37 ;; U+02000 .. U+02FFF :: Punctuation, symbols, dingbats, arrows, etc.
38 ;; (Characters in this region will be composed.)
39 ;; U+0F900 .. U+0FAFF :: CJK compatibility Ideographs.
40 ;; U+2F800 .. U+2FFFF :: CJK compatibility Ideographs.
42 ;; HFS-Normalization is useful for normalizing text involving CJK Ideographs.
44 ;;;
45 ;;; Implementation Notes on NFC/HFS-NFC.
46 ;;;
48 ;; <Stages> Decomposition Composition
49 ;; NFD: 'nfd nil
50 ;; NFC: 'nfd t
51 ;; NFKD: 'nfkd nil
52 ;; NFKC: 'nfkd t
53 ;; HFS-NFD: 'hfs-nfd 'hfs-nfd-comp-p
54 ;; HFS-NFC: 'hfs-nfd t
56 ;; Algorithm for Normalization
58 ;; Before normalization, following data will be prepared.
60 ;; 1. quick-check-list
62 ;; `quick-check-list' consists of characters that will be decomposed
63 ;; during normalization. It includes composition-exclusions,
64 ;; singletons, non-starter-decompositions and decomposable
65 ;; characters.
67 ;; `quick-check-regexp' will search the above characters plus
68 ;; combining characters.
70 ;; 2. decomposition-translation
72 ;; `decomposition-translation' is a translation table that will be
73 ;; used to decompose the characters.
76 ;; Normalization Process
78 ;; A. Searching (`ucs-normalize-region')
80 ;; Region is searched for `quick-check-regexp' to find possibly
81 ;; normalizable point.
83 ;; B. Identification of Normalization Block
85 ;; (1) start of the block
86 ;; If the searched character is a starter and not combining
87 ;; with previous character, then the beginning of the block is
88 ;; the searched character. If searched character is combining
89 ;; character, then previous character will be the target
90 ;; character
91 ;; (2) end of the block
92 ;; Block ends at non-composable starter character.
94 ;; C. Decomposition (`ucs-normalize-block')
96 ;; The entire block will be decomposed by
97 ;; `decomposition-translation' table.
99 ;; D. Sorting and Composition of Smaller Blocks (`ucs-normalize-block-compose-chars')
101 ;; The block will be split to multiple samller blocks by starter
102 ;; characters. Each block is sorted, and composed if necessary.
104 ;; E. Composition of Entire Block (`ucs-normalize-compose-chars')
106 ;; Composed blocks are collected and again composed.
108 ;;; Code:
110 (defconst ucs-normalize-version "1.2")
112 (eval-when-compile
113 (require 'cl-lib)
114 (require 'regexp-opt))
116 (declare-function nfd "ucs-normalize" (char))
118 (eval-when-compile
120 (defconst ucs-normalize-composition-exclusions
121 '(#x0958 #x0959 #x095A #x095B #x095C #x095D #x095E #x095F
122 #x09DC #x09DD #x09DF #x0A33 #x0A36 #x0A59 #x0A5A #x0A5B
123 #x0A5E #x0B5C #x0B5D #x0F43 #x0F4D #x0F52 #x0F57 #x0F5C
124 #x0F69 #x0F76 #x0F78 #x0F93 #x0F9D #x0FA2 #x0FA7 #x0FAC
125 #x0FB9 #xFB1D #xFB1F #xFB2A #xFB2B #xFB2C #xFB2D #xFB2E
126 #xFB2F #xFB30 #xFB31 #xFB32 #xFB33 #xFB34 #xFB35 #xFB36
127 #xFB38 #xFB39 #xFB3A #xFB3B #xFB3C #xFB3E #xFB40 #xFB41
128 #xFB43 #xFB44 #xFB46 #xFB47 #xFB48 #xFB49 #xFB4A #xFB4B
129 #xFB4C #xFB4D #xFB4E #x2ADC #x1D15E #x1D15F #x1D160 #x1D161
130 #x1D162 #x1D163 #x1D164 #x1D1BB #x1D1BC #x1D1BD #x1D1BE
131 #x1D1BF #x1D1C0)
132 "Composition Exclusion List.
133 This list is taken from
134 http://www.unicode.org/Public/UNIDATA/5.2/CompositionExclusions.txt")
136 ;; Unicode ranges that decompositions & combining characters are defined.
137 (defvar check-range nil)
138 (setq check-range '((#x00a0 . #x3400) (#xA600 . #xAC00) (#xF900 . #x110ff) (#x1d000 . #x1dfff) (#x1f100 . #x1f2ff) (#x2f800 . #x2faff)))
140 ;; Basic normalization functions
141 (defun nfd (char)
142 (let ((decomposition
143 (get-char-code-property char 'decomposition)))
144 (if (and decomposition (numberp (car decomposition))
145 (or (> (length decomposition) 1)
146 (/= (car decomposition) char)))
147 decomposition)))
149 (defun nfkd (char)
150 (let ((decomposition
151 (get-char-code-property char 'decomposition)))
152 (if (symbolp (car decomposition)) (cdr decomposition)
153 (if (or (> (length decomposition) 1)
154 (/= (car decomposition) char)) decomposition))))
156 (defun hfs-nfd (char)
157 (when (or (and (>= char 0) (< char #x2000))
158 (and (>= char #x3000) (< char #xf900))
159 (and (>= char #xfb00) (< char #x2f800))
160 (>= char #x30000))
161 (nfd char))))
163 (eval-and-compile
164 (defun ucs-normalize-hfs-nfd-comp-p (char)
165 (and (>= char #x2000) (< char #x3000)))
167 (defsubst ucs-normalize-ccc (char)
168 (get-char-code-property char 'canonical-combining-class))
171 ;; Data common to all normalizations
173 (eval-when-compile
175 (defvar combining-chars nil)
176 (setq combining-chars nil)
177 (defvar decomposition-pair-to-composition nil)
178 (setq decomposition-pair-to-composition nil)
179 (defvar non-starter-decompositions nil)
180 (setq non-starter-decompositions nil)
181 ;; This file needs to access these 2 Unicode properties, but when we
182 ;; compile it during bootstrap, charprop.el was not built yet, and
183 ;; therefore is not yet loaded into bootstrap-emacs, so
184 ;; char-code-property-alist is nil, and get-char-code-property
185 ;; always returns nil, something the code here doesn't like.
186 (define-char-code-property 'decomposition "uni-decomposition.el")
187 (define-char-code-property 'canonical-combining-class "uni-combining.el")
188 (let ((char 0) ccc decomposition)
189 (mapc
190 (lambda (start-end)
191 (cl-do ((char (car start-end) (+ char 1))) ((> char (cdr start-end)))
192 (setq ccc (ucs-normalize-ccc char))
193 (setq decomposition (get-char-code-property
194 char 'decomposition))
195 (if (and (= (length decomposition) 1)
196 (= (car decomposition) char))
197 (setq decomposition nil))
198 (if (and ccc (/= 0 ccc)) (add-to-list 'combining-chars char))
199 (if (and (numberp (car decomposition))
200 (/= (ucs-normalize-ccc (car decomposition))
202 (add-to-list 'non-starter-decompositions char))
203 (when (numberp (car decomposition))
204 (if (and (= 2 (length decomposition))
205 (null (memq char ucs-normalize-composition-exclusions))
206 (null (memq char non-starter-decompositions)))
207 (setq decomposition-pair-to-composition
208 (cons (cons decomposition char)
209 decomposition-pair-to-composition)))
210 ;; If not singleton decomposition, second and later characters in
211 ;; decomposition will be the subject of combining characters.
212 (if (cdr decomposition)
213 (dolist (char (cdr decomposition))
214 (add-to-list 'combining-chars char))))))
215 check-range))
217 (setq combining-chars
218 (append combining-chars
219 '(?ᅡ ?ᅢ ?ᅣ ?ᅤ ?ᅥ ?ᅦ ?ᅧ ?ᅨ ?ᅩ ?ᅪ
220 ?ᅫ ?ᅬ ?ᅭ ?ᅮ ?ᅯ ?ᅰ ?ᅱ ?ᅲ ?ᅳ ?ᅴ ?ᅵ
221 ?ᆨ ?ᆩ ?ᆪ ?ᆫ ?ᆬ ?ᆭ ?ᆮ ?ᆯ ?ᆰ ?ᆱ ?ᆲ ?ᆳ ?ᆴ
222 ?ᆵ ?ᆶ ?ᆷ ?ᆸ ?ᆹ ?ᆺ ?ᆻ ?ᆼ ?ᆽ ?ᆾ ?ᆿ ?ᇀ ?ᇁ ?ᇂ)))
225 (eval-and-compile
226 (defun ucs-normalize-make-hash-table-from-alist (alist)
227 (let ((table (make-hash-table :test 'equal :size 2000)))
228 (mapc (lambda (x) (puthash (car x) (cdr x) table)) alist)
229 table))
231 (defvar ucs-normalize-decomposition-pair-to-primary-composite nil
232 "Hash table of decomposed pair to primary composite.
233 Note that Hangul are excluded.")
234 (setq ucs-normalize-decomposition-pair-to-primary-composite
235 (ucs-normalize-make-hash-table-from-alist
236 (eval-when-compile decomposition-pair-to-composition)))
238 (defun ucs-normalize-primary-composite (decomposition-pair composition-predicate)
239 "Convert DECOMPOSITION-PAIR to primary composite using COMPOSITION-PREDICATE."
240 (let ((char (or (gethash decomposition-pair
241 ucs-normalize-decomposition-pair-to-primary-composite)
242 (and (<= #x1100 (car decomposition-pair))
243 (< (car decomposition-pair) #x1113)
244 (<= #x1161 (cadr decomposition-pair))
245 (< (car decomposition-pair) #x1176)
246 (let ((lindex (- (car decomposition-pair) #x1100))
247 (vindex (- (cadr decomposition-pair) #x1161)))
248 (+ #xAC00 (* (+ (* lindex 21) vindex) 28))))
249 (and (<= #xac00 (car decomposition-pair))
250 (< (car decomposition-pair) #xd7a4)
251 (<= #x11a7 (cadr decomposition-pair))
252 (< (cadr decomposition-pair) #x11c3)
253 (= 0 (% (- (car decomposition-pair) #xac00) 28))
254 (let ((tindex (- (cadr decomposition-pair) #x11a7)))
255 (+ (car decomposition-pair) tindex))))))
256 (if (and char
257 (functionp composition-predicate)
258 (null (funcall composition-predicate char)))
259 nil char)))
262 (defvar ucs-normalize-combining-chars nil)
263 (setq ucs-normalize-combining-chars (eval-when-compile combining-chars))
265 (defvar ucs-normalize-combining-chars-regexp nil
266 "Regular expression to match sequence of combining characters.")
267 (setq ucs-normalize-combining-chars-regexp
268 (eval-when-compile (concat (regexp-opt-charset combining-chars) "+")))
270 (declare-function decomposition-translation-alist "ucs-normalize"
271 (decomposition-function))
272 (declare-function decomposition-char-recursively "ucs-normalize"
273 (char decomposition-function))
274 (declare-function alist-list-to-vector "ucs-normalize" (alist))
276 (eval-when-compile
278 (defun decomposition-translation-alist (decomposition-function)
279 (let (decomposition alist)
280 (mapc
281 (lambda (start-end)
282 (cl-do ((char (car start-end) (+ char 1))) ((> char (cdr start-end)))
283 (setq decomposition (funcall decomposition-function char))
284 (if decomposition
285 (setq alist (cons (cons char
286 (apply 'append
287 (mapcar (lambda (x)
288 (decomposition-char-recursively
289 x decomposition-function))
290 decomposition)))
291 alist)))))
292 check-range)
293 alist))
295 (defun decomposition-char-recursively (char decomposition-function)
296 (let ((decomposition (funcall decomposition-function char)))
297 (if decomposition
298 (apply 'append
299 (mapcar (lambda (x)
300 (decomposition-char-recursively x decomposition-function))
301 decomposition))
302 (list char))))
304 (defun alist-list-to-vector (alist)
305 (mapcar (lambda (x) (cons (car x) (apply 'vector (cdr x)))) alist))
307 (defvar nfd-alist nil)
308 (setq nfd-alist (alist-list-to-vector (decomposition-translation-alist 'nfd)))
309 (defvar nfkd-alist nil)
310 (setq nfkd-alist (alist-list-to-vector (decomposition-translation-alist 'nfkd)))
311 (defvar hfs-nfd-alist nil)
312 (setq hfs-nfd-alist (alist-list-to-vector (decomposition-translation-alist 'hfs-nfd)))
315 (eval-and-compile
316 (defvar ucs-normalize-hangul-translation-alist nil)
317 (setq ucs-normalize-hangul-translation-alist
318 (let ((i 0) entries)
319 (while (< i 11172)
320 (setq entries
321 (cons (cons (+ #xac00 i)
322 (if (= 0 (% i 28))
323 (vector (+ #x1100 (/ i 588))
324 (+ #x1161 (/ (% i 588) 28)))
325 (vector (+ #x1100 (/ i 588))
326 (+ #x1161 (/ (% i 588) 28))
327 (+ #x11a7 (% i 28)))))
328 entries)
329 i (1+ i))) entries))
331 (defun ucs-normalize-make-translation-table-from-alist (alist)
332 (make-translation-table-from-alist
333 (append alist ucs-normalize-hangul-translation-alist)))
335 (define-translation-table 'ucs-normalize-nfd-table
336 (ucs-normalize-make-translation-table-from-alist (eval-when-compile nfd-alist)))
337 (define-translation-table 'ucs-normalize-nfkd-table
338 (ucs-normalize-make-translation-table-from-alist (eval-when-compile nfkd-alist)))
339 (define-translation-table 'ucs-normalize-hfs-nfd-table
340 (ucs-normalize-make-translation-table-from-alist (eval-when-compile hfs-nfd-alist)))
342 (defun ucs-normalize-sort (chars)
343 "Sort by canonical combining class of CHARS."
344 (sort chars
345 (lambda (ch1 ch2)
346 (< (ucs-normalize-ccc ch1) (ucs-normalize-ccc ch2)))))
348 (defun ucs-normalize-compose-chars (chars composition-predicate)
349 "Compose CHARS by COMPOSITION-PREDICATE.
350 CHARS must be sorted and normalized in starter-combining pairs."
351 (if composition-predicate
352 (let* ((starter (car chars))
353 remain result prev-ccc
354 (target-chars (cdr chars))
355 target target-ccc
356 primary-composite)
357 (while target-chars
358 (setq target (car target-chars)
359 target-ccc (ucs-normalize-ccc target))
360 (if (and (or (null prev-ccc)
361 (< prev-ccc target-ccc))
362 (setq primary-composite
363 (ucs-normalize-primary-composite (list starter target)
364 composition-predicate)))
365 ;; case 1: composable
366 (setq starter primary-composite
367 prev-ccc nil)
368 (if (= 0 target-ccc)
369 ;; case 2: move starter
370 (setq result (nconc result (cons starter (nreverse remain)))
371 starter target
372 remain nil)
373 ;; case 3: move target
374 (setq prev-ccc target-ccc
375 remain (cons target remain))))
376 (setq target-chars (cdr target-chars)))
377 (nconc result (cons starter (nreverse remain))))
378 chars))
380 (defun ucs-normalize-block-compose-chars (chars composition-predicate)
381 "Try composing CHARS by COMPOSITION-PREDICATE.
382 If COMPOSITION-PREDICATE is not given, then do nothing."
383 (let ((chars (ucs-normalize-sort chars)))
384 (if composition-predicate
385 (ucs-normalize-compose-chars chars composition-predicate)
386 chars)))
389 (declare-function quick-check-list "ucs-normalize"
390 (decomposition-translation &optional composition-predicate))
391 (declare-function quick-check-list-to-regexp "ucs-normalize" (quick-check-list))
393 (eval-when-compile
395 (defun quick-check-list (decomposition-translation
396 &optional composition-predicate)
397 "Quick-Check List for DECOMPOSITION-TRANSLATION and COMPOSITION-PREDICATE.
398 It includes Singletons, CompositionExclusions, and Non-Starter
399 decomposition."
400 (let (entries decomposition composition)
401 (with-temp-buffer
402 (mapc
403 (lambda (start-end)
404 (cl-do ((i (car start-end) (+ i 1))) ((> i (cdr start-end)))
405 (setq decomposition
406 (string-to-list
407 (progn
408 (erase-buffer)
409 (insert i)
410 (translate-region 1 2 decomposition-translation)
411 (buffer-string))))
412 (setq composition
413 (ucs-normalize-block-compose-chars decomposition composition-predicate))
414 (when (not (equal composition (list i)))
415 (setq entries (cons i entries)))))
416 check-range))
417 ;;(remove-duplicates
418 (append entries
419 ucs-normalize-composition-exclusions
420 non-starter-decompositions)))
423 (defvar nfd-quick-check-list nil)
424 (setq nfd-quick-check-list (quick-check-list 'ucs-normalize-nfd-table ))
425 (defvar nfc-quick-check-list nil)
426 (setq nfc-quick-check-list (quick-check-list 'ucs-normalize-nfd-table t ))
427 (defvar nfkd-quick-check-list nil)
428 (setq nfkd-quick-check-list (quick-check-list 'ucs-normalize-nfkd-table ))
429 (defvar nfkc-quick-check-list nil)
430 (setq nfkc-quick-check-list (quick-check-list 'ucs-normalize-nfkd-table t ))
431 (defvar hfs-nfd-quick-check-list nil)
432 (setq hfs-nfd-quick-check-list (quick-check-list 'ucs-normalize-hfs-nfd-table
433 'ucs-normalize-hfs-nfd-comp-p))
434 (defvar hfs-nfc-quick-check-list nil)
435 (setq hfs-nfc-quick-check-list (quick-check-list 'ucs-normalize-hfs-nfd-table t ))
437 (defun quick-check-list-to-regexp (quick-check-list)
438 (regexp-opt-charset (append quick-check-list combining-chars)))
440 (defun quick-check-decomposition-list-to-regexp (quick-check-list)
441 (concat (quick-check-list-to-regexp quick-check-list) "\\|[가-힣]"))
443 (defun quick-check-composition-list-to-regexp (quick-check-list)
444 (concat (quick-check-list-to-regexp quick-check-list) "\\|[ᅡ-ᅵᆨ-ᇂ]"))
448 ;; NFD/NFC
449 (defvar ucs-normalize-nfd-quick-check-regexp nil)
450 (setq ucs-normalize-nfd-quick-check-regexp
451 (eval-when-compile (quick-check-decomposition-list-to-regexp nfd-quick-check-list)))
452 (defvar ucs-normalize-nfc-quick-check-regexp nil)
453 (setq ucs-normalize-nfc-quick-check-regexp
454 (eval-when-compile (quick-check-composition-list-to-regexp nfc-quick-check-list)))
456 ;; NFKD/NFKC
457 (defvar ucs-normalize-nfkd-quick-check-regexp nil)
458 (setq ucs-normalize-nfkd-quick-check-regexp
459 (eval-when-compile (quick-check-decomposition-list-to-regexp nfkd-quick-check-list)))
460 (defvar ucs-normalize-nfkc-quick-check-regexp nil)
461 (setq ucs-normalize-nfkc-quick-check-regexp
462 (eval-when-compile (quick-check-composition-list-to-regexp nfkc-quick-check-list)))
464 ;; HFS-NFD/HFS-NFC
465 (defvar ucs-normalize-hfs-nfd-quick-check-regexp nil)
466 (setq ucs-normalize-hfs-nfd-quick-check-regexp
467 (eval-when-compile (concat (quick-check-decomposition-list-to-regexp hfs-nfd-quick-check-list))))
468 (defvar ucs-normalize-hfs-nfc-quick-check-regexp nil)
469 (setq ucs-normalize-hfs-nfc-quick-check-regexp
470 (eval-when-compile (quick-check-composition-list-to-regexp hfs-nfc-quick-check-list)))
472 ;;------------------------------------------------------------------------------------------
474 ;; Normalize local region.
476 (defun ucs-normalize-block
477 (from to &optional decomposition-translation-table composition-predicate)
478 "Normalize region FROM TO, by sorting the region with canonical-cc.
479 If DECOMPOSITION-TRANSLATION-TABLE is given, translate region
480 before sorting. If COMPOSITION-PREDICATE is given, then compose
481 the region by using it."
482 (save-restriction
483 (narrow-to-region from to)
484 (goto-char (point-min))
485 (if decomposition-translation-table
486 (translate-region from to decomposition-translation-table))
487 (goto-char (point-min))
488 (let ((start (point)) chars); ccc)
489 (while (not (eobp))
490 (forward-char)
491 (when (or (eobp)
492 (= 0 (ucs-normalize-ccc (char-after (point)))))
493 (setq chars
494 (nconc chars
495 (ucs-normalize-block-compose-chars
496 (string-to-list (buffer-substring start (point)))
497 composition-predicate))
498 start (point)))
499 ;;(unless ccc (error "Undefined character can not be normalized!"))
501 (delete-region (point-min) (point-max))
502 (apply 'insert
503 (ucs-normalize-compose-chars
504 chars composition-predicate)))))
506 (defun ucs-normalize-region
507 (from to quick-check-regexp translation-table composition-predicate)
508 "Normalize region from FROM to TO.
509 QUICK-CHECK-REGEXP is applied for searching the region.
510 TRANSLATION-TABLE will be used to decompose region.
511 COMPOSITION-PREDICATE will be used to compose region."
512 (save-excursion
513 (save-restriction
514 (narrow-to-region from to)
515 (goto-char (point-min))
516 (let (start-pos starter)
517 (while (re-search-forward quick-check-regexp nil t)
518 (setq starter (string-to-char (match-string 0)))
519 (setq start-pos (match-beginning 0))
520 (ucs-normalize-block
521 ;; from
522 (if (or (= start-pos (point-min))
523 (and (= 0 (ucs-normalize-ccc starter))
524 (not (memq starter ucs-normalize-combining-chars))))
525 start-pos (1- start-pos))
526 ;; to
527 (if (looking-at ucs-normalize-combining-chars-regexp)
528 (match-end 0) (1+ start-pos))
529 translation-table composition-predicate))))))
531 ;; --------------------------------------------------------------------------------
533 (defmacro ucs-normalize-string (ucs-normalize-region)
534 `(with-temp-buffer
535 (insert str)
536 (,ucs-normalize-region (point-min) (point-max))
537 (buffer-string)))
539 ;;;###autoload
540 (defun ucs-normalize-NFD-region (from to)
541 "Normalize the current region by the Unicode NFD."
542 (interactive "r")
543 (ucs-normalize-region from to
544 ucs-normalize-nfd-quick-check-regexp
545 'ucs-normalize-nfd-table nil))
546 ;;;###autoload
547 (defun ucs-normalize-NFD-string (str)
548 "Normalize the string STR by the Unicode NFD."
549 (ucs-normalize-string ucs-normalize-NFD-region))
551 ;;;###autoload
552 (defun ucs-normalize-NFC-region (from to)
553 "Normalize the current region by the Unicode NFC."
554 (interactive "r")
555 (ucs-normalize-region from to
556 ucs-normalize-nfc-quick-check-regexp
557 'ucs-normalize-nfd-table t))
558 ;;;###autoload
559 (defun ucs-normalize-NFC-string (str)
560 "Normalize the string STR by the Unicode NFC."
561 (ucs-normalize-string ucs-normalize-NFC-region))
563 ;;;###autoload
564 (defun ucs-normalize-NFKD-region (from to)
565 "Normalize the current region by the Unicode NFKD."
566 (interactive "r")
567 (ucs-normalize-region from to
568 ucs-normalize-nfkd-quick-check-regexp
569 'ucs-normalize-nfkd-table nil))
570 ;;;###autoload
571 (defun ucs-normalize-NFKD-string (str)
572 "Normalize the string STR by the Unicode NFKD."
573 (ucs-normalize-string ucs-normalize-NFKD-region))
575 ;;;###autoload
576 (defun ucs-normalize-NFKC-region (from to)
577 "Normalize the current region by the Unicode NFKC."
578 (interactive "r")
579 (ucs-normalize-region from to
580 ucs-normalize-nfkc-quick-check-regexp
581 'ucs-normalize-nfkd-table t))
582 ;;;###autoload
583 (defun ucs-normalize-NFKC-string (str)
584 "Normalize the string STR by the Unicode NFKC."
585 (ucs-normalize-string ucs-normalize-NFKC-region))
587 ;;;###autoload
588 (defun ucs-normalize-HFS-NFD-region (from to)
589 "Normalize the current region by the Unicode NFD and Mac OS's HFS Plus."
590 (interactive "r")
591 (ucs-normalize-region from to
592 ucs-normalize-hfs-nfd-quick-check-regexp
593 'ucs-normalize-hfs-nfd-table
594 'ucs-normalize-hfs-nfd-comp-p))
595 ;;;###autoload
596 (defun ucs-normalize-HFS-NFD-string (str)
597 "Normalize the string STR by the Unicode NFD and Mac OS's HFS Plus."
598 (ucs-normalize-string ucs-normalize-HFS-NFD-region))
599 ;;;###autoload
600 (defun ucs-normalize-HFS-NFC-region (from to)
601 "Normalize the current region by the Unicode NFC and Mac OS's HFS Plus."
602 (interactive "r")
603 (ucs-normalize-region from to
604 ucs-normalize-hfs-nfc-quick-check-regexp
605 'ucs-normalize-hfs-nfd-table t))
606 ;;;###autoload
607 (defun ucs-normalize-HFS-NFC-string (str)
608 "Normalize the string STR by the Unicode NFC and Mac OS's HFS Plus."
609 (ucs-normalize-string ucs-normalize-HFS-NFC-region))
611 ;; Post-read-conversion function for `utf-8-hfs'.
612 (defun ucs-normalize-hfs-nfd-post-read-conversion (len)
613 (save-excursion
614 (save-restriction
615 (narrow-to-region (point) (+ (point) len))
616 (ucs-normalize-HFS-NFC-region (point-min) (point-max))
617 (- (point-max) (point-min)))))
619 ;; Pre-write conversion for `utf-8-hfs'.
620 ;; _from and _to are legacy arguments (see `define-coding-system').
621 (defun ucs-normalize-hfs-nfd-pre-write-conversion (_from _to)
622 (ucs-normalize-HFS-NFD-region (point-min) (point-max)))
624 ;;; coding-system definition
625 (define-coding-system 'utf-8-hfs
626 "UTF-8 based coding system for macOS HFS file names.
627 The singleton characters in HFS normalization exclusion will not
628 be decomposed."
629 :coding-type 'utf-8
630 :mnemonic ?U
631 :charset-list '(unicode)
632 :post-read-conversion 'ucs-normalize-hfs-nfd-post-read-conversion
633 :pre-write-conversion 'ucs-normalize-hfs-nfd-pre-write-conversion
636 ;; This is tested in dired.c:file_name_completion in order to reject
637 ;; false positives due to comparison of encoded file names.
638 (coding-system-put 'utf-8-hfs 'decomposed-characters 't)
640 (provide 'ucs-normalize)
642 ;; Local Variables:
643 ;; coding: utf-8
644 ;; End:
646 ;;; ucs-normalize.el ends here