1 ;;; ucs-normalize.el --- Unicode normalization NFC/NFD/NFKD/NFKC
3 ;; Copyright (C) 2009-2016 Free Software Foundation, Inc.
5 ;; Author: Taichi Kawabata <kawabata.taichi@gmail.com>
6 ;; Keywords: unicode, normalization
8 ;; This file is part of GNU Emacs.
10 ;; GNU Emacs is free software: you can redistribute it and/or modify
11 ;; it under the terms of the GNU General Public License as published by
12 ;; the Free Software Foundation, either version 3 of the License, or
13 ;; (at your option) any later version.
15 ;; GNU Emacs is distributed in the hope that it will be useful,
16 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 ;; GNU General Public License for more details.
20 ;; You should have received a copy of the GNU General Public License
21 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
25 ;; This program has passed the NormalizationTest-5.2.0.txt.
28 ;; http://www.unicode.org/reports/tr15/
29 ;; http://www.unicode.org/review/pr-29.html
33 ;; http://developer.apple.com/technotes/tn/tn1150.html
35 ;; HFS Normalization excludes following area for decomposition.
37 ;; U+02000 .. U+02FFF :: Punctuation, symbols, dingbats, arrows, etc.
38 ;; (Characters in this region will be composed.)
39 ;; U+0F900 .. U+0FAFF :: CJK compatibility Ideographs.
40 ;; U+2F800 .. U+2FFFF :: CJK compatibility Ideographs.
42 ;; HFS-Normalization is useful for normalizing text involving CJK Ideographs.
45 ;;; Implementation Notes on NFC/HFS-NFC.
48 ;; <Stages> Decomposition Composition
53 ;; HFS-NFD: 'hfs-nfd 'hfs-nfd-comp-p
54 ;; HFS-NFC: 'hfs-nfd t
56 ;; Algorithm for Normalization
58 ;; Before normalization, following data will be prepared.
60 ;; 1. quick-check-list
62 ;; `quick-check-list' consists of characters that will be decomposed
63 ;; during normalization. It includes composition-exclusions,
64 ;; singletons, non-starter-decompositions and decomposable
67 ;; `quick-check-regexp' will search the above characters plus
68 ;; combining characters.
70 ;; 2. decomposition-translation
72 ;; `decomposition-translation' is a translation table that will be
73 ;; used to decompose the characters.
76 ;; Normalization Process
78 ;; A. Searching (`ucs-normalize-region')
80 ;; Region is searched for `quick-check-regexp' to find possibly
81 ;; normalizable point.
83 ;; B. Identification of Normalization Block
85 ;; (1) start of the block
86 ;; If the searched character is a starter and not combining
87 ;; with previous character, then the beginning of the block is
88 ;; the searched character. If searched character is combining
89 ;; character, then previous character will be the target
91 ;; (2) end of the block
92 ;; Block ends at non-composable starter character.
94 ;; C. Decomposition (`ucs-normalize-block')
96 ;; The entire block will be decomposed by
97 ;; `decomposition-translation' table.
99 ;; D. Sorting and Composition of Smaller Blocks (`ucs-normalize-block-compose-chars')
101 ;; The block will be split to multiple samller blocks by starter
102 ;; characters. Each block is sorted, and composed if necessary.
104 ;; E. Composition of Entire Block (`ucs-normalize-compose-chars')
106 ;; Composed blocks are collected and again composed.
110 (defconst ucs-normalize-version
"1.2")
112 (eval-when-compile (require 'cl-lib
))
114 (declare-function nfd
"ucs-normalize" (char))
118 (defconst ucs-normalize-composition-exclusions
119 '(#x0958
#x0959
#x095A
#x095B
#x095C
#x095D
#x095E
#x095F
120 #x09DC
#x09DD
#x09DF
#x0A33
#x0A36
#x0A59
#x0A5A
#x0A5B
121 #x0A5E
#x0B5C
#x0B5D
#x0F43
#x0F4D
#x0F52
#x0F57
#x0F5C
122 #x0F69
#x0F76
#x0F78
#x0F93
#x0F9D
#x0FA2
#x0FA7
#x0FAC
123 #x0FB9
#xFB1D
#xFB1F
#xFB2A
#xFB2B
#xFB2C
#xFB2D
#xFB2E
124 #xFB2F
#xFB30
#xFB31
#xFB32
#xFB33
#xFB34
#xFB35
#xFB36
125 #xFB38
#xFB39
#xFB3A
#xFB3B
#xFB3C
#xFB3E
#xFB40
#xFB41
126 #xFB43
#xFB44
#xFB46
#xFB47
#xFB48
#xFB49
#xFB4A
#xFB4B
127 #xFB4C
#xFB4D
#xFB4E
#x2ADC
#x1D15E
#x1D15F
#x1D160
#x1D161
128 #x1D162
#x1D163
#x1D164
#x1D1BB
#x1D1BC
#x1D1BD
#x1D1BE
130 "Composition Exclusion List.
131 This list is taken from
132 http://www.unicode.org/Public/UNIDATA/5.2/CompositionExclusions.txt")
134 ;; Unicode ranges that decompositions & combining characters are defined.
135 (defvar check-range nil
)
136 (setq check-range
'((#x00a0 .
#x3400
) (#xA600 .
#xAC00
) (#xF900 .
#x110ff
) (#x1d000 .
#x1dfff
) (#x1f100 .
#x1f2ff
) (#x2f800 .
#x2faff
)))
138 ;; Basic normalization functions
141 (get-char-code-property char
'decomposition
)))
142 (if (and decomposition
(numberp (car decomposition
))
143 (or (> (length decomposition
) 1)
144 (/= (car decomposition
) char
)))
149 (get-char-code-property char
'decomposition
)))
150 (if (symbolp (car decomposition
)) (cdr decomposition
)
151 (if (or (> (length decomposition
) 1)
152 (/= (car decomposition
) char
)) decomposition
))))
154 (defun hfs-nfd (char)
155 (when (or (and (>= char
0) (< char
#x2000
))
156 (and (>= char
#x3000
) (< char
#xf900
))
157 (and (>= char
#xfb00
) (< char
#x2f800
))
162 (defun ucs-normalize-hfs-nfd-comp-p (char)
163 (and (>= char
#x2000
) (< char
#x3000
)))
165 (defsubst ucs-normalize-ccc
(char)
166 (get-char-code-property char
'canonical-combining-class
))
169 ;; Data common to all normalizations
173 (defvar combining-chars nil
)
174 (setq combining-chars nil
)
175 (defvar decomposition-pair-to-composition nil
)
176 (setq decomposition-pair-to-composition nil
)
177 (defvar non-starter-decompositions nil
)
178 (setq non-starter-decompositions nil
)
179 ;; This file needs to access these 2 Unicode properties, but when we
180 ;; compile it during bootstrap, charprop.el was not built yet, and
181 ;; therefore is not yet loaded into bootstrap-emacs, so
182 ;; char-code-property-alist is nil, and get-char-code-property
183 ;; always returns nil, something the code here doesn't like.
184 (define-char-code-property 'decomposition
"uni-decomposition.el")
185 (define-char-code-property 'canonical-combining-class
"uni-combining.el")
186 (let ((char 0) ccc decomposition
)
189 (cl-do ((char (car start-end
) (+ char
1))) ((> char
(cdr start-end
)))
190 (setq ccc
(ucs-normalize-ccc char
))
191 (setq decomposition
(get-char-code-property
192 char
'decomposition
))
193 (if (and (= (length decomposition
) 1)
194 (= (car decomposition
) char
))
195 (setq decomposition nil
))
196 (if (and ccc
(/= 0 ccc
)) (add-to-list 'combining-chars char
))
197 (if (and (numberp (car decomposition
))
198 (/= (ucs-normalize-ccc (car decomposition
))
200 (add-to-list 'non-starter-decompositions char
))
201 (when (numberp (car decomposition
))
202 (if (and (= 2 (length decomposition
))
203 (null (memq char ucs-normalize-composition-exclusions
))
204 (null (memq char non-starter-decompositions
)))
205 (setq decomposition-pair-to-composition
206 (cons (cons decomposition char
)
207 decomposition-pair-to-composition
)))
208 ;; If not singleton decomposition, second and later characters in
209 ;; decomposition will be the subject of combining characters.
210 (if (cdr decomposition
)
211 (dolist (char (cdr decomposition
))
212 (add-to-list 'combining-chars char
))))))
215 (setq combining-chars
216 (append combining-chars
217 '(?ᅡ ?ᅢ ?ᅣ ?ᅤ ?ᅥ ?ᅦ ?ᅧ ?ᅨ ?ᅩ ?ᅪ
218 ?ᅫ ?ᅬ ?ᅭ ?ᅮ ?ᅯ ?ᅰ ?ᅱ ?ᅲ ?ᅳ ?ᅴ ?ᅵ
219 ?ᆨ ?ᆩ ?ᆪ ?ᆫ ?ᆬ ?ᆭ ?ᆮ ?ᆯ ?ᆰ ?ᆱ ?ᆲ ?ᆳ ?ᆴ
220 ?ᆵ ?ᆶ ?ᆷ ?ᆸ ?ᆹ ?ᆺ ?ᆻ ?ᆼ ?ᆽ ?ᆾ ?ᆿ ?ᇀ ?ᇁ ?ᇂ
)))
224 (defun ucs-normalize-make-hash-table-from-alist (alist)
225 (let ((table (make-hash-table :test
'equal
:size
2000)))
226 (mapc (lambda (x) (puthash (car x
) (cdr x
) table
)) alist
)
229 (defvar ucs-normalize-decomposition-pair-to-primary-composite nil
230 "Hashtable of decomposed pair to primary composite.
231 Note that Hangul are excluded.")
232 (setq ucs-normalize-decomposition-pair-to-primary-composite
233 (ucs-normalize-make-hash-table-from-alist
234 (eval-when-compile decomposition-pair-to-composition
)))
236 (defun ucs-normalize-primary-composite (decomposition-pair composition-predicate
)
237 "Convert DECOMPOSITION-PAIR to primary composite using COMPOSITION-PREDICATE."
238 (let ((char (or (gethash decomposition-pair
239 ucs-normalize-decomposition-pair-to-primary-composite
)
240 (and (<= #x1100
(car decomposition-pair
))
241 (< (car decomposition-pair
) #x1113
)
242 (<= #x1161
(cadr decomposition-pair
))
243 (< (car decomposition-pair
) #x1176
)
244 (let ((lindex (- (car decomposition-pair
) #x1100
))
245 (vindex (- (cadr decomposition-pair
) #x1161
)))
246 (+ #xAC00
(* (+ (* lindex
21) vindex
) 28))))
247 (and (<= #xac00
(car decomposition-pair
))
248 (< (car decomposition-pair
) #xd7a4
)
249 (<= #x11a7
(cadr decomposition-pair
))
250 (< (cadr decomposition-pair
) #x11c3
)
251 (= 0 (%
(- (car decomposition-pair
) #xac00
) 28))
252 (let ((tindex (- (cadr decomposition-pair
) #x11a7
)))
253 (+ (car decomposition-pair
) tindex
))))))
255 (functionp composition-predicate
)
256 (null (funcall composition-predicate char
)))
260 (defvar ucs-normalize-combining-chars nil
)
261 (setq ucs-normalize-combining-chars
(eval-when-compile combining-chars
))
263 (defvar ucs-normalize-combining-chars-regexp nil
264 "Regular expression to match sequence of combining characters.")
265 (setq ucs-normalize-combining-chars-regexp
266 (eval-when-compile (concat (regexp-opt (mapcar 'char-to-string combining-chars
)) "+")))
268 (declare-function decomposition-translation-alist
"ucs-normalize"
269 (decomposition-function))
270 (declare-function decomposition-char-recursively
"ucs-normalize"
271 (char decomposition-function
))
272 (declare-function alist-list-to-vector
"ucs-normalize" (alist))
276 (defun decomposition-translation-alist (decomposition-function)
277 (let (decomposition alist
)
280 (cl-do ((char (car start-end
) (+ char
1))) ((> char
(cdr start-end
)))
281 (setq decomposition
(funcall decomposition-function char
))
283 (setq alist
(cons (cons char
286 (decomposition-char-recursively
287 x decomposition-function
))
293 (defun decomposition-char-recursively (char decomposition-function
)
294 (let ((decomposition (funcall decomposition-function char
)))
298 (decomposition-char-recursively x decomposition-function
))
302 (defun alist-list-to-vector (alist)
303 (mapcar (lambda (x) (cons (car x
) (apply 'vector
(cdr x
)))) alist
))
305 (defvar nfd-alist nil
)
306 (setq nfd-alist
(alist-list-to-vector (decomposition-translation-alist 'nfd
)))
307 (defvar nfkd-alist nil
)
308 (setq nfkd-alist
(alist-list-to-vector (decomposition-translation-alist 'nfkd
)))
309 (defvar hfs-nfd-alist nil
)
310 (setq hfs-nfd-alist
(alist-list-to-vector (decomposition-translation-alist 'hfs-nfd
)))
314 (defvar ucs-normalize-hangul-translation-alist nil
)
315 (setq ucs-normalize-hangul-translation-alist
319 (cons (cons (+ #xac00 i
)
321 (vector (+ #x1100
(/ i
588))
322 (+ #x1161
(/ (% i
588) 28)))
323 (vector (+ #x1100
(/ i
588))
324 (+ #x1161
(/ (% i
588) 28))
325 (+ #x11a7
(% i
28)))))
329 (defun ucs-normalize-make-translation-table-from-alist (alist)
330 (make-translation-table-from-alist
331 (append alist ucs-normalize-hangul-translation-alist
)))
333 (define-translation-table 'ucs-normalize-nfd-table
334 (ucs-normalize-make-translation-table-from-alist (eval-when-compile nfd-alist
)))
335 (define-translation-table 'ucs-normalize-nfkd-table
336 (ucs-normalize-make-translation-table-from-alist (eval-when-compile nfkd-alist
)))
337 (define-translation-table 'ucs-normalize-hfs-nfd-table
338 (ucs-normalize-make-translation-table-from-alist (eval-when-compile hfs-nfd-alist
)))
340 (defun ucs-normalize-sort (chars)
341 "Sort by canonical combining class of CHARS."
344 (< (ucs-normalize-ccc ch1
) (ucs-normalize-ccc ch2
)))))
346 (defun ucs-normalize-compose-chars (chars composition-predicate
)
347 "Compose CHARS by COMPOSITION-PREDICATE.
348 CHARS must be sorted and normalized in starter-combining pairs."
349 (if composition-predicate
350 (let* ((starter (car chars
))
351 remain result prev-ccc
352 (target-chars (cdr chars
))
356 (setq target
(car target-chars
)
357 target-ccc
(ucs-normalize-ccc target
))
358 (if (and (or (null prev-ccc
)
359 (< prev-ccc target-ccc
))
360 (setq primary-composite
361 (ucs-normalize-primary-composite (list starter target
)
362 composition-predicate
)))
363 ;; case 1: composable
364 (setq starter primary-composite
367 ;; case 2: move starter
368 (setq result
(nconc result
(cons starter
(nreverse remain
)))
371 ;; case 3: move target
372 (setq prev-ccc target-ccc
373 remain
(cons target remain
))))
374 (setq target-chars
(cdr target-chars
)))
375 (nconc result
(cons starter
(nreverse remain
))))
378 (defun ucs-normalize-block-compose-chars (chars composition-predicate
)
379 "Try composing CHARS by COMPOSITION-PREDICATE.
380 If COMPOSITION-PREDICATE is not given, then do nothing."
381 (let ((chars (ucs-normalize-sort chars
)))
382 (if composition-predicate
383 (ucs-normalize-compose-chars chars composition-predicate
)
387 (declare-function quick-check-list
"ucs-normalize"
388 (decomposition-translation &optional composition-predicate
))
389 (declare-function quick-check-list-to-regexp
"ucs-normalize" (quick-check-list))
393 (defun quick-check-list (decomposition-translation
394 &optional composition-predicate
)
395 "Quick-Check List for DECOMPOSITION-TRANSLATION and COMPOSITION-PREDICATE.
396 It includes Singletons, CompositionExclusions, and Non-Starter
398 (let (entries decomposition composition
)
401 (cl-do ((i (car start-end
) (+ i
1))) ((> i
(cdr start-end
)))
406 (translate-region 1 2 decomposition-translation
)
409 (ucs-normalize-block-compose-chars decomposition composition-predicate
))
410 (when (not (equal composition
(list i
)))
411 (setq entries
(cons i entries
)))))
415 ucs-normalize-composition-exclusions
416 non-starter-decompositions
)))
419 (defvar nfd-quick-check-list nil
)
420 (setq nfd-quick-check-list
(quick-check-list 'ucs-normalize-nfd-table
))
421 (defvar nfc-quick-check-list nil
)
422 (setq nfc-quick-check-list
(quick-check-list 'ucs-normalize-nfd-table t
))
423 (defvar nfkd-quick-check-list nil
)
424 (setq nfkd-quick-check-list
(quick-check-list 'ucs-normalize-nfkd-table
))
425 (defvar nfkc-quick-check-list nil
)
426 (setq nfkc-quick-check-list
(quick-check-list 'ucs-normalize-nfkd-table t
))
427 (defvar hfs-nfd-quick-check-list nil
)
428 (setq hfs-nfd-quick-check-list
(quick-check-list 'ucs-normalize-hfs-nfd-table
429 'ucs-normalize-hfs-nfd-comp-p
))
430 (defvar hfs-nfc-quick-check-list nil
)
431 (setq hfs-nfc-quick-check-list
(quick-check-list 'ucs-normalize-hfs-nfd-table t
))
433 (defun quick-check-list-to-regexp (quick-check-list)
434 (regexp-opt (mapcar 'char-to-string
(append quick-check-list combining-chars
))))
436 (defun quick-check-decomposition-list-to-regexp (quick-check-list)
437 (concat (quick-check-list-to-regexp quick-check-list
) "\\|[가-힣]"))
439 (defun quick-check-composition-list-to-regexp (quick-check-list)
440 (concat (quick-check-list-to-regexp quick-check-list
) "\\|[ᅡ-ᅵᆨ-ᇂ]"))
445 (defvar ucs-normalize-nfd-quick-check-regexp nil
)
446 (setq ucs-normalize-nfd-quick-check-regexp
447 (eval-when-compile (quick-check-decomposition-list-to-regexp nfd-quick-check-list
)))
448 (defvar ucs-normalize-nfc-quick-check-regexp nil
)
449 (setq ucs-normalize-nfc-quick-check-regexp
450 (eval-when-compile (quick-check-composition-list-to-regexp nfc-quick-check-list
)))
453 (defvar ucs-normalize-nfkd-quick-check-regexp nil
)
454 (setq ucs-normalize-nfkd-quick-check-regexp
455 (eval-when-compile (quick-check-decomposition-list-to-regexp nfkd-quick-check-list
)))
456 (defvar ucs-normalize-nfkc-quick-check-regexp nil
)
457 (setq ucs-normalize-nfkc-quick-check-regexp
458 (eval-when-compile (quick-check-composition-list-to-regexp nfkc-quick-check-list
)))
461 (defvar ucs-normalize-hfs-nfd-quick-check-regexp nil
)
462 (setq ucs-normalize-hfs-nfd-quick-check-regexp
463 (eval-when-compile (concat (quick-check-decomposition-list-to-regexp hfs-nfd-quick-check-list
))))
464 (defvar ucs-normalize-hfs-nfc-quick-check-regexp nil
)
465 (setq ucs-normalize-hfs-nfc-quick-check-regexp
466 (eval-when-compile (quick-check-composition-list-to-regexp hfs-nfc-quick-check-list
)))
468 ;;------------------------------------------------------------------------------------------
470 ;; Normalize local region.
472 (defun ucs-normalize-block
473 (from to
&optional decomposition-translation-table composition-predicate
)
474 "Normalize region FROM TO, by sorting the region with canonical-cc.
475 If DECOMPOSITION-TRANSLATION-TABLE is given, translate region
476 before sorting. If COMPOSITION-PREDICATE is given, then compose
477 the region by using it."
479 (narrow-to-region from to
)
480 (goto-char (point-min))
481 (if decomposition-translation-table
482 (translate-region from to decomposition-translation-table
))
483 (goto-char (point-min))
484 (let ((start (point)) chars
); ccc)
488 (= 0 (ucs-normalize-ccc (char-after (point)))))
491 (ucs-normalize-block-compose-chars
492 (string-to-list (buffer-substring start
(point)))
493 composition-predicate
))
495 ;;(unless ccc (error "Undefined character can not be normalized!"))
497 (delete-region (point-min) (point-max))
499 (ucs-normalize-compose-chars
500 chars composition-predicate
)))))
502 (defun ucs-normalize-region
503 (from to quick-check-regexp translation-table composition-predicate
)
504 "Normalize region from FROM to TO.
505 QUICK-CHECK-REGEXP is applied for searching the region.
506 TRANSLATION-TABLE will be used to decompose region.
507 COMPOSITION-PREDICATE will be used to compose region."
510 (narrow-to-region from to
)
511 (goto-char (point-min))
512 (let (start-pos starter
)
513 (while (re-search-forward quick-check-regexp nil t
)
514 (setq starter
(string-to-char (match-string 0)))
515 (setq start-pos
(match-beginning 0))
518 (if (or (= start-pos
(point-min))
519 (and (= 0 (ucs-normalize-ccc starter
))
520 (not (memq starter ucs-normalize-combining-chars
))))
521 start-pos
(1- start-pos
))
523 (if (looking-at ucs-normalize-combining-chars-regexp
)
524 (match-end 0) (1+ start-pos
))
525 translation-table composition-predicate
))))))
527 ;; --------------------------------------------------------------------------------
529 (defmacro ucs-normalize-string
(ucs-normalize-region)
532 (,ucs-normalize-region
(point-min) (point-max))
536 (defun ucs-normalize-NFD-region (from to
)
537 "Normalize the current region by the Unicode NFD."
539 (ucs-normalize-region from to
540 ucs-normalize-nfd-quick-check-regexp
541 'ucs-normalize-nfd-table nil
))
543 (defun ucs-normalize-NFD-string (str)
544 "Normalize the string STR by the Unicode NFD."
545 (ucs-normalize-string ucs-normalize-NFD-region
))
548 (defun ucs-normalize-NFC-region (from to
)
549 "Normalize the current region by the Unicode NFC."
551 (ucs-normalize-region from to
552 ucs-normalize-nfc-quick-check-regexp
553 'ucs-normalize-nfd-table t
))
555 (defun ucs-normalize-NFC-string (str)
556 "Normalize the string STR by the Unicode NFC."
557 (ucs-normalize-string ucs-normalize-NFC-region
))
560 (defun ucs-normalize-NFKD-region (from to
)
561 "Normalize the current region by the Unicode NFKD."
563 (ucs-normalize-region from to
564 ucs-normalize-nfkd-quick-check-regexp
565 'ucs-normalize-nfkd-table nil
))
567 (defun ucs-normalize-NFKD-string (str)
568 "Normalize the string STR by the Unicode NFKD."
569 (ucs-normalize-string ucs-normalize-NFKD-region
))
572 (defun ucs-normalize-NFKC-region (from to
)
573 "Normalize the current region by the Unicode NFKC."
575 (ucs-normalize-region from to
576 ucs-normalize-nfkc-quick-check-regexp
577 'ucs-normalize-nfkd-table t
))
579 (defun ucs-normalize-NFKC-string (str)
580 "Normalize the string STR by the Unicode NFKC."
581 (ucs-normalize-string ucs-normalize-NFKC-region
))
584 (defun ucs-normalize-HFS-NFD-region (from to
)
585 "Normalize the current region by the Unicode NFD and Mac OS's HFS Plus."
587 (ucs-normalize-region from to
588 ucs-normalize-hfs-nfd-quick-check-regexp
589 'ucs-normalize-hfs-nfd-table
590 'ucs-normalize-hfs-nfd-comp-p
))
592 (defun ucs-normalize-HFS-NFD-string (str)
593 "Normalize the string STR by the Unicode NFD and Mac OS's HFS Plus."
594 (ucs-normalize-string ucs-normalize-HFS-NFD-region
))
596 (defun ucs-normalize-HFS-NFC-region (from to
)
597 "Normalize the current region by the Unicode NFC and Mac OS's HFS Plus."
599 (ucs-normalize-region from to
600 ucs-normalize-hfs-nfc-quick-check-regexp
601 'ucs-normalize-hfs-nfd-table t
))
603 (defun ucs-normalize-HFS-NFC-string (str)
604 "Normalize the string STR by the Unicode NFC and Mac OS's HFS Plus."
605 (ucs-normalize-string ucs-normalize-HFS-NFC-region
))
607 ;; Post-read-conversion function for `utf-8-hfs'.
608 (defun ucs-normalize-hfs-nfd-post-read-conversion (len)
611 (narrow-to-region (point) (+ (point) len
))
612 (ucs-normalize-HFS-NFC-region (point-min) (point-max))
613 (- (point-max) (point-min)))))
615 ;; Pre-write conversion for `utf-8-hfs'.
616 (defun ucs-normalize-hfs-nfd-pre-write-conversion (from to
)
617 (let ((old-buf (current-buffer)))
618 (set-buffer (generate-new-buffer " *temp*"))
621 (insert-buffer-substring old-buf from to
))
622 (ucs-normalize-HFS-NFD-region (point-min) (point-max))
625 ;;; coding-system definition
626 (define-coding-system 'utf-8-hfs
627 "UTF-8 based coding system for MacOS HFS file names.
628 The singleton characters in HFS normalization exclusion will not
632 :charset-list
'(unicode)
633 :post-read-conversion
'ucs-normalize-hfs-nfd-post-read-conversion
634 :pre-write-conversion
'ucs-normalize-hfs-nfd-pre-write-conversion
637 ;; This is tested in dired.c:file_name_completion in order to reject
638 ;; false positives due to comparison of encoded file names.
639 (coding-system-put 'utf-8-hfs
'decomposed-characters
't
)
641 (provide 'ucs-normalize
)
647 ;;; ucs-normalize.el ends here