Some final fixes in file notification before merging with master
[emacs.git] / lisp / character-fold.el
blob 0e156c50ddefe3fc90808046ead9205d9891ea3f
1 ;;; character-fold.el --- match unicode to similar ASCII -*- lexical-binding: t; -*-
3 ;; Copyright (C) 2015 Free Software Foundation, Inc.
5 ;; Maintainer: emacs-devel@gnu.org
6 ;; Keywords: matching
8 ;; This file is part of GNU Emacs.
10 ;; GNU Emacs is free software: you can redistribute it and/or modify
11 ;; it under the terms of the GNU General Public License as published by
12 ;; the Free Software Foundation, either version 3 of the License, or
13 ;; (at your option) any later version.
15 ;; GNU Emacs is distributed in the hope that it will be useful,
16 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 ;; GNU General Public License for more details.
20 ;; You should have received a copy of the GNU General Public License
21 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
23 ;;; Code:
(defconst character-fold-table
  (eval-when-compile
    (let* ((equiv (make-char-table 'character-fold-table))
           (table (unicode-property-table-internal 'decomposition))
           (func (char-table-extra-slot table 1)))
      ;; Ensure the table is populated: for every key that is a range
      ;; (a cons), call the function stored in extra slot 1 of the
      ;; decomposition table on the first character of that range.
      (map-char-table
       (lambda (i v) (when (consp i) (funcall func (car i) v table)))
       table)

      ;; Compile a list of all complex characters that each simple
      ;; character should match.
      (map-char-table
       (lambda (i dec)
         (when (consp dec)
           ;; Discard a possible formatting tag.
           (when (symbolp (car dec))
             (setq dec (cdr dec)))
           ;; Skip trivial cases like ?a decomposing to (?a).
           (unless (and (eq i (car dec))
                        (not (cdr dec)))
             (let ((d dec)
                   (fold-decomp t)
                   k found)
               ;; Walk the decomposition until the first number or
               ;; letter; K ends up being that character, or the last
               ;; element of DEC when there is none.
               (while (and d (not found))
                 (setq k (pop d))
                 ;; Is k a number or letter, per unicode standard?
                 (setq found (memq (get-char-code-property k 'general-category)
                                   '(Lu Ll Lt Lm Lo Nd Nl No))))
               (if found
                   ;; Check if the decomposition has more than one letter,
                   ;; because then we don't want the first letter to match
                   ;; the decomposition.
                   (dolist (k d)
                     (when (and fold-decomp
                                (memq (get-char-code-property k 'general-category)
                                      '(Lu Ll Lt Lm Lo Nd Nl No)))
                       (setq fold-decomp nil)))
                 ;; If there's no number or letter on the
                 ;; decomposition, take the first character in it.
                 ;; NOTE(review): only FOUND is updated here; the key K
                 ;; used below is still the last element popped from
                 ;; DEC, which differs from the first one for
                 ;; multi-character decompositions -- confirm which of
                 ;; the two is intended.
                 (setq found (car-safe dec)))
               ;; Finally, we only fold multi-char decomposition if at
               ;; least one of the chars is non-spacing (combining).
               (when fold-decomp
                 (setq fold-decomp nil)
                 (dolist (k dec)
                   (when (and (not fold-decomp)
                              (> (get-char-code-property k 'canonical-combining-class) 0))
                     (setq fold-decomp t))))
               ;; Add i to the list of characters that k can
               ;; represent.  Also possibly add its decomposition, so we can
               ;; match multi-char representations like (format "a%c" 769)
               (when (and found (not (eq i k)))
                 (let ((chars (cons (char-to-string i) (aref equiv k))))
                   (aset equiv k
                         (if fold-decomp
                             (cons (apply #'string dec) chars)
                           chars))))))))
       table)

      ;; Add some manual entries.  The first string listed for ?\" is
      ;; U+FF02 FULLWIDTH QUOTATION MARK; it is written as an escape so
      ;; that it cannot be turned into a plain ASCII quote (which would
      ;; break the string syntax) when the file is re-encoded.
      (dolist (it '((?\" "\uFF02" "“" "”" "”" "„" "⹂" "〞" "‟" "‟" "❞" "❝" "❠" "“" "„" "〝" "〟" "🙷" "🙶" "🙸" "«" "»")
                    (?' "❟" "❛" "❜" "‘" "’" "‚" "‛" "‚" "󠀢" "❮" "❯" "‹" "›")
                    (?` "❛" "‘" "‛" "󠀢" "❮" "‹")))
        (let ((idx (car it))
              (chars (cdr it)))
          (aset equiv idx (append chars (aref equiv idx)))))

      ;; Convert the lists of characters we compiled into regexps.
      ;; Each regexp also matches the key character itself.
      (map-char-table
       (lambda (i v) (let ((re (regexp-opt (cons (char-to-string i) v))))
                       (if (consp i)
                           (set-char-table-range equiv i re)
                         (aset equiv i re))))
       equiv)
      equiv))
  "Used for folding characters of the same group during search.
This is a char-table.  The entry for a character, when non-nil, is
a regexp string that matches the character itself as well as the
characters and character sequences collected for it from the
Unicode decomposition data and from a few manual entries for the
quote characters.")
(defun character-fold--make-space-string (n)
  "Return a regexp that matches a run of N spaces.
The result is a shy group with two alternatives: N literal space
characters, or N copies of the `character-fold-table' entry for
the space character (a plain \" \" when the table has no such
entry)."
  (let ((literal-run (make-string n ?\s))
        (folded-space (or (aref character-fold-table ?\s) " "))
        (folded-run ""))
    ;; Repeat the folded alternative once per requested space.
    (dotimes (_ n)
      (setq folded-run (concat folded-run folded-space)))
    (concat "\\(?:" literal-run "\\|" folded-run "\\)")))
111 ;;;###autoload
(defun character-fold-to-regexp (string &optional _lax)
  "Build a regexp that matches whatever character-folds into STRING.
Each character of STRING with an entry in `character-fold-table'
contributes that entry (itself a regexp); every other character
is passed through `regexp-quote'.  A run of consecutive spaces is
turned into a single group which still contains the literal
spaces as one of its alternatives."
  (let ((pending-spaces 0)
        (pieces nil))
    (dolist (ch (append string nil))
      (if (eq ch ?\s)
          ;; Only count spaces for now; the whole run is emitted at
          ;; once, so that the literal ?\s characters stay visible to
          ;; `search-spaces-regexp'.
          (setq pending-spaces (1+ pending-spaces))
        ;; A non-space character ends the current run of spaces.
        (when (> pending-spaces 0)
          (push (character-fold--make-space-string pending-spaces) pieces)
          (setq pending-spaces 0))
        (push (or (aref character-fold-table ch)
                  (regexp-quote (string ch)))
              pieces)))
    ;; STRING may end in spaces that have not been emitted yet.
    (when (> pending-spaces 0)
      (push (character-fold--make-space-string pending-spaces) pieces))
    (mapconcat #'identity (nreverse pieces) "")))
143 ;;; Commands provided for completeness.
(defun character-fold-search-forward (string &optional bound noerror count)
  "Search forward for text that character-folds into STRING.
The pattern is obtained by passing STRING to
`character-fold-to-regexp' and is then given to
`re-search-forward', along with BOUND, NOERROR and COUNT."
  (interactive "sSearch: ")
  (let ((folded-regexp (character-fold-to-regexp string)))
    (re-search-forward folded-regexp bound noerror count)))
(defun character-fold-search-backward (string &optional bound noerror count)
  "Search backward for text that character-folds into STRING.
The pattern is obtained by passing STRING to
`character-fold-to-regexp' and is then given to
`re-search-backward', along with BOUND, NOERROR and COUNT."
  (interactive "sSearch: ")
  (let ((folded-regexp (character-fold-to-regexp string)))
    (re-search-backward folded-regexp bound noerror count)))
;; Mark the `character-fold' feature as provided.
160 (provide 'character-fold)
162 ;;; character-fold.el ends here