Fix C99 incompatibilities in Cairo code
[emacs.git] / lisp / character-fold.el
blob7f5be8324a8a6f30ecb5461573e934cc2a87a015
1 ;;; character-fold.el --- matching unicode characters to their ascii similars -*- lexical-binding: t; -*-
3 ;; Copyright (C) 2015 Free Software Foundation, Inc.
5 ;; Maintainer: emacs-devel@gnu.org
6 ;; Keywords: matching
8 ;; This file is part of GNU Emacs.
10 ;; GNU Emacs is free software: you can redistribute it and/or modify
11 ;; it under the terms of the GNU General Public License as published by
12 ;; the Free Software Foundation, either version 3 of the License, or
13 ;; (at your option) any later version.
15 ;; GNU Emacs is distributed in the hope that it will be useful,
16 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 ;; GNU General Public License for more details.
20 ;; You should have received a copy of the GNU General Public License
21 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
23 ;;; Code:
;;;###autoload
(defvar character-fold-search t
  "Non-nil if searches should fold similar characters.
This means some characters will match entire groups of characters.
For instance, \" will match all variants of double quotes, and
the letter a will match all of its accented versions (and then
some).")
(defconst character-fold-table
  (eval-when-compile
    (let* ((equiv (make-char-table 'character-fold-table))
           (table (unicode-property-table-internal 'decomposition))
           (func (char-table-extra-slot table 1))
           ;; General categories treated as "a number or letter" below.
           (alnum-categories '(Lu Ll Lt Lm Lo Nd Nl No)))
      ;; Ensure the table is populated: for every range key, call the
      ;; function stored in the table's extra slot 1 on the first
      ;; character of that range.
      (map-char-table
       (lambda (i v) (when (consp i) (funcall func (car i) v table)))
       table)

      ;; Compile a list of all complex characters that each simple
      ;; character should match.
      (map-char-table
       (lambda (i dec)
         (when (consp dec)
           ;; Discard a possible formatting tag.
           (when (symbolp (car dec))
             (setq dec (cdr dec)))
           ;; Skip trivial cases like ?a decomposing to (?a).
           (unless (and (eq i (car dec))
                        (not (cdr dec)))
             (let ((d dec) k found multiletter)
               (while (and d (not found))
                 (setq k (pop d))
                 ;; Is k a number or letter, per unicode standard?
                 (setq found (memq (get-char-code-property k 'general-category)
                                   alnum-categories)))
               (if found
                   ;; Check if the decomposition has more than one letter,
                   ;; because then we don't want the first letter to match
                   ;; the decomposition.
                   (dolist (k d)
                     (when (memq (get-char-code-property k 'general-category)
                                 alnum-categories)
                       (setq multiletter t)))
                 ;; If there's no number or letter on the
                 ;; decomposition, take the first character in it.
                 ;; NOTE(review): at this point `k' still holds the last
                 ;; element popped by the loop above, and `k' (not
                 ;; `found') is the index used below -- confirm that
                 ;; this is the intended character.
                 (setq found (car-safe dec)))
               ;; Add i to the list of characters that k can
               ;; represent.  Also possibly add its decomposition, so we can
               ;; match multi-char representations like (format "a%c" 769)
               (when (and found (not (eq i k)))
                 (let ((chars (cons (char-to-string i) (aref equiv k))))
                   (aset equiv k
                         (if multiletter chars
                           (cons (apply #'string dec) chars)))))))))
       table)

      ;; Hand-picked equivalents that are prepended to whatever the
      ;; decomposition pass collected for these characters.
      (dolist (it '((?\" "＂" "“" "”" "”" "„" "⹂" "〞" "‟" "‟" "❞" "❝" "❠" "“" "„" "〝" "〟" "🙷" "🙶" "🙸" "«" "»")
                    (?' "❟" "❛" "❜" "‘" "’" "‚" "‛" "‚" "󠀢" "❮" "❯" "‹" "›")
                    (?` "❛" "‘" "‛" "󠀢" "❮" "‹")
                    (?\s "\t" "\r" "\n")))
        (let ((idx (car it))
              (chars (cdr it)))
          (aset equiv idx (append chars (aref equiv idx)))))

      ;; Convert each accumulated list of strings into a single regexp
      ;; that also matches the character itself.
      (map-char-table
       (lambda (i v)
         (if (consp i)
             ;; A range key (FROM . TO): `char-to-string' only accepts a
             ;; character, so build the regexp for each member in turn.
             (let ((c (car i)))
               (while (<= c (cdr i))
                 (aset equiv c (regexp-opt (cons (char-to-string c) v)))
                 (setq c (1+ c))))
           (aset equiv i (regexp-opt (cons (char-to-string i) v)))))
       equiv)
      equiv))
  "Used for folding characters of the same group during search.")
;;;###autoload
(defun character-fold-to-regexp (string &optional lax)
  "Return a regexp that matches whatever character-folds into STRING.
When `character-fold-search' is nil, the result is simply STRING
passed through `regexp-quote'.
Otherwise every character of STRING that has an entry in
`character-fold-table' is replaced by that entry (a regexp), and
every other character is `regexp-quote'd.  In that case, if LAX
is non-nil, each space, tab, carriage return or newline in STRING
is allowed to match one or more times."
  (if (not character-fold-search)
      (regexp-quote string)
    (mapconcat
     (lambda (char)
       ;; Prefer the folding regexp; fall back to the quoted character.
       (let ((piece (or (aref character-fold-table char)
                        (regexp-quote (string char)))))
         (if (and lax (memq char '(?\s ?\t ?\r ?\n)))
             (concat piece "+")
           piece)))
     string
     "")))
116 ;;; character-fold.el ends here