Tweak eww DWIM URL recognition
[emacs.git] / lisp / textmodes / tildify.el
blob9382b32845d5b5481cdddf822fc017f3b6d28eea
1 ;;; tildify.el --- adding hard spaces into texts -*- lexical-binding: t -*-
3 ;; Copyright (C) 1997-2015 Free Software Foundation, Inc.
5 ;; Author: Milan Zamazal <pdm@zamazal.org>
6 ;; Michal Nazarewicz <mina86@mina86.com>
7 ;; Version: 4.5.7
8 ;; Keywords: text, TeX, SGML, wp
10 ;; This file is part of GNU Emacs.
12 ;; GNU Emacs is free software: you can redistribute it and/or modify
13 ;; it under the terms of the GNU General Public License as published by
14 ;; the Free Software Foundation, either version 3 of the License, or
15 ;; (at your option) any later version.
17 ;; GNU Emacs is distributed in the hope that it will be useful,
18 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
19 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 ;; GNU General Public License for more details.
22 ;; You should have received a copy of the GNU General Public License
23 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
25 ;;; Commentary:
27 ;; This package can be typically used for adding forgotten tildes in TeX
28 ;; sources or adding `&nbsp;' sequences in SGML (e.g. HTML) texts.
30 ;; For example, the Czech orthography requires avoiding one letter
31 ;; prepositions at line endings. So they should be connected with the
32 ;; following words by a tilde. Some users forget to do this all the
33 ;; time. The purpose of this program is to check the text and suggest
34 ;; adding of missing tildes on some places. It works in a similar
35 ;; manner to `query-replace-regexp'.
37 ;; The functionality of this program is actually performing query
38 ;; replace on certain regions, but for historical reasons explained
39 ;; above it is called `tildify'.
41 ;; The default variable settings are suited for Czech, so do not try to
42 ;; understand them if you are not familiar with Czech grammar and spelling.
44 ;; The algorithm was inspired by Petr Olšák's program `vlna'. Abilities of
45 ;; `tildify.el' are a little limited; if you have improvement suggestions, let
46 ;; me know.
48 ;;; Code:
51 ;;; *** User configuration variables ***
54 (defgroup tildify nil
55 "Add hard spaces or other text fragments to text buffers."
56 :version "21.1"
57 :group 'wp)
59 (defcustom tildify-pattern
60 "\\(?:[,:;(][ \t]*[a]\\|\\<[AIKOSUVZikosuvz]\\)\\([ \t]+\\|[ \t]*\n[ \t]*\\)\\(?:\\w\\|[([{\\]\\|<[a-zA-Z]\\)"
61 "A pattern specifying where to insert hard spaces.
63 `tildify-buffer' function will replace first capturing group of the regexp with
64 a hard space (as defined by `tildify-space-string' variable). (Hint: \\(…\\)
65 non-capturing groups can be used for grouping prior to the part of the regexp
66 matching the white space). The pattern is matched case-sensitive regardless of
67 the value of `case-fold-search' setting."
68 :version "25.1"
69 :group 'tildify
70 :type 'string
71 :safe t)
73 (defcustom tildify-pattern-alist ()
74 "Alist specifying where to insert hard spaces.
76 Each alist item is of the form (MAJOR-MODE REGEXP NUMBER) or
77 \(MAJOR-MODE . SYMBOL).
79 MAJOR-MODE defines major mode, for which the item applies. It can be either:
80 - a symbol equal to the major mode of the buffer to be fixed
81 - t for default item, this applies to all major modes not defined in another
82 alist item
84 REGEXP is a regular expression matching the part of a text, where a hard space
85 is missing. The regexp is always case sensitive, regardless of the current
86 `case-fold-search' setting.
88 NUMBER defines the number of the REGEXP subexpression which should be replaced
89 by the hard space character.
91 The form (MAJOR-MODE . SYMBOL) defines alias item for MAJOR-MODE. For this
92 mode, the item for the mode SYMBOL is looked up in the alist instead."
93 :group 'tildify
94 :type '(repeat (cons :tag "Entry for major mode"
95 (choice (const :tag "Default" t)
96 (symbol :tag "Major mode"))
97 (choice (list :tag "Regexp"
98 regexp
99 (integer :tag "Group "))
100 (symbol :tag "Like other")))))
101 (make-obsolete-variable 'tildify-pattern-alist 'tildify-pattern "25.1")
103 (defcustom tildify-space-string "\u00A0"
104 "Representation of a hard (a.k.a. no-break) space in current major mode.
106 Used by `tildify-buffer' in places where space is required but line
107 cannot be broken. For example \"~\" for TeX or \"&#160;\" for SGML,
108 HTML and XML modes. A no-break space Unicode character (\"\\u00A0\")
109 might be used for other modes if compatible encoding is used.
111 If nil, current major mode has no way to represent a hard space."
112 :version "25.1"
113 :group 'tildify
114 :type '(choice (const :tag "Space character (no hard-space representation)"
115 " ")
116 (const :tag "No-break space (U+00A0)" "\u00A0")
117 (string :tag "Custom string"))
118 :safe t)
120 (defcustom tildify-string-alist ()
121 "Alist specifying what is a hard space in the current major mode.
123 Each alist item is of the form (MAJOR-MODE . STRING) or
124 \(MAJOR-MODE . SYMBOL).
126 MAJOR-MODE defines major mode, for which the item applies. It can be either:
127 - a symbol equal to the major mode of the buffer to be fixed
128 - t for default item, this applies to all major modes not defined in another
129 alist item
131 STRING defines the hard space, which is inserted at places defined by
132 `tildify-pattern'. For example it can be \"~\" for TeX or \"&nbsp;\" for SGML.
134 The form (MAJOR-MODE . SYMBOL) defines alias item for MAJOR-MODE. For this
135 mode, the item for the mode SYMBOL is looked up in the alist instead."
136 :group 'tildify
137 :type '(repeat (cons :tag "Entry for major mode"
138 (choice (const :tag "Default" t)
139 (symbol :tag "Major mode"))
140 (choice (const :tag "No-break space (U+00A0)" "\u00A0")
141 (string :tag "String ")
142 (symbol :tag "Like other")))))
143 (make-obsolete-variable 'tildify-string-alist
144 'tildify-space-string "25.1")
146 (defcustom tildify-foreach-region-function
147 'tildify--deprecated-ignore-evironments
148 "A function calling a callback on portions of the buffer to tildify.
150 The function is called from `tildify-buffer' function with three arguments: FUNC
151 BEG END. FUNC is a callback accepting two arguments -- REG-BEG REG-END --
152 specifying a portion of buffer to operate on.
154 The BEG and END arguments may be used to limit portion of the buffer being
155 scanned, but the `tildify-foreach-region-function' is not required to make use
156 of them. IT must, however, terminate as soon as FUNC returns nil.
158 For example, if `tildify-buffer' function should operate on the whole buffer,
159 a simple pass through function could be used:
160 (setq-local tildify-foreach-region-function
161 (lambda (cb beg end) (funcall cb beg end)))
162 or better still:
163 (setq-local tildify-foreach-region-function 'funcall)
164 See `tildify-foreach-ignore-environments' function for other ways to use the
165 variable."
166 :version "25.1"
167 :group 'tildify
168 :type 'function)
170 (defcustom tildify-ignored-environments-alist ()
171 "Alist specifying ignored structured text environments.
172 Parts of text defined in this alist are skipped without performing hard space
173 insertion on them. These setting allow skipping text parts like verbatim or
174 math environments in TeX or preformatted text in SGML.
176 Each list element is of the form
177 (MAJOR-MODE (BEG-REGEX . END-REGEX) (BEG-REGEX . END-REGEX) ... )
179 MAJOR-MODE defines major mode, for which the item applies. It can be either:
180 - a symbol equal to the major mode of the buffer to be fixed
181 - t for default item, this applies to all major modes not defined in another
182 alist item
184 See `tildify-foreach-ignore-environments' function for description of BEG-REGEX
185 and END-REGEX."
186 :group 'tildify
187 :type '(repeat
188 (cons :tag "Entry for major mode"
189 (choice (const :tag "Default" t)
190 (symbol :tag "Major mode"))
191 (choice
192 (const :tag "None")
193 (repeat
194 :tag "Environments"
195 (cons :tag "Regexp pair"
196 (regexp :tag "Open ")
197 (choice :tag "Close"
198 (regexp :tag "Regexp")
199 (list :tag "Regexp and groups (concatenated)"
200 (choice (regexp :tag "Regexp")
201 (integer :tag "Group "))))))
202 (symbol :tag "Like other")))))
203 (make-obsolete-variable 'tildify-ignored-environments-alist
204 'tildify-foreach-region-function "25.1")
207 ;;; *** Interactive functions ***
209 ;;;###autoload
210 (defun tildify-region (beg end &optional dont-ask)
211 "Add hard spaces in the region between BEG and END.
212 See variables `tildify-pattern', `tildify-space-string', and
213 `tildify-ignored-environments-alist' for information about configuration
214 parameters.
215 This function performs no refilling of the changed text.
216 If DONT-ASK is set, or called interactively with prefix argument, user
217 won't be prompted for confirmation of each substitution."
218 (interactive "*rP")
219 (let (case-fold-search (count 0) (ask (not dont-ask)))
220 (tildify--foreach-region
221 (lambda (beg end)
222 (let ((aux (tildify-tildify beg end ask)))
223 (setq count (+ count (car aux)))
224 (if (not (eq (cdr aux) 'force))
225 (cdr aux)
226 (setq ask nil)
227 t)))
228 beg end)
229 (message "%d spaces replaced." count)))
231 ;;;###autoload
232 (defun tildify-buffer (&optional dont-ask)
233 "Add hard spaces in the current buffer.
234 See variables `tildify-pattern', `tildify-space-string', and
235 `tildify-ignored-environments-alist' for information about configuration
236 parameters.
237 This function performs no refilling of the changed text.
238 If DONT-ASK is set, or called interactively with prefix argument, user
239 won't be prompted for confirmation of each substitution."
240 (interactive "*P")
241 (tildify-region (point-min) (point-max) dont-ask))
244 ;;; *** Auxiliary functions ***
246 (defun tildify--pick-alist-entry (mode-alist &optional mode)
247 "Return alist item for the MODE-ALIST in the current major MODE."
248 (let ((alist (cdr (or (assoc (or mode major-mode) mode-alist)
249 (assoc t mode-alist)))))
250 (if (and alist
251 (symbolp alist))
252 (tildify--pick-alist-entry mode-alist alist)
253 alist)))
254 (make-obsolete 'tildify--pick-alist-entry
255 "it should not be used in new code." "25.1")
257 (defun tildify--deprecated-ignore-evironments (callback beg end)
258 "Call CALLBACK on regions between BEG and END.
260 Call CALLBACK on each region outside of environment to ignore. Stop scanning
261 the region as soon as CALLBACK returns nil. Environments to ignore are
262 defined by deprecated `tildify-ignored-environments-alist'. CALLBACK may be
263 called on portions of the buffer outside of [BEG END)."
264 (let ((pairs (tildify--pick-alist-entry tildify-ignored-environments-alist)))
265 (if pairs
266 (tildify-foreach-ignore-environments pairs callback beg end)
267 (funcall callback beg end))))
268 (make-obsolete 'tildify--deprecated-ignore-evironments
269 "it should not be used in new code." "25.1")
271 (defun tildify-foreach-ignore-environments (pairs callback _beg end)
272 "Outside of environments defined by PAIRS call CALLBACK.
274 PAIRS is a list of (BEG-REGEX . END-REGEX) cons. BEG-REGEX is a regexp matching
275 beginning of a text part to be skipped. END-REGEX defines end of the
276 corresponding text part and can be either:
277 - a regexp matching the end of the skipped text part
278 - a list of regexps and numbers, which will compose the ending regexp by
279 concatenating themselves, while replacing the numbers with corresponding
280 subexpressions of BEG-REGEX (this is used to solve cases like
281 \\\\verb<character> in TeX).
283 CALLBACK is a function accepting two arguments -- REG-BEG and REG-END -- that
284 will be called for portions of the buffer outside of the environments defined by
285 PAIRS regexes.
287 The function will return as soon as CALLBACK returns nil or point goes past END.
288 CALLBACK may be called on portions of the buffer outside of [BEG END); in fact
289 BEG argument is ignored.
291 This function is meant to be used to set `tildify-foreach-region-function'
292 variable. For example, for an XML file one might use:
293 (setq-local tildify-foreach-region-function
294 (apply-partially 'tildify-foreach-ignore-environments
295 '((\"<! *--\" . \"-- *>\") (\"<\" . \">\"))))"
296 (let ((beg-re (concat "\\(?:" (mapconcat 'car pairs "\\)\\|\\(?:") "\\)"))
297 p end-re)
298 (save-excursion
299 (save-restriction
300 (widen)
301 (goto-char (point-min))
302 (while (and (< (setq p (point)) end)
303 (if (setq end-re (tildify--find-env beg-re pairs))
304 (and (funcall callback p (match-beginning 0))
305 (< (point) end)
306 (re-search-forward end-re nil t))
307 (funcall callback p end)
308 nil)))))))
310 (defun tildify--foreach-region (callback beg end)
311 "Call CALLBACK on portions of the buffer between BEG and END.
313 Which portions to call CALLBACK on is determined by
314 `tildify-foreach-region-function' variable. This function merely makes sure
315 CALLBACK is not called with portions of the buffer outside of [BEG END)."
316 (let ((func (lambda (reg-beg reg-end)
317 (setq reg-beg (max reg-beg beg) reg-end (min reg-end end))
318 (and (or (>= reg-beg reg-end)
319 (funcall callback reg-beg reg-end))
320 (< reg-end end)))))
321 (funcall tildify-foreach-region-function func beg end)))
323 (defun tildify--find-env (regexp pairs)
324 "Find environment using REGEXP.
325 Return regexp for the end of the environment found in PAIRS or nil if
326 no environment was found."
327 ;; Find environment
328 (when (re-search-forward regexp nil t)
329 (save-match-data
330 (let ((match (match-string 0)))
331 (while (not (eq (string-match (caar pairs) match) 0))
332 (setq pairs (cdr pairs)))
333 (let ((expression (cdar pairs)))
334 (if (stringp expression)
335 expression
336 (mapconcat
337 (lambda (expr)
338 (if (stringp expr)
339 expr
340 (regexp-quote (match-string expr match))))
341 expression
342 "")))))))
344 (defun tildify-tildify (beg end ask)
345 "Add tilde characters in the region between BEG and END.
346 This function does not do any further checking except of for comments and
347 macros.
349 If ASK is nil, perform replace without asking user for confirmation.
351 Returns (count . response) cons where count is number of string
352 replacements done and response is one of symbols: t (all right), nil
353 (quit), force (replace without further questions)."
354 (save-excursion
355 (goto-char beg)
356 (let ((regexp tildify-pattern)
357 (match-number 1)
358 (tilde (or (tildify--pick-alist-entry tildify-string-alist)
359 tildify-space-string))
360 (end-marker (copy-marker end))
361 answer
362 bad-answer
363 replace
364 quit
365 (message-log-max nil)
366 (count 0))
367 ;; For the time being, tildify-pattern-alist overwrites tildify-pattern
368 (let ((alist (tildify--pick-alist-entry tildify-pattern-alist)))
369 (when alist
370 (setq regexp (car alist) match-number (cadr alist))))
371 (while (and (not quit)
372 (re-search-forward regexp (marker-position end-marker) t))
373 (when (or (not ask)
374 (progn
375 (goto-char (match-beginning match-number))
376 (setq bad-answer t)
377 (while bad-answer
378 (setq bad-answer nil)
379 (message "Replace? (yn!q) ")
380 (setq answer (read-event)))
381 (cond
382 ((or (eq answer ?y) (eq answer ? ) (eq answer 'space))
383 (setq replace t))
384 ((eq answer ?n)
385 (setq replace nil))
386 ((eq answer ?!)
387 (setq replace t
388 ask nil))
389 ((eq answer ?q)
390 (setq replace nil
391 quit t))
393 (message "Press y, n, !, or q.")
394 (setq bad-answer t)))
395 replace))
396 (replace-match tilde t t nil match-number)
397 (setq count (1+ count))))
398 ;; Return value
399 (cons count (cond (quit nil)
400 ((not ask) 'force)
401 (t t))))))
404 ;;; *** Announce ***
406 (provide 'tildify)
409 ;; Local variables:
410 ;; coding: utf-8
411 ;; End:
413 ;;; tildify.el ends here