1 ;;; sregex.el --- symbolic regular expressions
3 ;; Copyright (C) 1997, 1998 Free Software Foundation, Inc.
5 ;; Author: Bob Glickstein <bobg+sregex@zanshin.com>
6 ;; Maintainer: Bob Glickstein <bobg+sregex@zanshin.com>
8 ;; This file is part of GNU Emacs.
10 ;; GNU Emacs is free software; you can redistribute it and/or modify
11 ;; it under the terms of the GNU General Public License as published by
12 ;; the Free Software Foundation; either version 2, or (at your option)
15 ;; GNU Emacs is distributed in the hope that it will be useful,
16 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 ;; GNU General Public License for more details.
20 ;; You should have received a copy of the GNU General Public License
21 ;; along with GNU Emacs; see the file COPYING. If not, write to the
22 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23 ;; Boston, MA 02111-1307, USA.
27 ;; This package allows you to write regular expressions using a
28 ;; totally new, Lisp-like syntax.
30 ;; A "symbolic regular expression" (sregex for short) is a Lisp form
31 ;; that, when evaluated, produces the string form of the specified
32 ;; regular expression. Here's a simple example:
34 ;; (sregexq (or "Bob" "Robert")) => "Bob\\|Robert"
36 ;; As you can see, an sregex is specified by placing one or more
37 ;; special clauses in a call to `sregexq'. The clause in this case is
38 ;; the `or' of two strings (not to be confused with the Lisp function
39 ;; `or'). The list of allowable clauses appears below.
41 ;; With sregex, it is never necessary to "escape" magic characters
42 ;; that are meant to be taken literally; that happens automatically.
45 ;; (sregexq "M*A*S*H") => "M\\*A\\*S\\*H"
47 ;; It is also unnecessary to "group" parts of the expression together
48 ;; to overcome operator precedence; that also happens automatically.
51 ;; (sregexq (opt (or "Bob" "Robert"))) => "\\(Bob\\|Robert\\)?"
53 ;; It *is* possible to group parts of the expression in order to refer
54 ;; to them with numbered backreferences:
56 ;; (sregexq (group (or "Go" "Run"))
58 ;; (backref 1)) => "\\(Go\\|Run\\), Spot, \\1"
60 ;; If `sregexq' needs to introduce its own grouping parentheses, it
61 ;; will automatically renumber your backreferences:
63 ;; (sregexq (opt "resent-")
64 ;; (group (or "to" "cc" "bcc"))
66 ;; (backref 1)) => "\\(resent-\\)?\\(to\\|cc\\|bcc\\): \\2"
68 ;; `sregexq' is a macro. Each time it is used, it constructs a simple
69 ;; Lisp expression that then invokes a moderately complex engine to
70 ;; interpret the sregex and render the string form. Because of this,
71 ;; I don't recommend sprinkling calls to `sregexq' throughout your
72 ;; code, the way one normally does with string regexes (which are
73 ;; cheap to evaluate). Instead, it's wiser to precompute the regexes
74 ;; you need wherever possible instead of repeatedly constructing the
75 ;; same ones over and over. Example:
77 ;; (let ((field-regex (sregexq (opt "resent-")
78 ;; (or "to" "cc" "bcc"))))
82 ;; (re-search-forward field-regex ...)
85 ;; The arguments to `sregexq' are automatically quoted, but the
86 ;; flipside of this is that it is not straightforward to include
87 ;; computed (i.e., non-constant) values in `sregexq' expressions. So
88 ;; `sregex' is a function that is like `sregexq' but which does not
89 ;; automatically quote its values. Literal sregex clauses must be
90 ;; explicitly quoted like so:
92 ;; (sregex '(or "Bob" "Robert")) => "Bob\\|Robert"
94 ;; but computed clauses can be included easily, allowing for the reuse of common clauses:
97 ;; (let ((dotstar '(0+ any))
98 ;; (whitespace '(1+ (syntax ?-)))
99 ;; (digits '(1+ (char (?0 . ?9)))))
100 ;; (sregex 'bol dotstar ":" whitespace digits)) => "^.*:\\s-+[0-9]+"
102 ;; This package also provides sregex-specific versions of the Emacs
103 ;; functions `replace-match', `match-string',
104 ;; `match-string-no-properties', `match-beginning', `match-end', and
105 ;; `match-data'. In each case, the sregex version's name begins with
106 ;; `sregex-' and takes one additional optional parameter, an sregex
107 ;; "info" object. Each of these functions is concerned with numbered
108 ;; submatches. Since sregex may renumber submatches, alternate
109 ;; versions of these functions are needed that know how to adjust the supplied number.
112 ;; The sregex info object for the most recently evaluated sregex can
113 ;; be obtained with `sregex-info'; so if you precompute your sregexes
114 ;; and you plan to use `replace-match' or one of the others with it,
115 ;; you need to record the info object for later use:
117 ;; (let* ((regex (sregexq (opt "resent-")
118 ;; (group (or "to" "cc" "bcc"))
120 ;; (regex-info (sregex-info)))
122 ;; (if (re-search-forward regex ...)
123 ;; (let ((which (sregex-match-string 1 nil regex-info)))
126 ;; In this example, `regex' is "\\(resent-\\)?\\(to\\|cc\\|bcc\\):",
127 ;; so the call to (sregex-match-string 1 ...) is automatically turned
128 ;; into a call to (match-string 2 ...).
130 ;; If the sregex info argument to `sregex-replace-match',
131 ;; `sregex-match-string', `sregex-match-string-no-properties',
132 ;; `sregex-match-beginning', `sregex-match-end', or
133 ;; `sregex-match-data' is omitted, the current value of (sregex-info) is used.
136 ;; You can do your own sregex submatch renumbering with
137 ;; `sregex-backref-num'.
139 ;; Finally, `sregex-save-match-data' is like `save-match-data' but
140 ;; also saves and restores the information maintained by `sregex-info'.
143 ;; To use this package in a Lisp program, simply (require 'sregex).
145 ;; Here are the clauses allowed in an `sregex' or `sregexq'
149 ;; This stands for the literal string. If it contains
150 ;; metacharacters, they will be escaped in the resulting regex
151 ;; (using `regexp-quote').
153 ;; - the symbol `any'
154 ;; This stands for ".", a regex matching any character except
157 ;; - the symbol `bol'
158 ;; Stands for "^", matching the empty string at the beginning of a line
160 ;; - the symbol `eol'
161 ;; Stands for "$", matching the empty string at the end of a line
163 ;; - (group CLAUSE ...)
164 ;; Groups the given CLAUSEs using "\\(" and "\\)".
166 ;; - (sequence CLAUSE ...)
168 ;; Groups the given CLAUSEs; may or may not use "\\(" and "\\)".
169 ;; Clauses grouped by `sequence' do not count for purposes of
170 ;; numbering backreferences. Use `sequence' in situations like
173 ;; (sregexq (or "dog" "cat"
174 ;; (sequence (opt "sea ") "monkey")))
175 ;; => "dog\\|cat\\|\\(sea \\)?monkey"
177 ;; where a single `or' alternate needs to contain multiple subclauses.
181 ;; Matches the same string previously matched by the Nth "group" in
182 ;; the same sregex. N is a positive integer. In the resulting
183 ;; regex, N may be adjusted to account for automatically introduced
187 ;; Matches any one of the CLAUSEs by separating them with "\\|".
190 ;; Concatenates the given CLAUSEs and matches zero or more
191 ;; occurrences by appending "*".
194 ;; Concatenates the given CLAUSEs and matches one or more
195 ;; occurrences by appending "+".
197 ;; - (opt CLAUSE ...)
198 ;; Concatenates the given CLAUSEs and matches zero or one occurrence by appending "?".
201 ;; - (repeat MIN MAX CLAUSE ...)
202 ;; Concatenates the given CLAUSEs and constructs a regex matching at
203 ;; least MIN occurrences and at most MAX occurrences. MIN must be a
204 ;; non-negative integer. MAX must be a non-negative integer greater
205 ;; than or equal to MIN; or MAX can be nil to mean "infinity."
207 ;; - (char CHAR-CLAUSE ...)
208 ;; Creates a "character class" matching one character from the given
209 ;; set. See below for how to construct a CHAR-CLAUSE.
211 ;; - (not-char CHAR-CLAUSE ...)
212 ;; Creates a "character class" matching any one character not in the
213 ;; given set. See below for how to construct a CHAR-CLAUSE.
215 ;; - the symbol `bot'
216 ;; Stands for "\\`", matching the empty string at the beginning of
217 ;; text (beginning of a string or of a buffer).
219 ;; - the symbol `eot'
220 ;; Stands for "\\'", matching the empty string at the end of text.
222 ;; - the symbol `point'
223 ;; Stands for "\\=", matching the empty string at point.
225 ;; - the symbol `word-boundary'
226 ;; Stands for "\\b", matching the empty string at the beginning or
229 ;; - the symbol `not-word-boundary'
230 ;; Stands for "\\B", matching the empty string not at the beginning
233 ;; - the symbol `bow'
234 ;; Stands for "\\<", matching the empty string at the beginning of a
237 ;; - the symbol `eow'
238 ;; Stands for "\\>", matching the empty string at the end of a word.
240 ;; - the symbol `wordchar'
241 ;; Stands for the regex "\\w", matching a word-constituent character
242 ;; (as determined by the current syntax table)
244 ;; - the symbol `not-wordchar'
245 ;; Stands for the regex "\\W", matching a non-word-constituent
249 ;; Stands for the regex "\\sCODE", where CODE is a syntax table code
250 ;; (a single character). Matches any character with the requested
253 ;; - (not-syntax CODE)
254 ;; Stands for the regex "\\SCODE", where CODE is a syntax table code
255 ;; (a single character). Matches any character without the
259 ;; This is a "trapdoor" for including ordinary regular expression
260 ;; strings in the result. Some regular expressions are clearer when
261 ;; written the old way: "[a-z]" vs. (sregexq (char (?a . ?z))), for
262 ;; instance. However, see the note under "Bugs," below.
264 ;; Each CHAR-CLAUSE that is passed to (char ...) and (not-char ...)
265 ;; has one of the following forms:
268 ;; Adds that character to the set.
271 ;; Adds all the characters in the string to the set.
273 ;; - A pair (MIN . MAX)
274 ;; Where MIN and MAX are characters, adds the range of characters
275 ;; from MIN through MAX to the set.
279 ;; Make (sregexq (or "a" (sequence "b" "c"))) return "a\\|bc" instead
282 ;; An earlier version of this package could optionally translate the
283 ;; symbolic regex into other languages' syntaxes, e.g. Perl. For
284 ;; instance, with Perl syntax selected, (sregexq (or "ab" "cd")) would
285 ;; yield "ab|cd" instead of "ab\\|cd". It might be useful to restore
290 ;; The (regex REGEX) form can confuse the code that distinguishes
291 ;; introduced groups from user-specified groups. Try to avoid using
292 ;; grouping within a `regex' form. Failing that, try to avoid using
293 ;; backrefs if you're using `regex'.
(defsubst sregex--value-unitp (val)
  "Return the UNITP slot (first element) of the sregex value VAL."
  (car val))
(defsubst sregex--value-groups (val)
  "Return the GROUPS slot (second element) of the sregex value VAL."
  (car (cdr val)))
(defsubst sregex--value-tree (val)
  "Return the TREE slot (third element) of the sregex value VAL."
  (car (cdr (cdr val))))
(defun sregex--make-value (unitp groups tree)
  "Build an sregex value: the three-element list (UNITP GROUPS TREE).
The slots are read back with `sregex--value-unitp',
`sregex--value-groups' and `sregex--value-tree'."
  (cons unitp (cons groups (cons tree nil))))
(defvar sregex--current-sregex nil
  "Global state for `sregex-info'.
`sregex' stores here the value it most recently computed.")
(defun sregex-info ()
  "Return extra information about the latest call to `sregex'.
This extra information is needed in order to adjust user-requested
backreference numbers to numbers suitable for the generated regexp.
See e.g. `sregex-match-string' and `sregex-backref-num'."
  (symbol-value 'sregex--current-sregex))
315 ; (defadvice save-match-data (around sregex-save-match-data protect)
316 ; (let ((sregex--saved-sregex sregex--current-sregex))
319 ; (setq sregex--current-sregex sregex--saved-sregex))))
(defmacro sregex-save-match-data (&rest forms)
  "Like `save-match-data', but also saves and restores `sregex-info' data.
Evaluate FORMS and return the value of the last one.  The value of
`sregex--current-sregex' in effect on entry is put back afterwards,
even when FORMS exit non-locally (error, `throw', quit)."
  ;; An uninterned symbol holds the saved state so that FORMS can
  ;; neither see nor clobber it.
  (let ((saved (make-symbol "saved-sregex")))
    `(let ((,saved sregex--current-sregex))
       (unwind-protect
           (save-match-data ,@forms)
         (setq sregex--current-sregex ,saved)))))
(defun sregex-replace-match (replacement
                             &optional fixedcase literal string subexp sregex)
  "Like `replace-match', for a regexp made with `sregex'.
This takes one additional optional argument, the `sregex' info, which
can be obtained with `sregex-info'.  The SUBEXP argument is adjusted
to allow for \"introduced groups\".  If the extra argument is omitted
or nil, it defaults to the current value of (sregex-info).

REPLACEMENT, FIXEDCASE, LITERAL and STRING are passed to
`replace-match' unchanged."
  (replace-match replacement fixedcase literal string
                 ;; A nil SUBEXP means "the whole match"; pass it
                 ;; through rather than trying to renumber it.
                 (and subexp
                      (sregex-backref-num subexp sregex))))
(defun sregex-match-string (count &optional in-string sregex)
  "Like `match-string', for a regexp made with `sregex'.
This takes one additional optional argument, the `sregex' info, which
can be obtained with `sregex-info'.  The COUNT argument is adjusted to
allow for \"introduced groups\".  If the extra argument is omitted or
nil, it defaults to the current value of (sregex-info).

IN-STRING is passed to `match-string' unchanged."
  (match-string (and count
                     (sregex-backref-num count sregex))
                in-string))
(defun sregex-match-string-no-properties (count &optional in-string sregex)
  "Like `match-string-no-properties', for a regexp made with `sregex'.
This takes one additional optional argument, the `sregex' info, which
can be obtained with `sregex-info'.  The COUNT argument is adjusted to
allow for \"introduced groups\".  If the extra argument is omitted or
nil, it defaults to the current value of (sregex-info).

IN-STRING is passed to `match-string-no-properties' unchanged."
  (match-string-no-properties
   (and count
        (sregex-backref-num count sregex))
   in-string))
(defun sregex-match-beginning (count &optional sregex)
  "Like `match-beginning', for a regexp made with `sregex'.
This takes one additional optional argument, the `sregex' info, which
can be obtained with `sregex-info'.  The COUNT argument is adjusted to
allow for \"introduced groups\".  If the extra argument is omitted or
nil, it defaults to the current value of (sregex-info)."
  (let ((adjusted (sregex-backref-num count sregex)))
    (match-beginning adjusted)))
(defun sregex-match-end (count &optional sregex)
  "Like `match-end', for a regexp made with `sregex'.
This takes one additional optional argument, the `sregex' info, which
can be obtained with `sregex-info'.  The COUNT argument is adjusted to
allow for \"introduced groups\".  If the extra argument is omitted or
nil, it defaults to the current value of (sregex-info)."
  (let ((adjusted (sregex-backref-num count sregex)))
    (match-end adjusted)))
(defun sregex-match-data (&optional sregex)
  "Like `match-data', for a regexp made with `sregex'.
This takes one additional optional argument, the `sregex' info, which
can be obtained with `sregex-info'.  \"Introduced groups\" are removed
from the result.  If the extra argument is omitted or nil, it defaults
to the current value of (sregex-info)."
  (let* ((data (match-data))
         (groups (sregex--value-groups (or sregex
                                           sregex--current-sregex)))
         ;; RESULT is accumulated back to front, so each pair is stored
         ;; as END then BEGINNING.  The whole-match pair is always kept.
         (result (list (car (cdr data))
                       (car data))))
    (setq data (cdr (cdr data)))
    (while data
      ;; The Nth entry of GROUPS is non-nil when the Nth parenthesized
      ;; group was requested by the user; nil marks an introduced one.
      (if (car groups)
          (setq result (cons (car (cdr data))
                             (cons (car data) result))))
      (setq groups (cdr groups)
            data (cdr (cdr data))))
    (nreverse result)))
(defun sregex--render-tree (tree sregex)
  "Render the sregex TREE as a regexp string.
TREE is a cons whose car names the node type:
  (str . STRING)       rendered as STRING itself
  (or TREE ...)        alternatives joined with \"\\\\|\"
  (sequence TREE ...)  subtrees concatenated
  (group . TREE)       TREE wrapped in \"\\\\(\" and \"\\\\)\"
  (opt . TREE)         TREE followed by \"?\"
  (0+ . TREE)          TREE followed by \"*\"
  (1+ . TREE)          TREE followed by \"+\"
  (backref . N)        a backslash followed by the adjusted group number
SREGEX is the sregex value used by `sregex-backref-num' to renumber
backreferences.  Signal an error for an unknown node type, or when an
adjusted backreference number exceeds 9."
  (let ((key (car tree)))
    (cond ((eq key 'str)
           (cdr tree))
          ((eq key 'or)
           (mapconcat (function
                       (lambda (x)
                         (sregex--render-tree x sregex)))
                      (cdr tree)
                      "\\|"))
          ((eq key 'sequence)
           (apply 'concat
                  (mapcar (function
                           (lambda (x)
                             (sregex--render-tree x sregex)))
                          (cdr tree))))
          ((eq key 'group)
           (concat "\\("
                   (sregex--render-tree (cdr tree) sregex)
                   "\\)"))
          ((eq key 'opt)
           (concat (sregex--render-tree (cdr tree) sregex)
                   "?"))
          ((eq key '0+)
           (concat (sregex--render-tree (cdr tree) sregex)
                   "*"))
          ((eq key '1+)
           (concat (sregex--render-tree (cdr tree) sregex)
                   "+"))
          ((eq key 'backref)
           (let ((num (sregex-backref-num (cdr tree) sregex)))
             ;; Only single-digit backreferences are emitted.
             (if (> num 9)
                 (error "sregex: backref number %d too high after adjustment"
                        num)
               (concat "\\" (int-to-string num)))))
          (t (error "sregex internal error: unknown tree type %S"
                    key)))))
(defun sregex (&rest exps)
  "Symbolic regular expression interpreter.
This is exactly like `sregexq' (q.v.) except that it evaluates all its
arguments, so literal sregex clauses must be quoted.  For example:

  (sregex '(or \"Bob\" \"Robert\"))  =>  \"Bob\\\\|Robert\"

An argument-evaluating sregex interpreter lets you reuse sregex
subexpressions:

  (let ((dotstar '(0+ any))
        (whitespace '(1+ (syntax ?-)))
        (digits '(1+ (char (?0 . ?9)))))
    (sregex 'bol dotstar \":\" whitespace digits))  =>  \"^.*:\\\\s-+[0-9]+\""
  (let ((value (sregex--sequence exps nil)))
    ;; Record the value so that `sregex-info' can hand it out later.
    (setq sregex--current-sregex value)
    (sregex--render-tree (sregex--value-tree value) value)))
(defmacro sregexq (&rest exps)
  "Symbolic regular expression interpreter.
This macro allows you to specify a regular expression (regexp) in
symbolic form, and converts it into the string form required by Emacs's
regex functions such as `re-search-forward' and `looking-at'.  Here is
a simple example:

  (sregexq (or \"Bob\" \"Robert\"))  =>  \"Bob\\\\|Robert\"

As you can see, an sregex is specified by placing one or more special
clauses in a call to `sregexq'.  The clause in this case is the `or'
of two strings (not to be confused with the Lisp function `or').  The
list of allowable clauses appears below.

With `sregex', it is never necessary to \"escape\" magic characters
that are meant to be taken literally; that happens automatically.
For example:

  (sregexq \"M*A*S*H\")  =>  \"M\\\\*A\\\\*S\\\\*H\"

It is also unnecessary to \"group\" parts of the expression together
to overcome operator precedence; that also happens automatically.
For example:

  (sregexq (opt (or \"Bob\" \"Robert\")))  =>  \"\\\\(Bob\\\\|Robert\\\\)?\"

It *is* possible to group parts of the expression in order to refer
to them with numbered backreferences:

  (sregexq (group (or \"Go\" \"Run\"))
           \", Spot, \"
           (backref 1))  =>  \"\\\\(Go\\\\|Run\\\\), Spot, \\\\1\"

If `sregexq' needs to introduce its own grouping parentheses, it will
automatically renumber your backreferences:

  (sregexq (opt \"resent-\")
           (group (or \"to\" \"cc\" \"bcc\"))
           \": \"
           (backref 1))  =>  \"\\\\(resent-\\\\)?\\\\(to\\\\|cc\\\\|bcc\\\\): \\\\2\"

`sregexq' is a macro.  Each time it is used, it constructs a simple
Lisp expression that then invokes a moderately complex engine to
interpret the sregex and render the string form.  Because of this, I
don't recommend sprinkling calls to `sregexq' throughout your code,
the way one normally does with string regexes (which are cheap to
evaluate).  Instead, it's wiser to precompute the regexes you need
wherever possible instead of repeatedly constructing the same ones
over and over.  Example:

   (let ((field-regex (sregexq (opt \"resent-\")
                               (or \"to\" \"cc\" \"bcc\"))))
     ...
     (while ...
       ...
       (re-search-forward field-regex ...)
       ...))

The arguments to `sregexq' are automatically quoted, but the
flipside of this is that it is not straightforward to include
computed (i.e., non-constant) values in `sregexq' expressions.  So
`sregex' is a function that is like `sregexq' but which does not
automatically quote its values.  Literal sregex clauses must be
explicitly quoted like so:

  (sregex '(or \"Bob\" \"Robert\"))  =>  \"Bob\\\\|Robert\"

but computed clauses can be included easily, allowing for the reuse
of common clauses:

  (let ((dotstar '(0+ any))
        (whitespace '(1+ (syntax ?-)))
        (digits '(1+ (char (?0 . ?9)))))
    (sregex 'bol dotstar \":\" whitespace digits))  =>  \"^.*:\\\\s-+[0-9]+\"

Here are the clauses allowed in an `sregex' or `sregexq' expression:

- a string
  This stands for the literal string.  If it contains
  metacharacters, they will be escaped in the resulting regex
  (using `regexp-quote').

- the symbol `any'
  This stands for \".\", a regex matching any character except
  newline.

- the symbol `bol'
  Stands for \"^\", matching the empty string at the beginning of a line

- the symbol `eol'
  Stands for \"$\", matching the empty string at the end of a line

- (group CLAUSE ...)
  Groups the given CLAUSEs using \"\\\\(\" and \"\\\\)\".

- (sequence CLAUSE ...)

  Groups the given CLAUSEs; may or may not use \"\\\\(\" and \"\\\\)\".
  Clauses grouped by `sequence' do not count for purposes of
  numbering backreferences.  Use `sequence' in situations like
  this:

    (sregexq (or \"dog\" \"cat\"
                 (sequence (opt \"sea \") \"monkey\")))
                                 =>  \"dog\\\\|cat\\\\|\\\\(sea \\\\)?monkey\"

  where a single `or' alternate needs to contain multiple
  subclauses.

- (backref N)
  Matches the same string previously matched by the Nth \"group\" in
  the same sregex.  N is a positive integer.  In the resulting
  regex, N may be adjusted to account for automatically introduced
  groups.

- (or CLAUSE ...)
  Matches any one of the CLAUSEs by separating them with \"\\\\|\".

- (0+ CLAUSE ...)
  Concatenates the given CLAUSEs and matches zero or more
  occurrences by appending \"*\".

- (1+ CLAUSE ...)
  Concatenates the given CLAUSEs and matches one or more
  occurrences by appending \"+\".

- (opt CLAUSE ...)
  Concatenates the given CLAUSEs and matches zero or one occurrence
  by appending \"?\".

- (repeat MIN MAX CLAUSE ...)
  Concatenates the given CLAUSEs and constructs a regex matching at
  least MIN occurrences and at most MAX occurrences.  MIN must be a
  non-negative integer.  MAX must be a non-negative integer greater
  than or equal to MIN; or MAX can be nil to mean \"infinity.\"

- (char CHAR-CLAUSE ...)
  Creates a \"character class\" matching one character from the given
  set.  See below for how to construct a CHAR-CLAUSE.

- (not-char CHAR-CLAUSE ...)
  Creates a \"character class\" matching any one character not in the
  given set.  See below for how to construct a CHAR-CLAUSE.

- the symbol `bot'
  Stands for \"\\\\`\", matching the empty string at the beginning of
  text (beginning of a string or of a buffer).

- the symbol `eot'
  Stands for \"\\\\'\", matching the empty string at the end of text.

- the symbol `point'
  Stands for \"\\\\=\", matching the empty string at point.

- the symbol `word-boundary'
  Stands for \"\\\\b\", matching the empty string at the beginning or
  end of a word.

- the symbol `not-word-boundary'
  Stands for \"\\\\B\", matching the empty string not at the beginning
  or end of a word.

- the symbol `bow'
  Stands for \"\\\\\\=<\", matching the empty string at the beginning of a
  word.

- the symbol `eow'
  Stands for \"\\\\\\=>\", matching the empty string at the end of a word.

- the symbol `wordchar'
  Stands for the regex \"\\\\w\", matching a word-constituent character
  (as determined by the current syntax table)

- the symbol `not-wordchar'
  Stands for the regex \"\\\\W\", matching a non-word-constituent
  character.

- (syntax CODE)
  Stands for the regex \"\\\\sCODE\", where CODE is a syntax table code
  (a single character).  Matches any character with the requested
  syntax.

- (not-syntax CODE)
  Stands for the regex \"\\\\SCODE\", where CODE is a syntax table code
  (a single character).  Matches any character without the
  requested syntax.

- (regex REGEX)
  This is a \"trapdoor\" for including ordinary regular expression
  strings in the result.  Some regular expressions are clearer when
  written the old way: \"[a-z]\" vs. (sregexq (char (?a . ?z))), for
  instance.  However, using this can confuse the code that
  distinguishes introduced groups from user-specified groups.  Avoid
  using grouping within a `regex' form.  Failing that, avoid using
  backrefs if you're using `regex'.

Each CHAR-CLAUSE that is passed to (char ...) and (not-char ...)
has one of the following forms:

- a character
  Adds that character to the set.

- a string
  Adds all the characters in the string to the set.

- A pair (MIN . MAX)
  Where MIN and MAX are characters, adds the range of characters
  from MIN through MAX to the set."
  ;; Expands to (apply 'sregex '(EXP ...)), i.e. the clauses are handed
  ;; to `sregex' unevaluated.
  (list 'apply ''sregex (list 'quote exps)))
(defun sregex--engine (exp combine)
  "Interpret the single sregex clause EXP and return its sregex value.
EXP is handled according to its type:
- a string becomes a (str . QUOTED) tree, quoted with `regexp-quote';
- a symbol NAME calls the function `sregex--NAME' with COMBINE;
- a list (NAME ARG ...) calls `sregex--NAME' with (ARG ...) and COMBINE.
Anything else signals an error.

COMBINE describes how the caller will use the result (nil, `suffix',
`concat' or `or' in this file).  When COMBINE is non-nil and the
clause's value is not a unit, the tree is wrapped in an introduced
group, which is recorded as a nil entry at the front of the groups
list."
  (let* ((val (cond ((stringp exp)
                     ;; A multi-character string is not a unit when a
                     ;; suffix operator is about to be applied to it.
                     (sregex--make-value (or (not (eq combine 'suffix))
                                             (= (length exp) 1))
                                         nil
                                         (cons 'str
                                               (regexp-quote exp))))
                    ((symbolp exp)
                     (funcall (intern (concat "sregex--"
                                              (symbol-name exp)))
                              combine))
                    ((consp exp)
                     (funcall (intern (concat "sregex--"
                                              (symbol-name (car exp))))
                              (cdr exp)
                              combine))
                    (t (error "Invalid expression: %s" exp))))
         (unitp (sregex--value-unitp val))
         (groups (sregex--value-groups val))
         (tree (sregex--value-tree val)))
    (if (and combine (not unitp))
        (sregex--make-value t
                            (cons nil groups)
                            (cons 'group tree))
      (sregex--make-value unitp groups tree))))
(defun sregex--sequence (exps combine)
  "Return the sregex value for the clauses EXPS matched one after another.
A single clause is passed straight to `sregex--engine' with COMBINE.
Otherwise each clause is interpreted with `concat' as its combine
mode, the groups lists are appended in order, and the trees are
collected under a `sequence' node.  When COMBINE is `suffix' the whole
sequence is wrapped in an introduced group so that a suffix operator
applies to all of it."
  (if (= (length exps) 1)
      (sregex--engine (car exps) combine)
    (let ((groups nil)
          (trees nil))                  ;grows in reverse
      (while exps
        (let ((val (sregex--engine (car exps) 'concat)))
          (setq groups (append groups
                               (sregex--value-groups val))
                trees (cons (sregex--value-tree val) trees)
                exps (cdr exps))))
      (setq trees (nreverse trees))
      (if (eq combine 'suffix)
          (sregex--make-value t
                              (cons nil groups)
                              (cons 'group
                                    (cons 'sequence trees)))
        (sregex--make-value (not (eq combine 'suffix))
                            groups
                            (cons 'sequence trees))))))
(defun sregex--group (exps combine)
  "Return the sregex value for the clause (group EXPS...).
The clauses are interpreted as a sequence, a t entry (marking a
user-requested group) is put at the front of the groups list, and the
tree is wrapped in a `group' node.  COMBINE is ignored."
  (let* ((inner (sregex--sequence exps nil))
         (groups (cons t (sregex--value-groups inner)))
         (tree (cons 'group (sregex--value-tree inner))))
    (sregex--make-value t groups tree)))
(defun sregex-backref-num (n &optional sregex)
  "Adjust backreference number N according to SREGEX.
When `sregex' introduces parenthesized groups that the user didn't ask
for, the numbering of the groups that the user *did* ask for gets all
out of whack.  This function accounts for introduced groups.  Example:

  (sregexq (opt \"ab\")
           (group (or \"c\" \"d\")))  =>  \"\\\\(ab\\\\)?\\\\(c\\\\|d\\\\)\"
  (setq info (sregex-info))
  (sregex-backref-num 1 info)  =>  2

The SREGEX parameter is optional and defaults to the current value of
`sregex-info'."
  (let ((groups (sregex--value-groups (or sregex
                                          sregex--current-sregex)))
        (result 0))
    ;; Walk the groups list in regexp order.  Every entry advances the
    ;; real group number; only user-requested (non-nil) entries count
    ;; towards N.
    (while (and groups (> n 0))
      (if (car groups)
          (setq n (1- n)))
      (setq result (1+ result)
            groups (cdr groups)))
    result))
(defun sregex--backref (exps combine)
  "Return the sregex value for the clause (backref N).
N is the first element of EXPS; it is stored unadjusted in a
\(backref . N) tree.  COMBINE is ignored."
  (let ((tree (cons 'backref (car exps))))
    (sregex--make-value t nil tree)))
(defun sregex--any (combine)
  "Return the sregex value for the symbol `any': the regexp \".\".
COMBINE is ignored."
  (let ((tree '(str . ".")))
    (sregex--make-value t nil tree)))
(defun sregex--opt (exps combine)
  "Return the sregex value for the clause (opt EXPS...).
The clauses are interpreted as a sequence in `suffix' mode and the
resulting tree is wrapped in an `opt' node.  COMBINE is ignored."
  (let* ((inner (sregex--sequence exps 'suffix))
         (groups (sregex--value-groups inner))
         (tree (cons 'opt (sregex--value-tree inner))))
    (sregex--make-value t groups tree)))
(defun sregex--0+ (exps combine)
  "Return the sregex value for the clause (0+ EXPS...).
The clauses are interpreted as a sequence in `suffix' mode and the
resulting tree is wrapped in a `0+' node.  COMBINE is ignored."
  (let* ((inner (sregex--sequence exps 'suffix))
         (groups (sregex--value-groups inner))
         (tree (cons '0+ (sregex--value-tree inner))))
    (sregex--make-value t groups tree)))
(defun sregex--1+ (exps combine)
  "Return the sregex value for the clause (1+ EXPS...).
The clauses are interpreted as a sequence in `suffix' mode and the
resulting tree is wrapped in a `1+' node.  COMBINE is ignored."
  (let* ((inner (sregex--sequence exps 'suffix))
         (groups (sregex--value-groups inner))
         (tree (cons '1+ (sregex--value-tree inner))))
    (sregex--make-value t groups tree)))
(defun sregex--repeat (exps combine)
  "Return the sregex value for the clause (repeat MIN MAX CLAUSE ...).
EXPS is (MIN MAX CLAUSE ...).  A nil MIN is treated as 0; a nil MAX
means no upper bound.  The repetition is expressed with the simpler
clauses:
- MIN 0: nothing (MAX 0), `opt' (MAX 1), `0+' (MAX nil), otherwise
  MAX copies of (opt CLAUSE ...);
- MIN 1: the plain sequence (MAX 1), `1+' (MAX nil), otherwise the
  sequence followed by MAX-1 copies of (opt CLAUSE ...);
- larger MIN: the sequence followed by (repeat MIN-1 MAX-1 CLAUSE ...).
Signal an error when MIN is negative or MAX is less than MIN.
COMBINE is passed on to whichever function builds the result."
  (let ((min (or (car exps) 0))
        (max (car (cdr exps))))
    (setq exps (cdr (cdr exps)))
    ;; Reject bounds the expansion below cannot terminate on.
    (if (or (not (integerp min)) (< min 0))
        (error "sregex: repeat MIN must be a non-negative integer: %S" min))
    (if (and max (or (not (integerp max)) (< max min)))
        (error "sregex: repeat MAX must be nil or an integer >= MIN: %S" max))
    (cond ((zerop min)
           (cond ((equal max 0)         ;degenerate
                  ;; Zero repetitions match the empty string; an empty
                  ;; `str' leaf keeps the tree renderable.
                  (sregex--make-value t nil (cons 'str "")))
                 ((equal max 1)
                  (sregex--opt exps combine))
                 ((not max)
                  (sregex--0+ exps combine))
                 (t (sregex--sequence (make-list max
                                                 (cons 'opt exps))
                                      combine))))
          ((= min 1)
           (cond ((equal max 1)
                  (sregex--sequence exps combine))
                 ((not max)
                  (sregex--1+ exps combine))
                 (t (sregex--sequence (append exps
                                              (make-list (- max 1)
                                                         (cons 'opt exps)))
                                      combine))))
          (t (sregex--sequence (append exps
                                       (list (append (list 'repeat
                                                           (- min 1)
                                                           (and max
                                                                (- max 1)))
                                                     exps)))
                               combine)))))
(defun sregex--or (exps combine)
  "Return the sregex value for the clause (or EXPS...).
A single alternative is passed straight to `sregex--engine' with
COMBINE.  Otherwise each alternative is interpreted with `or' as its
combine mode, the groups lists are appended in order, and the trees
are collected under an `or' node.  The result is marked as a unit only
when COMBINE is itself `or'."
  (if (= (length exps) 1)
      (sregex--engine (car exps) combine)
    (let ((groups nil)
          (trees nil))                  ;grows in reverse
      (while exps
        (let ((val (sregex--engine (car exps) 'or)))
          (setq groups (append groups
                               (sregex--value-groups val))
                trees (cons (sregex--value-tree val) trees)
                exps (cdr exps))))
      (sregex--make-value (eq combine 'or)
                          groups
                          (cons 'or (nreverse trees))))))
(defmacro sregex--char-range-aux ()
  "Expand to code appending the pending run START..END to CLASS.
This macro is deliberately unhygienic: the expansion reads the
variables `start' and `end' and updates `class', all of which must be
bound where the macro is used.  It does nothing when `start' is nil.
Characters from 32 through 127 are written literally; others are
written as a backslash and three octal digits.  A run of three or
more characters is written as START-END, a run of two as both
characters, and a single character on its own."
  ;; NOTE(review): Emacs bracket expressions may not interpret the
  ;; backslash-octal form as a character escape -- verify before
  ;; relying on classes that contain control or 8-bit characters.
  '(if start
       (let (startc endc)
         (if (and (<= 32 start)
                  (<= start 127))
             (setq startc (char-to-string start)
                   endc (char-to-string end))
           (setq startc (format "\\%03o" start)
                 endc (format "\\%03o" end)))
         (if (> end start)
             (if (> end (+ start 1))
                 (setq class (concat class startc "-" endc))
               (setq class (concat class startc endc)))
           (setq class (concat class startc))))))
(defmacro sregex--char-range (rstart rend)
  "Expand to code that emits the set characters between RSTART and REND.
This macro is deliberately unhygienic: the expansion scans the vector
`chars' from index RSTART through REND, which must be bound where the
macro is used, together with `class' (updated through
`sregex--char-range-aux').  Consecutive set entries are gathered into
a run; each run is flushed with `sregex--char-range-aux' and its
entries in `chars' are cleared."
  `(let ((i ,rstart)
         start end)
     (while (<= i ,rend)
       (if (aref chars i)
           (progn
             ;; Extend the current run, or open a new one.
             (if start
                 (setq end i)
               (setq start i
                     end i))
             (aset chars i nil))
         ;; A gap ends the current run, if any.
         (sregex--char-range-aux)
         (setq start nil
               end nil))
       (setq i (1+ i)))
     ;; Flush a run that reaches REND.
     (sregex--char-range-aux)))
(defun sregex--char-aux (complement args)
  "Return a bracket expression string for the character clauses ARGS.
Each element of ARGS is a character, a string (all of its characters
are added), or a pair (MIN . MAX) giving an inclusive range; a pair
given in the wrong order is swapped.  Characters are tracked in a
256-element vector, so only character codes below 256 are supported.
When COMPLEMENT is non-nil the result is a negated class.

Inside the brackets, \"]\" is emitted first, then \"-\" and the
backslash, then ranges of letters and digits, then the remaining
characters, and \"^\" last so that it cannot be taken as negation."
  (let ((chars (make-vector 256 nil)))
    (while args
      (let ((arg (car args)))
        (cond ((integerp arg)
               (aset chars arg t))
              ((stringp arg)
               (let ((idx 0)
                     (len (length arg)))
                 (while (< idx len)
                   (aset chars (aref arg idx) t)
                   (setq idx (1+ idx)))))
              ((consp arg)
               (let ((start (car arg))
                     (end (cdr arg)))
                 (if (> start end)
                     (let ((tmp start))
                       (setq start end
                             end tmp)))
                 ;; now start <= end
                 (let ((i start))
                   (while (<= i end)
                     (aset chars i t)
                     (setq i (1+ i))))))))
      (setq args (cdr args)))
    ;; now chars is a map of the characters in the class
    (let ((class "")
          (caret (aref chars ?^)))
      (aset chars ?^ nil)
      (if (aref chars ?\])
          (progn
            (setq class (concat class "]"))
            (aset chars ?\] nil)))
      (if (aref chars ?-)
          (progn
            (setq class (concat class "-"))
            (aset chars ?- nil)))
      (if (aref chars ?\\)
          (progn
            (setq class (concat class "\\\\"))
            (aset chars ?\\ nil)))

      (sregex--char-range ?A ?Z)
      (sregex--char-range ?a ?z)
      (sregex--char-range ?0 ?9)

      ;; Remaining printable characters, one at a time.
      (let ((i 32))
        (while (< i 128)
          (if (aref chars i)
              (progn
                (setq class (concat class (char-to-string i)))
                (aset chars i nil)))
          (setq i (1+ i))))

      (sregex--char-range 0 31)
      (sregex--char-range 128 255)

      ;; Anything still set is written in backslash-octal form.
      (let ((i 0))
        (while (< i 256)
          (if (aref chars i)
              (setq class (concat class (format "\\%03o" i))))
          (setq i (1+ i))))

      (if caret
          (setq class (concat class "^")))
      (concat "[" (if complement "^") class "]"))))
(defun sregex--char (exps combine)
  "Return the sregex value for the clause (char EXPS...).
The tree is a `str' leaf holding the bracket expression that
`sregex--char-aux' builds from EXPS.  COMBINE is ignored."
  (let ((class (sregex--char-aux nil exps)))
    (sregex--make-value t nil (cons 'str class))))
(defun sregex--not-char (exps combine)
  "Return the sregex value for the clause (not-char EXPS...).
The tree is a `str' leaf holding the negated bracket expression that
`sregex--char-aux' builds from EXPS.  COMBINE is ignored."
  (let ((class (sregex--char-aux t exps)))
    (sregex--make-value t nil (cons 'str class))))
(defun sregex--bol (combine)
  "Return the sregex value for the symbol `bol': the regexp \"^\".
COMBINE is ignored."
  (let ((tree '(str . "^")))
    (sregex--make-value t nil tree)))
(defun sregex--eol (combine)
  "Return the sregex value for the symbol `eol': the regexp \"$\".
COMBINE is ignored."
  (let ((tree '(str . "$")))
    (sregex--make-value t nil tree)))
(defun sregex--wordchar (combine)
  "Return the sregex value for the symbol `wordchar': the regexp \"\\\\w\".
COMBINE is ignored."
  (let ((tree '(str . "\\w")))
    (sregex--make-value t nil tree)))
(defun sregex--not-wordchar (combine)
  "Return the sregex value for the symbol `not-wordchar': the regexp \"\\\\W\".
COMBINE is ignored."
  (let ((tree '(str . "\\W")))
    (sregex--make-value t nil tree)))
(defun sregex--syntax (exps combine)
  "Return the sregex value for the clause (syntax CODE).
CODE, the first element of EXPS, is a syntax code character; the
tree is a `str' leaf holding \"\\\\s\" followed by CODE.  COMBINE is
ignored."
  (let ((regexp (format "\\s%c" (car exps))))
    (sregex--make-value t nil (cons 'str regexp))))
(defun sregex--not-syntax (exps combine)
  "Return the sregex value for the clause (not-syntax CODE).
CODE, the first element of EXPS, is a syntax code character; the
tree is a `str' leaf holding \"\\\\S\" followed by CODE.  COMBINE is
ignored."
  (let ((regexp (format "\\S%c" (car exps))))
    (sregex--make-value t nil (cons 'str regexp))))
(defun sregex--bot (combine)
  "Return the sregex value for the symbol `bot': the regexp \"\\\\`\".
COMBINE is ignored."
  (let ((tree (cons 'str "\\`")))
    (sregex--make-value t nil tree)))
(defun sregex--eot (combine)
  "Return the sregex value for the symbol `eot': the regexp \"\\\\'\".
COMBINE is ignored."
  (let ((tree (cons 'str "\\'")))
    (sregex--make-value t nil tree)))
(defun sregex--point (combine)
  "Return the sregex value for the symbol `point'.
The tree is a `str' leaf holding a backslash followed by an equals
sign.  COMBINE is ignored."
  (let ((tree '(str . "\\=")))
    (sregex--make-value t nil tree)))
(defun sregex--word-boundary (combine)
  "Return the sregex value for the symbol `word-boundary': the regexp \"\\\\b\".
COMBINE is ignored."
  (let ((tree '(str . "\\b")))
    (sregex--make-value t nil tree)))
(defun sregex--not-word-boundary (combine)
  "Return the sregex value for the symbol `not-word-boundary': the regexp \"\\\\B\".
COMBINE is ignored."
  (let ((tree '(str . "\\B")))
    (sregex--make-value t nil tree)))
(defun sregex--bow (combine)
  "Return the sregex value for the symbol `bow' (beginning of word).
The tree is a `str' leaf holding a backslash followed by a less-than
sign.  COMBINE is ignored."
  (let ((tree '(str . "\\<")))
    (sregex--make-value t nil tree)))
(defun sregex--eow (combine)
  "Return the sregex value for the symbol `eow' (end of word).
The tree is a `str' leaf holding a backslash followed by a
greater-than sign.  COMBINE is ignored."
  (let ((tree '(str . "\\>")))
    (sregex--make-value t nil tree)))
;; trapdoor - usage discouraged
(defun sregex--regex (exps combine)
  "Return the sregex value for the clause (regex REGEX).
REGEX, the first element of EXPS, is an ordinary regexp string that
is included in the result verbatim.  The value is marked as not being
a unit, so `sregex--engine' wraps it in an introduced group whenever
it is combined with other clauses.  COMBINE is ignored."
  ;; The string is wrapped in a `str' leaf like every other literal:
  ;; `sregex--render-tree' dispatches on the car of the tree and
  ;; cannot take the car of a bare string.
  (sregex--make-value nil nil (cons 'str (car exps))))
949 ;;; sregex.el ends here