Major merge with Emacs source tree. Docutils SVN version and Emacs
[docutils/kirr.git] / docutils / tools / editors / emacs / tests / re.el
blob9468018921e4de5a0f8088ea50b31686a6d3afbc
1 ;; Tests for the regular expression builder
3 (add-to-list 'load-path ".")
4 (load "ert-support" nil t)
(ert-deftest rst-re ()
  "Tests `rst-re'.
Checks plain strings, characters, a named symbol, the `:seq',
`:shy', `:grp' and `:alt' operators (also nested and combined),
and that an unknown operator or an unsupported argument type
signals an error."
  ;; Atoms: a string is passed through, a character is regex-quoted
  ;; where necessary, a symbol is looked up.
  (should (equal (rst-re "xy") "xy"))
  (should (equal (rst-re ?A) "A"))
  (should (equal (rst-re ?$) "\\$"))
  (should (equal (rst-re 'exm-tag) "\\.\\."))
  ;; Several arguments are concatenated like an implicit `:seq'.
  (should (equal (rst-re "xy" ?A ?$ 'exm-tag) "xyA\\$\\.\\."))
  ;; Operators.
  (should (equal (rst-re '(:seq "xy" ?A ?$ exm-tag)) "xyA\\$\\.\\."))
  (should (equal (rst-re '(:shy "xy" ?A ?$ exm-tag)) "\\(?:xyA\\$\\.\\.\\)"))
  (should (equal (rst-re '(:grp "xy" ?A ?$ exm-tag)) "\\(xyA\\$\\.\\.\\)"))
  (should (equal (rst-re '(:alt "xy" ?A ?$ exm-tag))
                 "\\(?:xy\\|A\\|\\$\\|\\.\\.\\)"))
  ;; Nested and combined operators.
  (should (equal (rst-re '(:seq (:seq "xy" ?A ?$ exm-tag))) "xyA\\$\\.\\."))
  (should (equal (rst-re '(:grp (:alt "xy" ?A ?$ exm-tag)))
                 "\\(\\(?:xy\\|A\\|\\$\\|\\.\\.\\)\\)"))
  (should (equal (rst-re '(:alt (:grp "xy" ?A ?$ exm-tag)))
                 "\\(?:\\(xyA\\$\\.\\.\\)\\)"))
  (should (equal (rst-re '(:alt "xy" ?A) '(:grp ?$ exm-tag))
                 "\\(?:xy\\|A\\)\\(\\$\\.\\.\\)"))
  ;; Invalid input.
  (should-error (rst-re '(:unknown "xy")))
  (should-error (rst-re [1])))
(defun re-equal-modify-orig (orig loc refactored repls)
  "Return regex ORIG normalized and with replacements REPLS applied.
First every character class written as space-then-tab is
transposed to tab-then-space, and every symbolic whitespace
followed by star and dollar is rewritten as the explicit
tab/space class followed by star and dollar.  Then each element
of REPLS, a cons cell (REGEX . REPLACEMENT), is applied once in
order: the first match of REGEX in the current string is replaced
literally by REPLACEMENT.

Signal an error mentioning LOC if a REGEX does not match.
REFACTORED is not used here; it is accepted so the argument list
parallels that of `re-equal'.  Matching is case-sensitive."
  (let ((case-fold-search nil))
    ;; Transpose horizontal whitespace
    (while (string-match "\\[ \t]" orig)
      (setq orig (replace-match "[\t ]" t nil orig)))
    ;; Replace symbolic whitespace
    (while (string-match "\\\\s \\*\\$" orig)
      (setq orig (replace-match "[\t ]*$" t nil orig)))
    (dolist (regex-repl repls)
      (if (string-match (car regex-repl) orig)
          ;; LITERAL is t: the replacement text is inserted verbatim.
          (setq orig (replace-match (cdr regex-repl) t t orig))
        (error "Replacement regex /%s/ didn't match in '%s' for location '%s'"
               (car regex-repl) orig loc)))
    orig))
(defun re-equal (orig loc refactored &rest repls)
  "Compare regex ORIG at location LOC to REFACTORED.
REPLS starts with a list of cons cells telling where regex in car
is replaced by cdr in ORIG.

Return non-nil if ORIG, after processing by
`re-equal-modify-orig', is `equal' to REFACTORED."
  (equal (re-equal-modify-orig orig loc refactored repls) refactored))
(defun re-equal-explain (orig loc refactored &rest repls)
  "ERT explainer for `re-equal'.
Process ORIG with `re-equal-modify-orig' exactly as `re-equal'
does (using LOC, REFACTORED and REPLS) and return ERT's
explanation of how the result differs from REFACTORED."
  ;; The ERT helper explaining `equal' failures is internal; newer ERT
  ;; versions call it `ert--explain-equal', older ones
  ;; `ert--explain-not-equal'.  Use whichever is available.
  (funcall (if (fboundp 'ert--explain-equal)
               'ert--explain-equal
             'ert--explain-not-equal)
           (re-equal-modify-orig orig loc refactored repls)
           refactored))

;; Let `should' report failures of `re-equal' through the explainer.
(put 're-equal 'ert-explainer 're-equal-explain)
(defun re-check-matches (orig loc refactored s pairs)
  "Check matches and return those pairs which didn't work.
Each element of PAIRS is a cons cell (ORIG-GROUP . REFA-GROUP).
For every pair, string S is matched against regex ORIG and regex
REFACTORED, and match group ORIG-GROUP of the former is compared
with match group REFA-GROUP of the latter.

Return the list of pairs whose groups differ, nil if none does.
Signal an error mentioning LOC if, for some pair, one of the
regexes yields no match text.  Matching is case-sensitive."
  (let ((case-fold-search nil)
        failed)
    (dolist (pair pairs failed)
      (let ((orig-mtc (and (string-match orig s)
                           (match-string (car pair) s)))
            (refa-mtc (and (string-match refactored s)
                           (match-string (cdr pair) s))))
        (cond
         ((not orig-mtc)
          (error "Original regex '%s' didn't match string '%s' for location '%s'"
                 orig s loc))
         ((not refa-mtc)
          (error "Refactored regex '%s' didn't match string '%s' for location '%s'"
                 refactored s loc))
         ((not (equal orig-mtc refa-mtc))
          (push pair failed)))))))
(defun re-equal-matches (orig loc refactored matches &rest repls)
  "Like `re-equal'.  However, if MATCHES is non-nil it must be a
list with a string and cons cells each consisting of two numbers.
For each cons cell the string is matched with ORIG and REFACTORED
and the numbered matches are compared.

Return non-nil only if ORIG, after processing by
`re-equal-modify-orig' with LOC and REPLS, equals REFACTORED and
`re-check-matches' reports no differing match groups."
  (and (equal (re-equal-modify-orig orig loc refactored repls) refactored)
       ;; The match check deliberately uses the unmodified ORIG.
       (not (re-check-matches orig loc refactored
                              (car matches) (cdr matches)))))
(defun re-equal-matches-explain (orig loc refactored matches &rest repls)
  "ERT explainer for `re-equal-matches'.
If ORIG, after processing by `re-equal-modify-orig' with LOC and
REPLS, differs from REFACTORED, return the explanation given by
`re-equal-explain'.  Otherwise return one entry of the form
\(matchers-didnt-match ORIG-GROUP REFA-GROUP) for every pair in
MATCHES which `re-check-matches' reports as differing."
  (if (not (equal (re-equal-modify-orig orig loc refactored repls) refactored))
      (apply 're-equal-explain orig loc refactored repls)
    (let (result)
      (dolist (failed (re-check-matches orig loc refactored
                                        (car matches) (cdr matches))
                      result)
        (push (list 'matchers-didnt-match (car failed) (cdr failed))
              result)))))

;; Let `should' report failures of `re-equal-matches' through the explainer.
(put 're-equal-matches 'ert-explainer 're-equal-matches-explain)
(ert-deftest rst-re-refactoring ()
  "Test the refactorings done based on rst_el_1_68.
Each `should' form gives the regex as it was written in the
original file, the location in that file, the regex now built by
`rst-re', and optionally the replacements which document the
intended differences between the two."
  ;; Any comment or string "=== rst.el.~rst_el_1_68~:..." gives the line number
  ;; of the refactored code in the original file for the previous expression.
  ;;
  ;; NOTE(review): a few replacement strings below contain a character class
  ;; holding only a space ("[ ]").  A literal TAB may have been lost from
  ;; these classes when the file was extracted - confirm against the
  ;; repository copy.
  (let* ((rst-bullets
          '(?- ?* ?+))
         ;; === rst.el.~rst_el_1_68~:451
         (rst-re-bullets
          (format "\\([%s][ \t]\\)[^ \t]" (regexp-quote (concat rst-bullets))))
         ;; === rst.el.~rst_el_1_68~:1604
         ;; More parameters
         (c ?*)
         (len 10)
         (char ?+)
         (adornment "$$$$")
         (ado-re (regexp-quote adornment))
         (fromchar ?.))
    (should (re-equal
             "^\\.\\. "
             "=== rst.el.~rst_el_1_68~:398"
             (rst-re "^" 'exm-sta)
             (cons " $" "[ ]+") ;; Any whitespace may follow
             ))
    (should (re-equal
             "^[ \t]*\\S *\\w\\S *"
             "=== rst.el.~rst_el_1_68~:567"
             (rst-re 'lin-beg "\\S *\\w\\S *")))
    (should (re-equal
             "::[ \t]*$"
             "=== rst.el.~rst_el_1_68~:591"
             (rst-re 'dcl-tag 'lin-end)))
    (should (re-equal
             "\\.\\.\\.[ \t]*$"
             "=== rst.el.~rst_el_1_68~:592"
             (rst-re 'ell-tag 'lin-end)))
    (should (re-equal
             ".[ \t]*$"
             "=== rst.el.~rst_el_1_68~:594"
             (rst-re "." 'lin-end)))
    (should (re-equal
             "^[ \t]+"
             "=== rst.el.~rst_el_1_68~:605"
             (rst-re 'hws-sta)
             (cons "^^" "") ;; No need to anchor for looking-at
             ))
    (should (re-equal
             "^[ \t]*$"
             "=== rst.el.~rst_el_1_68~:744"
             (rst-re 'lin-end)
             (cons "^^" "") ;; No need to anchor for looking-at
             ))
    (should (re-equal
             "^[ \t]*$"
             "=== rst.el.~rst_el_1_68~:1517"
             (rst-re 'lin-end)
             (cons "^^" "") ;; No need to anchor for looking-at
             ))
    (should (re-equal
             "^[ \t]*$"
             "=== rst.el.~rst_el_1_68~:1545"
             (rst-re 'lin-end)
             (cons "^^" "") ;; No need to anchor for looking-at
             ))
    (should (re-equal
             "^[ \t]*$"
             "=== rst.el.~rst_el_1_68~:1548"
             (rst-re 'lin-end)
             (cons "^^" "") ;; No need to anchor for looking-at
             ))
    (should (re-equal
             "[0-9]+"
             "=== rst.el.~rst_el_1_68~:1657"
             (rst-re 'num-tag)))
    (should (re-equal
             "[IVXLCDMivxlcdm]+"
             "=== rst.el.~rst_el_1_68~:1661"
             (rst-re 'rom-tag)))
    (should (re-equal
             "[IVXLCDMivxlcdm]+"
             "=== rst.el.~rst_el_1_68~:1677"
             (rst-re 'rom-tag)))
    (should (re-equal
             "[a-zA-Z]"
             "=== rst.el.~rst_el_1_68~:1685"
             (rst-re 'ltr-tag)))
    (should (re-equal
             "[ \t\n]*\\'"
             "=== rst.el.~rst_el_1_68~:1762"
             (rst-re "[ \t\n]*\\'")))
    (should (re-equal
             "\\S .*\\S "
             "=== rst.el.~rst_el_1_68~:1767"
             (rst-re "\\S .*\\S ")))
    (should (re-equal
             "[ \t]*$"
             "=== rst.el.~rst_el_1_68~:2066"
             (rst-re 'lin-end)))
    (should (re-equal
             "[ \t]*$"
             "=== rst.el.~rst_el_1_68~:2366"
             (rst-re 'lin-end)))
    (should (re-equal
             "^[ \t]*$"
             "=== rst.el.~rst_el_1_68~:2414"
             (rst-re 'lin-end)
             (cons "^^" "") ;; No need to anchor for looking-at
             ))
    (should (re-equal
             "[ \t]*$"
             "=== rst.el.~rst_el_1_68~:2610"
             (rst-re 'lin-end)))
    (should (re-equal
             "[ \t]*$"
             "=== rst.el.~rst_el_1_68~:2612"
             (rst-re 'lin-end)))
    (should (re-equal
             "[ \t]*$"
             "=== rst.el.~rst_el_1_68~:2645"
             (rst-re 'lin-end)))
    (should (re-equal
             "[^ \t]\\|[ \t]*\\.\\.[^ \t]\\|.*::$"
             "=== rst.el.~rst_el_1_68~:3177"
             (rst-re '(:alt
                       "[^ \t]"
                       (:seq hws-tag exm-tag "[^ \t]")
                       (:seq ".*" dcl-tag lin-end)))
             (cons "^" "\\(?:") (cons "$" "\\)") ;; Add outermost shy group
             (cons "::\\$" "::[ ]*$") ;; Allow trailing space after double
                                      ;; colon
             ))
    (should (re-equal
             "\\s *$"
             "=== rst.el.~rst_el_1_68~:3209"
             (rst-re 'lin-end)))
    (should (re-equal
             "^\\s *$"
             "=== rst.el.~rst_el_1_68~:3215"
             (rst-re 'linemp-tag)))
    (should (re-equal
             "\\s *$"
             "=== rst.el.~rst_el_1_68~:3253"
             (rst-re 'lin-end)))
    (should (re-equal
             "\\s *"
             "=== rst.el.~rst_el_1_68~:3256"
             (rst-re 'hws-tag)
             (cons "\\\\s \\*" "[\t ]*") ;; Replace symbolic by real whitespace
             ))
    (should (re-equal
             "\\s *$"
             "=== rst.el.~rst_el_1_68~:3261"
             (rst-re 'lin-end)))
    (should (re-equal
             "\\s *"
             "=== rst.el.~rst_el_1_68~:3268"
             (rst-re 'hws-tag)
             (cons "\\\\s \\*" "[\t ]*") ;; Replace symbolic by real whitespace
             ))
    (should (re-equal
             (regexp-quote adornment)
             "=== rst.el.~rst_el_1_68~:3346"
             (rst-re (regexp-quote adornment))))
    (should (re-equal
             "\\s *$"
             "=== rst.el.~rst_el_1_68~:3354"
             (rst-re 'lin-end)))
    (should (re-equal
             "\\s *$"
             "=== rst.el.~rst_el_1_68~:3358"
             (rst-re 'lin-end)))
    (should (re-equal
             "\\s *$"
             "=== rst.el.~rst_el_1_68~:3374"
             (rst-re 'lin-end)))
    (should (re-equal
             (concat "\\(" ado-re "\\)\\s *$")
             "=== rst.el.~rst_el_1_68~:3377"
             (rst-re (list :grp
                           ado-re)
                     'lin-end)))
    (should (re-equal
             (concat "\\(" ado-re "\\)\\s *$")
             "=== rst.el.~rst_el_1_68~:3393"
             (rst-re (list :grp
                           ado-re)
                     'lin-end)))
    (should (re-equal
             (concat "^" (regexp-quote (string fromchar)) "+\\( *\\)$")
             "=== rst.el.~rst_el_1_68~:3590"
             (rst-re "^" fromchar "+\\( *\\)$")))
    (should (re-equal
             "\f\\|>*[ \t]*$\\|>*[ \t]*[-+*] \\|>*[ \t]*[0-9#]+\\. "
             "=== rst.el.~rst_el_1_68~:391"
             (rst-re '(:alt
                       "\f"
                       lin-end
                       (:seq hws-tag itmany-sta-1)))
             (cons "^" "\\(?:") (cons "$" "\\)") ;; Add outermost shy group
             (cons (regexp-quote "[-+*] \\|>*[ ]*[0-9#]+\\. ")
                   "\\(\\(?:\\(?:\\(?:[a-zA-Z]\\|[0-9]+\\|[IVXLCDMivxlcdm]+\\|#\\)\\.\\|(?\\(?:[a-zA-Z]\\|[0-9]+\\|[IVXLCDMivxlcdm]+\\|#\\))\\)\\|[-*+\u2022\u2023\u2043]\\)\\)[ ]+"
                   ) ;; Now matches all items
             (cons ">\\*" "") ;; Remove ">" prefix
             (cons ">\\*" "") ;; Remove another ">" prefix
             (cons "\\[\t ]\\+" "\\(?:[\t ]+\\|$\\)") ;; Item tag needs no
                                                      ;; trailing whitespace
             ))
    (should (re-equal
             (format "[%s]+[ \t]*$" (char-to-string c))
             "=== rst.el.~rst_el_1_68~:587"
             (rst-re c "+" 'lin-end)
             (cons "\\[\\*]" "\\*") ;; Use quoting instead of char class
             ))
    (should (re-equal
             (concat "^"
                     (regexp-quote (make-string len char))
                     "$")
             "=== rst.el.~rst_el_1_68~:941"
             (rst-re "^" char (format "\\{%d\\}" len) "$")
             (cons "\\(\\\\\\+\\)\\{9\\}\\$"
                   "\\{10\\}$") ;; Use regex repeat instead of explicit repeat
             ))
    (should (re-equal
             (format "#.\\|[%s]"
                     (regexp-quote (concat rst-bullets)))
             "=== rst.el.~rst_el_1_68~:1653"
             (rst-re '(:alt
                       enmaut-tag
                       bul-tag))
             (cons "^" "\\(?:") (cons "$" "\\)") ;; Add outermost shy group
             (cons (regexp-quote "[-\\*\\+]")
                   "[-*+\u2022\u2023\u2043]") ;; Wrongly quoted characters in
                                              ;; class and more bullets
             (cons "#\\." "\\(?:#\\.\\|(?#)\\)") ;; Use all auto enumerators
             ))
    (should (re-equal
             "[a-zA-Z]+"
             "=== rst.el.~rst_el_1_68~:1672"
             (rst-re 'ltr-tag)
             (cons "\\+$" "") ;; Wrong in source
             ))
    (should (re-equal
             rst-re-bullets
             "=== rst.el.~rst_el_1_68~:1735"
             (rst-re 'bul-sta)
             (cons (regexp-quote "[-\\*\\+]")
                   "[-*+\u2022\u2023\u2043]") ;; Wrongly quoted characters in
                                              ;; class and more bullets
             (cons "\\[^ \t]" "") ;; Accept bullets without content
             (cons "^\\\\(" "") (cons "\\\\)" "") ;; Remove superfluous group
             (cons "$" "+") ;; Allow more whitespace
             (cons "\\[\t ]\\+" "\\(?:[\t ]+\\|$\\)") ;; Item tag needs no
                                                      ;; trailing whitespace
             ))
    (should (re-equal
             "^\\.\\. contents[ \t]*::\\(.*\\)\n\\([ \t]+:\\w+:.*\n\\)*\\.\\."
             "=== rst.el.~rst_el_1_68~:2056"
             (rst-re "^" 'exm-sta "contents" 'dcl-tag ".*\n"
                     "\\(?:" 'hws-sta 'fld-tag ".*\n\\)*" 'exm-tag)
             (cons " contents\\[\t ]\\*"
                   "[\t ]+contents") ;; Any whitespace before but no after
             (cons "\\\\(\\.\\*\\\\)" ".*") ;; Remove superfluous group
             (cons "\\\\(" "\\(?:") ;; Make group shy
             (cons ":\\\\w\\+:" ":\\(?:[^:\n]\\|\\\\:\\)+:") ;; Use improved
                                                             ;; field tag
             ))
    (should (re-equal
             "[ \t]+[^ \t]"
             "=== rst.el.~rst_el_1_68~:2065"
             (rst-re 'hws-sta "\\S ")
             (cons "\\[^ \t]" "\\S ") ;; Require non-whitespace instead
                                      ;; of only non-horizontal whitespace
             ))))
(ert-deftest rst-re-refactoring-complicated ()
  "Try to test complicated refactorings done based on rst_el_1_68.
The test is declared as expected to fail; the differences between
the original and the refactored regexes were reviewed by hand."
  :expected-result :failed ;; These have been reviewed logically and are ok
  (let* ((rst-bullets
          '(?- ?* ?+))
         ;; === rst.el.~rst_el_1_68~:451
         (rst-re-enumerator "\\(?:[a-zA-Z]\\|[0-9IVXLCDMivxlcdm]+\\)")
         ;; === rst.el.~rst_el_1_68~:1608
         (rst-re-enumerations
          (format "^[ \t]*\\(%s.\\|(?%s)\\)[ \t]"
                  rst-re-enumerator
                  rst-re-enumerator))
         ;; === rst.el.~rst_el_1_68~:1610
         (rst-re-items
          (format "^[ \t]*\\([%s]\\|\\(#\\|%s\\)\\.\\|(?%s)\\)[ \t]"
                  (regexp-quote (concat rst-bullets))
                  rst-re-enumerator
                  rst-re-enumerator))
         ;; === rst.el.~rst_el_1_68~:1616
         ;; More parameters
         (char ?+))
    (should (re-equal
             rst-re-items
             "=== rst.el.~rst_el_1_68~:2718"
             (rst-re 'itmany-sta-1)
             (cons "^^\\[\t ]\\*" "") ;; Wrongly anchored at the beginning of
                                      ;; the line
             (cons (regexp-quote "\\(#\\|\\(?:[a-zA-Z]\\|[0-9IVXLCDMivxlcdm]+\\)\\)")
                   "\\(?:[a-zA-Z]\\|[0-9]+\\|[IVXLCDMivxlcdm]+\\|#\\)"
                   ) ;; Replace counter for "\\."
             (cons (regexp-quote "\\(?:[a-zA-Z]\\|[0-9IVXLCDMivxlcdm]+\\)")
                   "\\(?:[a-zA-Z]\\|[0-9]+\\|[IVXLCDMivxlcdm]+\\|#\\)"
                   ) ;; Replace counter for "(?)"
             (cons "$" "+") ;; Allow more whitespace
             (cons "^\\\\(" "\\(\\(?:") (cons "\\[\t ]\\+$" "\\)[\t ]+"
                                              ) ;; Add superfluous shy group
             (cons (regexp-quote "[-\\*\\+]\\|") "") ;; Remove wrongly quoted
                                                     ;; characters
             (cons (regexp-quote "\\)\\)[\t ]+")
                   "\\|[-*+\u2022\u2023\u2043]\\)\\)[\t ]+"
                   ) ;; Re-add bullets
             ))
    (should (re-equal
             rst-re-items
             "=== rst.el.~rst_el_1_68~:2724"
             (rst-re 'itmany-beg-1)))
    (should (re-equal
             rst-re-items
             "=== rst.el.~rst_el_1_68~:1649"
             (rst-re 'itmany-beg-1)))
    (should (re-equal
             rst-re-enumerations
             "=== rst.el.~rst_el_1_68~:1671"
             (rst-re 'enmexp-beg)))
    (should (re-equal
             rst-re-items
             "=== rst.el.~rst_el_1_68~:1719"
             (rst-re 'itmany-beg-1)))
    (should (re-equal
             (concat
              "\\(?:"
              "\\(\\(?:[0-9a-zA-Z#]\\{1,3\\}[.):-]\\|[*+-]\\)[ \t]+\\)[^ \t\n]"
              "\\|"
              (format "\\(%s%s+[ \t]+\\)[^ \t\n]"
                      (regexp-quote (char-to-string char))
                      (regexp-quote (char-to-string char)))
              "\\)")
             "=== rst.el.~rst_el_1_68~:2430"
             ;; NOTE(review): the refactored side takes its character from
             ;; `char-after' while the original side uses the CHAR binding -
             ;; confirm this is intended.
             (rst-re
              `(:grp
                (:alt
                 itmany-tag
                 (:seq ,(char-after) "\\{2,\\}"))
                hws-sta)
              "\\S ")
             (cons "^\\\\(\\?:" "") (cons "\\\\)$" "") ;; Remove superfluous
                                                       ;; shy group
             (cons (regexp-quote "[0-9a-zA-Z#]\\{1,3\\}[.):-]\\|[*+-]")
                   "\\(\\(?:\\(?:\\(?:[a-zA-Z]\\|[0-9]+\\|[IVXLCDMivxlcdm]+\\|#\\)\\.\\|(?\\(?:[a-zA-Z]\\|[0-9]+\\|[IVXLCDMivxlcdm]+\\|#\\))\\)\\|[-*+\u2022\u2023\u2043]\\)\\)"
                   ) ;; Replace wrong item tag by correct one
             (cons (regexp-quote "\\+\\++")
                   "\\+\\{2,\\}") ;; Use regex repeat instead of explicit repeat
             (cons "\\[^ \t\n]" "\\S ") ;; Use symbolic non-whitespace
             (cons "\\[^ \t\n]" "\\S ") ;; Use symbolic non-whitespace again
             (cons "\\\\S " "") ;; Factor out symbolic non-whitespace
             ))))
489 (ert-deftest rst-re-refactoring-font-lock ()
490 "Test the refactorings in font-lock done based on rst_el_1_68."
491 ;; Any comment or string "=== rst.el.~rst_el_1_68~:..." gives the line number
492 ;; of the refactored code in the original file for the previous expression.
493 (let* ((rst-use-char-classes t)
494 (rst-use-unicode t)
495 ;; horizontal white space
496 (re-hws "[\t ]")
497 ;; beginning of line with possible indentation
498 (re-bol (concat "^" re-hws "*"))
499 ;; Separates block lead-ins from their content
500 (re-blksep1 (concat "\\(" re-hws "+\\|$\\)"))
501 ;; explicit markup tag
502 (re-emt "\\.\\.")
503 ;; explicit markup start
504 (re-ems (concat re-emt re-hws "+"))
505 ;; inline markup prefix
506 (re-imp1 (concat "\\(^\\|" re-hws "\\|[-'\"([{<"
507 (if rst-use-unicode
508 "\u2018\u201c\u00ab\u2019"
510 "/:]\\)"))
511 ;; inline markup suffix
512 (re-ims1 (concat "\\(" re-hws "\\|[]-'\")}>"
513 (if rst-use-unicode
514 "\u2019\u201d\u00bb"
516 "/:.,;!?\\]\\|$\\)"))
517 ;; symbol character
518 (re-sym1 "\\(\\sw\\|\\s_\\)")
519 ;; inline markup content begin
520 (re-imbeg2 "\\(\\S \\|\\S \\([^")
522 ;; There seems to be a bug leading to error "Stack overflow in regexp
523 ;; matcher" when "|" or "\\*" are the characters searched for
524 (re-imendbegbeg
525 (if (< emacs-major-version 21)
527 "\\]\\|\\\\."))
528 ;; inline markup content end
529 (re-imendbeg (concat re-imendbegbeg "\\)\\{0,"
530 (format "%d" rst-max-inline-length)
531 "\\}[^\t "))
532 (re-imendend "\\\\]\\)")
533 ;; inline markup content without asterisk
534 (re-ima2 (concat re-imbeg2 "*" re-imendbeg "*" re-imendend))
535 ;; inline markup content without backquote
536 (re-imb2 (concat re-imbeg2 "`" re-imendbeg "`" re-imendend))
537 ;; inline markup content without vertical bar
538 (re-imv2 (concat re-imbeg2 "|" re-imendbeg "|" re-imendend))
539 ;; Supported URI schemes
540 (re-uris1 "\\(acap\\|cid\\|data\\|dav\\|fax\\|file\\|ftp\\|gopher\\|http\\|https\\|imap\\|ldap\\|mailto\\|mid\\|modem\\|news\\|nfs\\|nntp\\|pop\\|prospero\\|rtsp\\|service\\|sip\\|tel\\|telnet\\|tip\\|urn\\|vemmi\\|wais\\)")
541 ;; Line starting with adornment and optional whitespace; complete
542 ;; adornment is in (match-string 1); there must be at least 3
543 ;; characters because otherwise explicit markup start would be
544 ;; recognized
545 (re-ado2 (concat "^\\(\\(["
546 (if rst-use-char-classes
547 "^[:word:][:space:][:cntrl:]"
548 "^\\w \t\x00-\x1F")
549 "]\\)\\2\\2+\\)" re-hws "*$"))
551 (should (re-equal-matches
552 ;; `Bullet Lists`_
553 (concat re-bol "\\([-*+]" re-blksep1 "\\)")
554 "=== rst.el.~rst_el_1_68~:3011"
555 (rst-re 'lin-beg '(:grp bul-sta))
556 (list "*"
557 (cons 1 1))
558 (cons (regexp-quote "[-*+]")
559 "[-*+\u2022\u2023\u2043]") ;; More bullets
560 (cons "\\\\(\\[\t " "\\(?:[\t ") ;; Make a group shy
562 (should (re-equal-matches
563 ;; `Enumerated Lists`_
564 (concat re-bol "\\((?\\(#\\|[0-9]+\\|[A-Za-z]\\|[IVXLCMivxlcm]+\\)[.)]"
565 re-blksep1 "\\)")
566 "=== rst.el.~rst_el_1_68~:3015"
567 (rst-re 'lin-beg '(:grp enmany-sta))
568 (list " (#) Item"
569 (cons 1 1))
570 (cons (regexp-quote
571 "(?\\(#\\|[0-9]+\\|[A-Za-z]\\|[IVXLCMivxlcm]+\\)[.)]")
572 "\\(?:\\(?:[a-zA-Z]\\|[0-9]+\\|[IVXLCDMivxlcdm]+\\|#\\)\\.\\|(?\\(?:[a-zA-Z]\\|[0-9]+\\|[IVXLCDMivxlcdm]+\\|#\\))\\)"
573 ) ;; Enumeration tags are more sophisticated
574 (cons "\\\\(\\[\t " "\\(?:[\t ") ;; Make a group shy
576 (should (re-equal-matches
577 ;; `Field Lists`_
578 (concat re-bol "\\(:[^:\n]+:\\)" re-blksep1)
579 "=== rst.el.~rst_el_1_68~:3021"
580 (rst-re 'lin-beg '(:grp fld-tag) 'bli-sfx)
581 (list " :some field: "
582 (cons 1 1))
583 (cons "\\[^:\n]" "\\(?:[^:\n]\\|\\\\:\\)") ;; Field name more
584 ;; sophisticated
585 (cons "\\\\(\\[\t " "\\(?:[\t ") ;; Make a group shy
587 (should (re-equal-matches
588 ;; `Option Lists`_
589 (concat re-bol "\\(\\(\\(\\([-+/]\\|--\\)\\sw\\(-\\|\\sw\\)*"
590 "\\([ =]\\S +\\)?\\)\\(,[\t ]\\)?\\)+\\)\\($\\|[\t ]\\{2\\}\\)")
591 "=== rst.el.~rst_el_1_68~:3025"
592 (rst-re 'lin-beg '(:grp opt-tag (:shy optsep-tag opt-tag) "*")
593 '(:alt "$" (:seq hws-prt "\\{2\\}")))
594 (list " --len=length, -l length Explanation"
595 (cons 1 1))
596 (cons (regexp-quote "\\(\\(\\(\\([-+/]\\|--\\)\\sw\\(-\\|\\sw\\)*\\([ =]\\S +\\)?\\)\\(,[ ]\\)?\\)+\\)")
597 "\\(\\(?:\\(?:[-+/]\\|--\\)\\sw\\(?:-\\|\\sw\\)*\\(?:[ =]\\S +\\)?\\)\\(?:\\(?:,[ ]\\)\\(?:\\(?:[-+/]\\|--\\)\\sw\\(?:-\\|\\sw\\)*\\(?:[ =]\\S +\\)?\\)\\)*\\)"
598 ) ;; Option recognition more sophisticated
599 (cons "\\\\(\\$" "\\(?:$") ;; Make a group shy
601 (should (re-equal-matches
602 ;; `Line Blocks`_
603 (concat re-bol "\\(|" re-blksep1 "\\)[^|\n]*$")
604 "=== rst.el.~rst_el_1_68~:3030"
605 (rst-re 'lin-beg '(:grp "|" bli-sfx) "[^|\n]*$")
606 (list " | Some text"
607 (cons 1 1))
608 (cons "\\\\(\\[\t " "\\(?:[\t ") ;; Make a group shy
610 (should (re-equal-matches
611 ;; `Footnotes`_ / `Citations`_
612 (concat re-bol "\\(" re-ems "\\[[^[\n]+\\]\\)" re-blksep1)
613 "=== rst.el.~rst_el_1_68~:3038"
614 (rst-re 'lin-beg '(:grp exm-sta fnc-tag) 'bli-sfx)
615 (list ".. [#]"
616 (cons 1 1))
617 (cons "\\[^\\[" "[^]") ;; Error correction in old code
618 (cons "\\\\]" "]") ;; Remove superfluous quote
619 (cons "\\\\(\\[\t " "\\(?:[\t ") ;; Make a group shy
621 (should (re-equal-matches
622 ;; `Directives`_ / `Substitution Definitions`_
623 (concat re-bol "\\(" re-ems "\\)\\(\\(|[^|\n]+|[\t ]+\\)?\\)\\("
624 re-sym1 "+::\\)" re-blksep1)
625 "=== rst.el.~rst_el_1_68~:3042"
626 (rst-re 'lin-beg '(:grp exm-sta)
627 '(:grp (:shy subdef-tag hws-sta) "?")
628 '(:grp sym-tag dcl-tag) 'bli-sfx)
629 (list ".. |attr| replace:: val"
630 (cons 1 1)
631 (cons 2 2)
632 (cons 4 3))
633 (cons "\\\\(|" "\\(?:|") ;; Make a group shy
634 (cons "\\[^|\n]\\+" "\\(?:\\S \\|\\S \\(?:[^|\\\n]\\|\\\\.\\)\\{0,1000\\}[^ |\\]\\)"
635 ) ;; Symbol name more sophisticated
636 (cons (regexp-quote "\\(\\sw\\|\\s_\\)+")
637 "\\(?:\\sw+\\(?:[-+.:_]\\sw+\\)*\\)") ;; New syntax for
638 ;; symbols
639 (cons "\\\\(\\[\t " "\\(?:[\t ") ;; Make a group shy
641 (should (re-equal-matches
642 ;; `Hyperlink Targets`_
643 (concat re-bol "\\(" re-ems "_\\([^:\\`\n]\\|\\\\.\\|`[^`\n]+`\\)+:\\)"
644 re-blksep1)
645 "=== rst.el.~rst_el_1_68~:3049"
646 (rst-re 'lin-beg
647 '(:grp exm-sta "_" (:alt
648 (:seq "`" ilcbkqdef-tag "`")
649 (:seq (:alt "[^:\\\n]" "\\\\.") "+")) ":")
650 'bli-sfx)
651 (list ".. _`some\\: target`:"
652 (cons 1 1))
653 (cons (regexp-quote "\\([^:\\`\n]\\|\\\\.\\|`[^`\n]+`\\)+")
654 "\\(?:`\\(?:\\S \\|\\S \\(?:[^`\\\n]\\|\\\\.\\)\\{0,1000\\}[^ `\\]\\)`\\|\\(?:[^:\\\n]\\|\\\\.\\)+\\)"
655 ) ;; Hyperlink name recognition more sophisticated
656 (cons "\\\\(\\[\t " "\\(?:[\t ") ;; Make a group shy
658 (should (re-equal-matches
659 ;; `Hyperlink Targets`_
660 (concat re-bol "\\(__\\)" re-blksep1)
661 "=== rst.el.~rst_el_1_68~:3053"
662 (rst-re 'lin-beg '(:grp "__") 'bli-sfx)
663 (list " __"
664 (cons 1 1))
665 (cons "\\\\(\\[\t " "\\(?:[\t ") ;; Make a group shy
667 (should (re-equal-matches
668 ;; `Strong Emphasis`_
669 (concat re-imp1 "\\(\\*\\*" re-ima2 "\\*\\*\\)" re-ims1)
670 "=== rst.el.~rst_el_1_68~:3062"
671 (rst-re 'ilm-pfx '(:grp "\\*\\*" ilcast-tag "\\*\\*") 'ilm-sfx)
672 (list "abc **def** ghi"
673 (cons 2 1))
674 (cons "^\\\\(" "\\(?:") ;; Make a group shy
675 (cons "\\\\(\\\\S" "\\(?:\\S") ;; Make a group shy
676 (cons "\\\\(\\[^" "\\(?:[^") ;; Make a group shy
677 (cons (regexp-quote "\\\\]") "\\]") ;; Remove superfluous quote
678 (cons (regexp-quote "\\|$") "")
679 (cons (regexp-quote "\\([\t ]")
680 "\\(?:$\\|[\t ]") ;; Move "$" in regex and make a group shy
682 (should (re-equal-matches
683 ;; `Emphasis`_
684 (concat re-imp1 "\\(\\*" re-ima2 "\\*\\)" re-ims1)
685 "=== rst.el.~rst_el_1_68~:3066"
686 (rst-re 'ilm-pfx '(:grp "\\*" ilcast-tag "\\*") 'ilm-sfx)
687 (list "*x*"
688 (cons 2 1))
689 (cons "^\\\\(" "\\(?:") ;; Make a group shy
690 (cons "\\\\(\\\\S" "\\(?:\\S") ;; Make a group shy
691 (cons "\\\\(\\[^" "\\(?:[^") ;; Make a group shy
692 (cons (regexp-quote "\\\\]") "\\]") ;; Remove superfluous quote
693 (cons (regexp-quote "\\|$") "")
694 (cons (regexp-quote "\\([\t ]")
695 "\\(?:$\\|[\t ]") ;; Move "$" in regex and make a group shy
697 (should (re-equal-matches
698 ;; `Inline Literals`_
699 (concat re-imp1 "\\(``" re-imb2 "``\\)" re-ims1)
700 "=== rst.el.~rst_el_1_68~:3070"
701 (rst-re 'ilm-pfx '(:grp "``" ilcbkq-tag "``") 'ilm-sfx)
702 (list "``co de``"
703 (cons 2 1))
704 (cons "^\\\\(" "\\(?:") ;; Make a group shy
705 (cons "\\\\(\\\\S" "\\(?:\\S") ;; Make a group shy
706 (cons "\\\\(\\[^" "\\(?:[^") ;; Make a group shy
707 (cons (regexp-quote "\\\\]") "\\]") ;; Remove superfluous quote
708 (cons (regexp-quote "\\|$") "")
709 (cons (regexp-quote "\\([\t ]")
710 "\\(?:$\\|[\t ]") ;; Move "$" in regex and make a group shy
712 (should (re-equal-matches
713 ;; `Inline Internal Targets`_
714 (concat re-imp1 "\\(_`" re-imb2 "`\\)" re-ims1)
715 "=== rst.el.~rst_el_1_68~:3074"
716 (rst-re 'ilm-pfx '(:grp "_`" ilcbkq-tag "`") 'ilm-sfx)
717 (list "_`Inline\ntarget`"
718 (cons 2 1))
719 (cons "^\\\\(" "\\(?:") ;; Make a group shy
720 (cons "\\\\(\\\\S" "\\(?:\\S") ;; Make a group shy
721 (cons "\\\\(\\[^" "\\(?:[^") ;; Make a group shy
722 (cons (regexp-quote "\\\\]") "\\]") ;; Remove superfluous quote
723 (cons (regexp-quote "\\|$") "")
724 (cons (regexp-quote "\\([\t ]")
725 "\\(?:$\\|[\t ]") ;; Move "$" in regex and make a group shy
727 (should (re-equal-matches
728 ;; `Hyperlink References`_
729 (concat re-imp1 "\\(\\(`" re-imb2 "`\\|\\(\\sw\\(\\sw\\|-\\)+\\sw\\)\\)__?\\)" re-ims1)
730 "=== rst.el.~rst_el_1_68~:3079"
731 (rst-re 'ilm-pfx '(:grp (:alt (:seq "`" ilcbkq-tag "`")
732 (:seq "\\sw" (:alt "\\sw" "-") "+\\sw"))
733 "__?") 'ilm-sfx)
734 (list "<`xxx`__>"
735 (cons 2 1))
736 (cons "^\\\\(" "\\(?:") ;; Make a group shy
737 (cons "\\\\(\\\\S" "\\(?:\\S") ;; Make a group shy
738 (cons "\\\\(\\[^" "\\(?:[^") ;; Make a group shy
739 (cons (regexp-quote "\\\\]") "\\]") ;; Remove superfluous quote
740 (cons (regexp-quote "\\|$") "")
741 (cons (regexp-quote "\\([\t ]")
742 "\\(?:$\\|[\t ]") ;; Move "$" in regex and make a group shy
743 (cons "\\\\(`" "\\(?:`") ;; Make a group shy
744 (cons "\\\\(\\\\sw" "\\sw")
745 (cons "\\\\sw\\\\)" "\\sw") ;; Remove a group
746 (cons "sw\\\\(\\\\sw" "sw\\(?:\\sw") ;; Make a group shy
748 (should (re-equal-matches
749 ;; `Interpreted Text`_
750 (concat re-imp1 "\\(\\(:" re-sym1 "+:\\)?\\)\\(`" re-imb2 "`\\)\\(\\(:"
751 re-sym1 "+:\\)?\\)" re-ims1)
752 "=== rst.el.~rst_el_1_68~:3083"
753 (rst-re 'ilm-pfx '(:grp (:shy ":" sym-tag ":") "?")
754 '(:grp "`" ilcbkq-tag "`")
755 '(:grp (:shy ":" sym-tag ":") "?") 'ilm-sfx)
756 (list "`Interpreted`"
757 (cons 2 1)
758 (cons 5 2)
759 (cons 8 3))
760 (cons "^\\\\(" "\\(?:") ;; Make a group shy
761 (cons "\\\\(\\\\S" "\\(?:\\S") ;; Make a group shy
762 (cons "\\\\(\\[^" "\\(?:[^") ;; Make a group shy
763 (cons "\\\\(:" "\\(?::") ;; Make a group shy
764 (cons "\\\\(:" "\\(?::") ;; Make a group shy
765 (cons (regexp-quote "\\(\\sw\\|\\s_\\)+")
766 "\\(?:\\sw+\\(?:[-+.:_]\\sw+\\)*\\)") ;; New syntax for
767 ;; symbols
768 (cons (regexp-quote "\\(\\sw\\|\\s_\\)+")
769 "\\(?:\\sw+\\(?:[-+.:_]\\sw+\\)*\\)") ;; New syntax for
770 ;; symbols
771 (cons (regexp-quote "\\\\]") "\\]") ;; Remove superfluous quote
772 (cons (regexp-quote "\\|$") "")
773 (cons (regexp-quote "\\([\t ]")
774 "\\(?:$\\|[\t ]") ;; Move "$" in regex and make a group shy
776 (should (re-equal-matches
777 ;; `Footnote References`_ / `Citation References`_
778 (concat re-imp1 "\\(\\[[^]]+\\]_\\)" re-ims1)
779 "=== rst.el.~rst_el_1_68~:3090"
780 (rst-re 'ilm-pfx '(:grp fnc-tag "_") 'ilm-sfx)
781 (list "[1]_"
782 (cons 2 1))
783 (cons "^\\\\(" "\\(?:") ;; Make a group shy
784 (cons "]]" "]\n]") ;; A reference may not contain \n
785 (cons "\\\\]" "]") ;; Remove superfluous quote
786 (cons (regexp-quote "\\|$") "")
787 (cons (regexp-quote "\\([\t ]")
788 "\\(?:$\\|[\t ]") ;; Move "$" in regex and make a group shy
790 (should (re-equal-matches
791 ;; `Substitution References`_
792 (concat re-imp1 "\\(|" re-imv2 "|\\)" re-ims1)
793 "=== rst.el.~rst_el_1_68~:3094"
794 (rst-re 'ilm-pfx '(:grp sub-tag) 'ilm-sfx)
795 (list "|attr|"
796 (cons 2 1))
797 (cons "^\\\\(" "\\(?:") ;; Make a group shy
798 (cons "\\\\(\\\\S" "\\(?:\\S") ;; Make a group shy
799 (cons (regexp-quote "\\([^|") "\\(?:[^|") ;; Make a group shy
800 (cons "\\\\]" "]") ;; Remove superfluous quote
801 (cons "\\\\]" "]") ;; Remove superfluous quote
802 (cons "\\[^|]" "[^|\\]") ;; Improve recognition
803 (cons (regexp-quote "\\|$") "")
804 (cons (regexp-quote "\\([\t ]")
805 "\\(?:$\\|[\t ]") ;; Move "$" in regex and make a group shy
807 (should (re-equal-matches
808 ;; `Standalone Hyperlinks`_
809 (concat re-imp1 "\\(" re-uris1 ":\\S +\\)" re-ims1)
810 "=== rst.el.~rst_el_1_68~:3099"
811 (rst-re 'ilm-pfx '(:grp uri-tag ":\\S +") 'ilm-sfx)
812 (list "http://example.com/"
813 (cons 2 1))
814 (cons "^\\\\(" "\\(?:") ;; Make a group shy
815 (cons "\\\\(acap" "\\(?:acap") ;; Make a group shy
816 (cons (regexp-quote "\\|$") "")
817 (cons (regexp-quote "\\([\t ]")
818 "\\(?:$\\|[\t ]") ;; Move "$" in regex and make a group shy
820 (should (re-equal-matches
821 ;; `Standalone Hyperlinks`_
822 (concat re-imp1 "\\(" re-sym1 "+@" re-sym1 "+\\)" re-ims1)
823 "=== rst.el.~rst_el_1_68~:3102"
824 (rst-re 'ilm-pfx '(:grp sym-tag "@" sym-tag ) 'ilm-sfx)
825 (list "someone@example"
826 (cons 2 1))
827 (cons "^\\\\(" "\\(?:") ;; Make a group shy
828 (cons (regexp-quote "\\(\\sw\\|\\s_\\)+")
829 "\\(?:\\sw+\\(?:[-+.:_]\\sw+\\)*\\)") ;; New syntax for
830 ;; symbols
831 (cons (regexp-quote "\\(\\sw\\|\\s_\\)+")
832 "\\(?:\\sw+\\(?:[-+.:_]\\sw+\\)*\\)") ;; New syntax for
833 ;; symbols
834 (cons (regexp-quote "\\|$") "")
835 (cons (regexp-quote "\\([\t ]")
836 "\\(?:$\\|[\t ]") ;; Move "$" in regex and make a group shy
838 (should (re-equal
839 ;; Sections_ / Transitions_
840 re-ado2
841 "=== rst.el.~rst_el_1_68~:3109"
842 (rst-re 'ado-beg-2-1)
843 (cons "\\^\\[:word:]\\[:space:]\\[:cntrl:]"
844 "]!\"#$%&'()*+,./:;<=>?@[\\^_`{|}~-") ;; Use real adornment
845 ;; characters
846 (cons "2\\+" "{2,\\}") ;; Use repeat count
848 (should (re-equal
849 ;; `Comments`_
850 (concat re-bol "\\(" re-ems "\\)\[^[|_\n]\\([^:\n]\\|:\\([^:\n]\\|$\\)\\)*$")
851 "=== rst.el.~rst_el_1_68~:3128"
852 (rst-re 'lin-beg '(:grp exm-sta) "[^\[|_\n]"
853 '(:alt "[^:\n]" (:seq ":" (:alt "[^:\n]" "$"))) "*$")
854 (cons "\\\\(\\[^:" "\\(?:[^:") ;; Make a group shy
855 (cons "\\\\(\\[^:" "\\(?:[^:") ;; Make a group shy
857 (should (re-equal-matches
858 ;; `Comments`_
859 (concat re-bol "\\(" re-emt "\\)\\(\\s *\\)$")
860 "=== rst.el.~rst_el_1_68~:3135"
861 (rst-re 'lin-beg '(:grp exm-tag) '(:grp hws-tag) "$")
862 (list ".. "
863 (cons 1 1)
864 (cons 2 2))
865 (cons "\\\\s " "[\t ]") ;; Only horizontal space
867 (should (re-equal-matches
868 ;; `Literal Blocks`_
869 (concat re-bol "\\(\\([^.\n]\\|\\.[^.\n]\\).*\\)?\\(::\\)$")
870 "=== rst.el.~rst_el_1_68~:3145"
871 (rst-re 'lin-beg '(:shy (:alt "[^.\n]" "\\.[^.\n]") ".*") "?"
872 '(:grp dcl-tag) "$")
873 (list "Some text ::"
874 (cons 3 1))
875 (cons "\\\\(\\\\(" "\\(?:\\(?:") ;; Make two groups shy
877 (should (re-equal-matches
878 ;; `Doctest Blocks`_
879 (concat re-bol "\\(>>>\\|\\.\\.\\.\\)\\(.+\\)")
880 "=== rst.el.~rst_el_1_68~:3154"
881 (rst-re 'lin-beg '(:grp (:alt ">>>" ell-tag)) '(:grp ".+"))
882 (list ">>> content"
883 (cons 1 1)
884 (cons 2 2))
885 (cons ">>>" "\\(?:>>>") (cons "\\.\\\\)" ".\\)\\)") ;; Add a shy
886 ;; group