Merge from Gnulib
[emacs.git] / lisp / progmodes / ebnf-bnf.el
blob7d426f9491cbbca68cf0b625131db25812c5c263
1 ;;; ebnf-bnf.el --- parser for EBNF
3 ;; Copyright (C) 1999-2017 Free Software Foundation, Inc.
5 ;; Author: Vinicius Jose Latorre <viniciusjl@ig.com.br>
6 ;; Maintainer: Vinicius Jose Latorre <viniciusjl@ig.com.br>
7 ;; Keywords: wp, ebnf, PostScript
8 ;; Old-Version: 1.10
9 ;; Package: ebnf2ps
11 ;; This file is part of GNU Emacs.
13 ;; GNU Emacs is free software: you can redistribute it and/or modify
14 ;; it under the terms of the GNU General Public License as published by
15 ;; the Free Software Foundation, either version 3 of the License, or
16 ;; (at your option) any later version.
18 ;; GNU Emacs is distributed in the hope that it will be useful,
19 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
20 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 ;; GNU General Public License for more details.
23 ;; You should have received a copy of the GNU General Public License
24 ;; along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>.
26 ;;; Commentary:
28 ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
31 ;; This is part of ebnf2ps package.
33 ;; This package defines a parser for EBNF.
35 ;; See ebnf2ps.el for documentation.
38 ;; EBNF Syntax
39 ;; -----------
41 ;; The current EBNF that ebnf2ps accepts has the following constructions:
43 ;; ; comment (until end of line)
44 ;; A non-terminal
45 ;; "C" terminal
46 ;; ?C? special
47 ;; $A default non-terminal
48 ;; $"C" default terminal
49 ;; $?C? default special
50 ;; A = B. production (A is the header and B the body)
51 ;; C D sequence (C occurs before D)
52 ;; C | D alternative (C or D occurs)
53 ;; A - B exception (A excluding B, B without any non-terminal)
54 ;; n * A repetition (A repeats at least n (integer) times)
55 ;; n * n A repetition (A repeats exactly n (integer) times)
56 ;; n * m A repetition (A repeats at least n (integer) and at most
57 ;; m (integer) times)
58 ;; (C) group (expression C is grouped together)
59 ;; [C] optional (C may or not occurs)
60 ;; C+ one or more occurrences of C
61 ;; {C}+ one or more occurrences of C
62 ;; {C}* zero or more occurrences of C
63 ;; {C} zero or more occurrences of C
64 ;; C / D equivalent to: C {D C}*
65 ;; {C || D}+ equivalent to: C {D C}*
66 ;; {C || D}* equivalent to: [C {D C}*]
67 ;; {C || D} equivalent to: [C {D C}*]
69 ;; The EBNF syntax written using the notation above is:
71 ;; EBNF = {production}+.
73 ;; production = non_terminal "=" body ".". ;; production
75 ;; body = {sequence || "|"}*. ;; alternative
77 ;; sequence = {exception}*. ;; sequence
79 ;; exception = repeat [ "-" repeat]. ;; exception
81 ;; repeat = [ integer "*" [ integer ]] term. ;; repetition
83 ;; term = factor
84 ;; | [factor] "+" ;; one-or-more
85 ;; | [factor] "/" [factor] ;; one-or-more
86 ;; .
88 ;; factor = [ "$" ] "\"" terminal "\"" ;; terminal
89 ;; | [ "$" ] non_terminal ;; non-terminal
90 ;; | [ "$" ] "?" special "?" ;; special
91 ;; | "(" body ")" ;; group
92 ;; | "[" body "]" ;; zero-or-one
93 ;; | "{" body [ "||" body ] "}+" ;; one-or-more
94 ;; | "{" body [ "||" body ] "}*" ;; zero-or-more
95 ;; | "{" body [ "||" body ] "}" ;; zero-or-more
96 ;; .
98 ;; non_terminal = "[!#%&'*-,0-:<>@-Z\\\\^-z~\\240-\\377]+".
99 ;; ;; that is, a valid non_terminal accepts decimal digits, letters (upper
100 ;; ;; and lower), 8-bit accentuated characters,
101 ;; ;; "!", "#", "%", "&", "'", "*", "+", ",", ":",
102 ;; ;; "<", ">", "@", "\", "^", "_", "`" and "~".
104 ;; terminal = "\\([^\"\\]\\|\\\\[ -~\\240-\\377]\\)+".
105 ;; ;; that is, a valid terminal accepts any printable character (including
106 ;; ;; 8-bit accentuated characters) except `"', as `"' is used to delimit a
107 ;; ;; terminal. Also, accepts escaped characters, that is, a character
108 ;; ;; pair starting with `\' followed by a printable character, for
109 ;; ;; example: \", \\.
111 ;; special = "[^?\\000-\\010\\012-\\037\\177-\\237]*".
112 ;; ;; that is, a valid special accepts any printable character (including
113 ;; ;; 8-bit accentuated characters) and tabs except `?', as `?' is used to
114 ;; ;; delimit a special.
116 ;; integer = "[0-9]+".
117 ;; ;; that is, an integer is a sequence of one or more decimal digits.
119 ;; comment = ";" "[^\\n\\000-\\010\\016-\\037\\177-\\237]*" "\\n".
120 ;; ;; that is, a comment starts with the character `;' and terminates at end
121 ;; ;; of line. Also, it only accepts printable characters (including 8-bit
122 ;; ;; accentuated characters) and tabs.
125 ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
127 ;;; Code:
130 (require 'ebnf-otz)
133 (defvar ebnf-bnf-lex nil
134 "Value returned by `ebnf-bnf-lex' function.")
137 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
138 ;; Syntactic analyzer
141 ;;; EBNF = {production}+.
143 (defun ebnf-bnf-parser (start)
144 "EBNF parser."
145 (let ((total (+ (- ebnf-limit start) 1))
146 (bias (1- start))
147 (origin (point))
148 prod-list token rule)
149 (goto-char start)
150 (setq token (ebnf-bnf-lex))
151 (and (eq token 'end-of-input)
152 (error "Invalid EBNF file format"))
153 (while (not (eq token 'end-of-input))
154 (ebnf-message-float
155 "Parsing...%s%%"
156 (/ (* (- (point) bias) 100.0) total))
157 (setq token (ebnf-production token)
158 rule (cdr token)
159 token (car token))
160 (or (ebnf-add-empty-rule-list rule)
161 (setq prod-list (cons rule prod-list))))
162 (goto-char origin)
163 prod-list))
166 ;;; production = non-terminal "=" body ".".
168 (defun ebnf-production (token)
169 (let ((header ebnf-bnf-lex)
170 (action ebnf-action)
171 body)
172 (setq ebnf-action nil)
173 (or (eq token 'non-terminal)
174 (error "Invalid header production"))
175 (or (eq (ebnf-bnf-lex) 'equal)
176 (error "Invalid production: missing `='"))
177 (setq body (ebnf-body))
178 (or (eq (car body) 'period)
179 (error "Invalid production: missing `.'"))
180 (setq body (cdr body))
181 (ebnf-eps-add-production header)
182 (cons (ebnf-bnf-lex)
183 (ebnf-make-production header body action))))
186 ;;; body = {sequence || "|"}*.
188 (defun ebnf-body ()
189 (let (body sequence)
190 (while (eq (car (setq sequence (ebnf-sequence))) 'alternative)
191 (setq sequence (cdr sequence)
192 body (cons sequence body)))
193 (ebnf-token-alternative body sequence)))
196 ;;; sequence = {exception}*.
198 (defun ebnf-sequence ()
199 (let ((token (ebnf-bnf-lex))
200 seq term)
201 (while (setq term (ebnf-exception token)
202 token (car term)
203 term (cdr term))
204 (setq seq (cons term seq)))
205 (cons token
206 (ebnf-token-sequence seq))))
209 ;;; exception = repeat [ "-" repeat].
211 (defun ebnf-exception (token)
212 (let ((term (ebnf-repeat token)))
213 (if (not (eq (car term) 'except))
214 ;; repeat
215 term
216 ;; repeat - repeat
217 (let ((exception (ebnf-repeat (ebnf-bnf-lex))))
218 (ebnf-no-non-terminal (cdr exception))
219 (ebnf-token-except (cdr term) exception)))))
222 (defun ebnf-no-non-terminal (node)
223 (and (vectorp node)
224 (let ((kind (ebnf-node-kind node)))
225 (cond
226 ((eq kind 'ebnf-generate-non-terminal)
227 (error "Exception sequence should not contain a non-terminal"))
228 ((eq kind 'ebnf-generate-repeat)
229 (ebnf-no-non-terminal (ebnf-node-separator node)))
230 ((memq kind '(ebnf-generate-optional ebnf-generate-except))
231 (ebnf-no-non-terminal (ebnf-node-list node)))
232 ((memq kind '(ebnf-generate-one-or-more ebnf-generate-zero-or-more))
233 (ebnf-no-non-terminal (ebnf-node-list node))
234 (ebnf-no-non-terminal (ebnf-node-separator node)))
235 ((memq kind '(ebnf-generate-alternative ebnf-generate-sequence))
236 (let ((seq (ebnf-node-list node)))
237 (while seq
238 (ebnf-no-non-terminal (car seq))
239 (setq seq (cdr seq)))))
240 ))))
243 ;;; repeat = [ integer "*" [ integer ]] term.
245 (defun ebnf-repeat (token)
246 (if (not (eq token 'integer))
247 (ebnf-term token)
248 (let ((times ebnf-bnf-lex)
249 upper)
250 (or (eq (ebnf-bnf-lex) 'repeat)
251 (error "Missing `*'"))
252 (setq token (ebnf-bnf-lex))
253 (when (eq token 'integer)
254 (setq upper ebnf-bnf-lex
255 token (ebnf-bnf-lex)))
256 (ebnf-token-repeat times (ebnf-term token) upper))))
259 ;;; term = factor
260 ;;; | [factor] "+" ;; one-or-more
261 ;;; | [factor] "/" [factor] ;; one-or-more
262 ;;; .
264 (defun ebnf-term (token)
265 (let ((factor (ebnf-factor token)))
266 (and factor
267 (setq token (ebnf-bnf-lex)))
268 (cond
269 ;; [factor] +
270 ((eq token 'one-or-more)
271 (cons (ebnf-bnf-lex)
272 (and factor
273 (let ((kind (ebnf-node-kind factor)))
274 (cond
275 ;; { A }+ + ==> { A }+
276 ;; { A }* + ==> { A }*
277 ((memq kind '(ebnf-generate-zero-or-more
278 ebnf-generate-one-or-more))
279 factor)
280 ;; [ A ] + ==> { A }*
281 ((eq kind 'ebnf-generate-optional)
282 (ebnf-make-zero-or-more (list factor)))
283 ;; A +
285 (ebnf-make-one-or-more (list factor)))
286 )))))
287 ;; [factor] / [factor]
288 ((eq token 'list)
289 (setq token (ebnf-bnf-lex))
290 (let ((sep (ebnf-factor token)))
291 (and sep
292 (setq factor (or factor (ebnf-make-empty))))
293 (cons (if sep
294 (ebnf-bnf-lex)
295 token)
296 (and factor
297 (ebnf-make-one-or-more factor sep)))))
298 ;; factor
300 (cons token factor))
304 ;;; factor = [ "$" ] "\"" terminal "\"" ;; terminal
305 ;;; | [ "$" ] non_terminal ;; non-terminal
306 ;;; | [ "$" ] "?" special "?" ;; special
307 ;;; | "(" body ")" ;; group
308 ;;; | "[" body "]" ;; zero-or-one
309 ;;; | "{" body [ "||" body ] "}+" ;; one-or-more
310 ;;; | "{" body [ "||" body ] "}*" ;; zero-or-more
311 ;;; | "{" body [ "||" body ] "}" ;; zero-or-more
312 ;;; .
314 (defun ebnf-factor (token)
315 (cond
316 ;; terminal
317 ((eq token 'terminal)
318 (ebnf-make-terminal ebnf-bnf-lex))
319 ;; non-terminal
320 ((eq token 'non-terminal)
321 (ebnf-make-non-terminal ebnf-bnf-lex))
322 ;; special
323 ((eq token 'special)
324 (ebnf-make-special ebnf-bnf-lex))
325 ;; group
326 ((eq token 'begin-group)
327 (let ((body (ebnf-body)))
328 (or (eq (car body) 'end-group)
329 (error "Missing `)'"))
330 (cdr body)))
331 ;; optional
332 ((eq token 'begin-optional)
333 (let ((body (ebnf-body)))
334 (or (eq (car body) 'end-optional)
335 (error "Missing `]'"))
336 (ebnf-token-optional (cdr body))))
337 ;; list
338 ((eq token 'begin-list)
339 (let* ((body (ebnf-body))
340 (token (car body))
341 (list-part (cdr body))
342 sep-part)
343 (and (eq token 'list-separator)
344 ;; { A || B }
345 (setq body (ebnf-body) ; get separator
346 token (car body)
347 sep-part (cdr body)))
348 (cond
349 ;; { A }+
350 ((eq token 'end-one-or-more)
351 (ebnf-make-one-or-more list-part sep-part))
352 ;; { A }*
353 ((eq token 'end-zero-or-more)
354 (ebnf-make-zero-or-more list-part sep-part))
356 (error "Missing `}+', `}*' or `}'"))
358 ;; no term
360 nil)
364 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
365 ;; Lexical analyzer
368 (defconst ebnf-bnf-token-table (make-vector 256 'error)
369 "Vector used to map characters to a lexical token.")
372 (defun ebnf-bnf-initialize ()
373 "Initialize EBNF token table."
374 ;; control character & control 8-bit character are set to `error'
375 (let ((char ?\040))
376 ;; printable character:
377 (while (< char ?\060)
378 (aset ebnf-bnf-token-table char 'non-terminal)
379 (setq char (1+ char)))
380 ;; digits:
381 (while (< char ?\072)
382 (aset ebnf-bnf-token-table char 'integer)
383 (setq char (1+ char)))
384 ;; printable character:
385 (while (< char ?\177)
386 (aset ebnf-bnf-token-table char 'non-terminal)
387 (setq char (1+ char)))
388 ;; European 8-bit accentuated characters:
389 (setq char ?\240)
390 (while (< char ?\400)
391 (aset ebnf-bnf-token-table char 'non-terminal)
392 (setq char (1+ char)))
393 ;; Override space characters:
394 (aset ebnf-bnf-token-table ?\013 'space) ; [VT] vertical tab
395 (aset ebnf-bnf-token-table ?\n 'space) ; [NL] linefeed
396 (aset ebnf-bnf-token-table ?\r 'space) ; [CR] carriage return
397 (aset ebnf-bnf-token-table ?\t 'space) ; [HT] horizontal tab
398 (aset ebnf-bnf-token-table ?\ 'space) ; [SP] space
399 ;; Override form feed character:
400 (aset ebnf-bnf-token-table ?\f 'form-feed) ; [FF] form feed
401 ;; Override other lexical characters:
402 (aset ebnf-bnf-token-table ?\" 'terminal)
403 (aset ebnf-bnf-token-table ?\? 'special)
404 (aset ebnf-bnf-token-table ?\( 'begin-group)
405 (aset ebnf-bnf-token-table ?\) 'end-group)
406 (aset ebnf-bnf-token-table ?* 'repeat)
407 (aset ebnf-bnf-token-table ?- 'except)
408 (aset ebnf-bnf-token-table ?= 'equal)
409 (aset ebnf-bnf-token-table ?\[ 'begin-optional)
410 (aset ebnf-bnf-token-table ?\] 'end-optional)
411 (aset ebnf-bnf-token-table ?\{ 'begin-list)
412 (aset ebnf-bnf-token-table ?| 'alternative)
413 (aset ebnf-bnf-token-table ?\} 'end-list)
414 (aset ebnf-bnf-token-table ?/ 'list)
415 (aset ebnf-bnf-token-table ?+ 'one-or-more)
416 (aset ebnf-bnf-token-table ?$ 'default)
417 ;; Override comment character:
418 (aset ebnf-bnf-token-table ebnf-lex-comment-char 'comment)
419 ;; Override end of production character:
420 (aset ebnf-bnf-token-table ebnf-lex-eop-char 'period)))
423 ;; replace the range "\240-\377" (see `ebnf-range-regexp').
424 (defconst ebnf-bnf-non-terminal-chars
425 (ebnf-range-regexp "!#%&'*-,0-:<>@-Z\\\\^-z~" ?\240 ?\377))
428 (defun ebnf-bnf-lex ()
429 "Lexical analyzer for EBNF.
431 Return a lexical token.
433 See documentation for variable `ebnf-bnf-lex'."
434 (if (>= (point) ebnf-limit)
435 'end-of-input
436 (let (token)
437 ;; skip spaces and comments
438 (while (if (> (following-char) 255)
439 (progn
440 (setq token 'error)
441 nil)
442 (setq token (aref ebnf-bnf-token-table (following-char)))
443 (cond
444 ((eq token 'space)
445 (skip-chars-forward " \013\n\r\t" ebnf-limit)
446 (< (point) ebnf-limit))
447 ((eq token 'comment)
448 (ebnf-bnf-skip-comment))
449 ((eq token 'form-feed)
450 (forward-char)
451 (setq ebnf-action 'form-feed))
452 (t nil)
454 (setq ebnf-default-p nil)
455 (cond
456 ;; end of input
457 ((>= (point) ebnf-limit)
458 'end-of-input)
459 ;; error
460 ((eq token 'error)
461 (error "Invalid character"))
462 ;; default
463 ((eq token 'default)
464 (forward-char)
465 (if (memq (aref ebnf-bnf-token-table (following-char))
466 '(terminal non-terminal special))
467 (prog1
468 (ebnf-bnf-lex)
469 (setq ebnf-default-p t))
470 (error "Invalid `default' element")))
471 ;; integer
472 ((eq token 'integer)
473 (setq ebnf-bnf-lex (ebnf-buffer-substring "0-9"))
474 'integer)
475 ;; special: ?special?
476 ((eq token 'special)
477 (setq ebnf-bnf-lex (concat (and ebnf-special-show-delimiter "?")
478 (ebnf-string " ->@-~" ?\? "special")
479 (and ebnf-special-show-delimiter "?")))
480 'special)
481 ;; terminal: "string"
482 ((eq token 'terminal)
483 (setq ebnf-bnf-lex (ebnf-unescape-string (ebnf-get-string)))
484 'terminal)
485 ;; non-terminal or terminal
486 ((eq token 'non-terminal)
487 (setq ebnf-bnf-lex (ebnf-buffer-substring ebnf-bnf-non-terminal-chars))
488 (let ((case-fold-search ebnf-case-fold-search)
489 match)
490 (if (and ebnf-terminal-regexp
491 (setq match (string-match ebnf-terminal-regexp
492 ebnf-bnf-lex))
493 (zerop match)
494 (= (match-end 0) (length ebnf-bnf-lex)))
495 'terminal
496 'non-terminal)))
497 ;; end of list: }+, }*, }
498 ((eq token 'end-list)
499 (forward-char)
500 (cond
501 ((= (following-char) ?+)
502 (forward-char)
503 'end-one-or-more)
504 ((= (following-char) ?*)
505 (forward-char)
506 'end-zero-or-more)
508 'end-zero-or-more)
510 ;; alternative: |, ||
511 ((eq token 'alternative)
512 (forward-char)
513 (if (/= (following-char) ?|)
514 'alternative
515 (forward-char)
516 'list-separator))
517 ;; miscellaneous: {, (, ), [, ], ., =, /, +, -, *
519 (forward-char)
520 token)
521 ))))
524 ;; replace the range "\177-\237" (see `ebnf-range-regexp').
525 (defconst ebnf-bnf-comment-chars
526 (ebnf-range-regexp "^\n\000-\010\016-\037" ?\177 ?\237))
529 (defun ebnf-bnf-skip-comment ()
530 (forward-char)
531 (cond
532 ;; open EPS file
533 ((and ebnf-eps-executing (= (following-char) ?\[))
534 (ebnf-eps-add-context (ebnf-bnf-eps-filename)))
535 ;; close EPS file
536 ((and ebnf-eps-executing (= (following-char) ?\]))
537 (ebnf-eps-remove-context (ebnf-bnf-eps-filename)))
538 ;; EPS header
539 ((and ebnf-eps-executing (= (following-char) ?H))
540 (ebnf-eps-header-comment (ebnf-bnf-eps-filename)))
541 ;; EPS footer
542 ((and ebnf-eps-executing (= (following-char) ?F))
543 (ebnf-eps-footer-comment (ebnf-bnf-eps-filename)))
544 ;; any other action in comment
546 (setq ebnf-action (aref ebnf-comment-table (following-char)))
547 (skip-chars-forward ebnf-bnf-comment-chars ebnf-limit))
549 ;; check for a valid end of comment
550 (cond ((>= (point) ebnf-limit)
551 nil)
552 ((= (following-char) ?\n)
553 (forward-char)
556 (error "Invalid character"))
560 (defun ebnf-bnf-eps-filename ()
561 (forward-char)
562 (ebnf-buffer-substring ebnf-bnf-comment-chars))
565 (defun ebnf-unescape-string (str)
566 (let* ((len (length str))
567 (size (1- len))
568 (istr 0)
569 (n-esc 0))
570 ;; count number of escapes
571 (while (< istr size)
572 (setq istr (+ istr
573 (if (= (aref str istr) ?\\)
574 (progn
575 (setq n-esc (1+ n-esc))
577 1))))
578 (if (zerop n-esc)
579 ;; no escapes
581 ;; at least one escape
582 (let ((new (make-string (- len n-esc) ?\ ))
583 (inew 0))
584 ;; eliminate all escapes
585 (setq istr 0)
586 (while (> n-esc 0)
587 (and (= (aref str istr) ?\\)
588 (setq istr (1+ istr)
589 n-esc (1- n-esc)))
590 (aset new inew (aref str istr))
591 (setq inew (1+ inew)
592 istr (1+ istr)))
593 ;; remaining string has no escape
594 (while (< istr len)
595 (aset new inew (aref str istr))
596 (setq inew (1+ inew)
597 istr (1+ istr)))
598 new))))
601 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
604 (provide 'ebnf-bnf)
607 ;;; ebnf-bnf.el ends here