* ibuf-ext.el (mode-name sorter, ibuffer-mark-by-mode-regexp):
[emacs.git] / lisp / progmodes / ebnf-bnf.el
blob8f6ecd99d2467a406dc88790af6ef8a7bd0b221a
1 ;;; ebnf-bnf.el --- parser for EBNF
3 ;; Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007
4 ;; Free Software Foundation, Inc.
6 ;; Author: Vinicius Jose Latorre <viniciusjl@ig.com.br>
7 ;; Maintainer: Vinicius Jose Latorre <viniciusjl@ig.com.br>
8 ;; Keywords: wp, ebnf, PostScript
9 ;; Version: 1.10
11 ;; This file is part of GNU Emacs.
13 ;; GNU Emacs is free software; you can redistribute it and/or modify
14 ;; it under the terms of the GNU General Public License as published by
15 ;; the Free Software Foundation; either version 3, or (at your option)
16 ;; any later version.
18 ;; GNU Emacs is distributed in the hope that it will be useful,
19 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
20 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 ;; GNU General Public License for more details.
23 ;; You should have received a copy of the GNU General Public License
24 ;; along with GNU Emacs; see the file COPYING. If not, write to the
25 ;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
26 ;; Boston, MA 02110-1301, USA.
28 ;;; Commentary:
30 ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
33 ;; This is part of ebnf2ps package.
35 ;; This package defines a parser for EBNF.
37 ;; See ebnf2ps.el for documentation.
40 ;; EBNF Syntax
41 ;; -----------
43 ;; The current EBNF that ebnf2ps accepts has the following constructions:
45 ;; ; comment (until end of line)
46 ;; A non-terminal
47 ;; "C" terminal
48 ;; ?C? special
49 ;; $A default non-terminal
50 ;; $"C" default terminal
51 ;; $?C? default special
52 ;; A = B. production (A is the header and B the body)
53 ;; C D sequence (C occurs before D)
54 ;; C | D alternative (C or D occurs)
55 ;; A - B exception (A excluding B, B without any non-terminal)
56 ;; n * A repetition (A repeats at least n (integer) times)
57 ;; n * n A repetition (A repeats exactly n (integer) times)
58 ;; n * m A repetition (A repeats at least n (integer) and at most
59 ;; m (integer) times)
60 ;; (C) group (expression C is grouped together)
61 ;; [C] optional (C may or not occurs)
62 ;; C+ one or more occurrences of C
63 ;; {C}+ one or more occurrences of C
64 ;; {C}* zero or more occurrences of C
65 ;; {C} zero or more occurrences of C
66 ;; C / D equivalent to: C {D C}*
67 ;; {C || D}+ equivalent to: C {D C}*
68 ;; {C || D}* equivalent to: [C {D C}*]
69 ;; {C || D} equivalent to: [C {D C}*]
71 ;; The EBNF syntax written using the notation above is:
73 ;; EBNF = {production}+.
75 ;; production = non_terminal "=" body ".". ;; production
77 ;; body = {sequence || "|"}*. ;; alternative
79 ;; sequence = {exception}*. ;; sequence
81 ;; exception = repeat [ "-" repeat]. ;; exception
83 ;; repeat = [ integer "*" [ integer ]] term. ;; repetition
85 ;; term = factor
86 ;; | [factor] "+" ;; one-or-more
87 ;; | [factor] "/" [factor] ;; one-or-more
88 ;; .
90 ;; factor = [ "$" ] "\"" terminal "\"" ;; terminal
91 ;; | [ "$" ] non_terminal ;; non-terminal
92 ;; | [ "$" ] "?" special "?" ;; special
93 ;; | "(" body ")" ;; group
94 ;; | "[" body "]" ;; zero-or-one
95 ;; | "{" body [ "||" body ] "}+" ;; one-or-more
96 ;; | "{" body [ "||" body ] "}*" ;; zero-or-more
97 ;; | "{" body [ "||" body ] "}" ;; zero-or-more
98 ;; .
100 ;; non_terminal = "[!#%&'*-,0-:<>@-Z\\\\^-z~\\240-\\377]+".
101 ;; ;; that is, a valid non_terminal accepts decimal digits, letters (upper
102 ;; ;; and lower), 8-bit accentuated characters,
103 ;; ;; "!", "#", "%", "&", "'", "*", "+", ",", ":",
104 ;; ;; "<", ">", "@", "\", "^", "_", "`" and "~".
106 ;; terminal = "\\([^\"\\]\\|\\\\[ -~\\240-\\377]\\)+".
107 ;; ;; that is, a valid terminal accepts any printable character (including
108 ;; ;; 8-bit accentuated characters) except `"', as `"' is used to delimit a
109 ;; ;; terminal. Also, accepts escaped characters, that is, a character
110 ;; ;; pair starting with `\' followed by a printable character, for
111 ;; ;; example: \", \\.
113 ;; special = "[^?\\000-\\010\\012-\\037\\177-\\237]*".
114 ;; ;; that is, a valid special accepts any printable character (including
115 ;; ;; 8-bit accentuated characters) and tabs except `?', as `?' is used to
116 ;; ;; delimit a special.
118 ;; integer = "[0-9]+".
119 ;; ;; that is, an integer is a sequence of one or more decimal digits.
121 ;; comment = ";" "[^\\n\\000-\\010\\016-\\037\\177-\\237]*" "\\n".
122 ;; ;; that is, a comment starts with the character `;' and terminates at end
123 ;; ;; of line. Also, it only accepts printable characters (including 8-bit
124 ;; ;; accentuated characters) and tabs.
127 ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
129 ;;; Code:
132 (require 'ebnf-otz)
135 (defvar ebnf-bnf-lex nil
136 "Value returned by `ebnf-bnf-lex' function.")
139 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
140 ;; Syntactic analyzer
143 ;;; EBNF = {production}+.
145 (defun ebnf-bnf-parser (start)
146 "EBNF parser."
147 (let ((total (+ (- ebnf-limit start) 1))
148 (bias (1- start))
149 (origin (point))
150 prod-list token rule)
151 (goto-char start)
152 (setq token (ebnf-bnf-lex))
153 (and (eq token 'end-of-input)
154 (error "Invalid EBNF file format"))
155 (while (not (eq token 'end-of-input))
156 (ebnf-message-float
157 "Parsing...%s%%"
158 (/ (* (- (point) bias) 100.0) total))
159 (setq token (ebnf-production token)
160 rule (cdr token)
161 token (car token))
162 (or (ebnf-add-empty-rule-list rule)
163 (setq prod-list (cons rule prod-list))))
164 (goto-char origin)
165 prod-list))
168 ;;; production = non-terminal "=" body ".".
170 (defun ebnf-production (token)
171 (let ((header ebnf-bnf-lex)
172 (action ebnf-action)
173 body)
174 (setq ebnf-action nil)
175 (or (eq token 'non-terminal)
176 (error "Invalid header production"))
177 (or (eq (ebnf-bnf-lex) 'equal)
178 (error "Invalid production: missing `='"))
179 (setq body (ebnf-body))
180 (or (eq (car body) 'period)
181 (error "Invalid production: missing `.'"))
182 (setq body (cdr body))
183 (ebnf-eps-add-production header)
184 (cons (ebnf-bnf-lex)
185 (ebnf-make-production header body action))))
188 ;;; body = {sequence || "|"}*.
190 (defun ebnf-body ()
191 (let (body sequence)
192 (while (eq (car (setq sequence (ebnf-sequence))) 'alternative)
193 (setq sequence (cdr sequence)
194 body (cons sequence body)))
195 (ebnf-token-alternative body sequence)))
198 ;;; sequence = {exception}*.
200 (defun ebnf-sequence ()
201 (let ((token (ebnf-bnf-lex))
202 seq term)
203 (while (setq term (ebnf-exception token)
204 token (car term)
205 term (cdr term))
206 (setq seq (cons term seq)))
207 (cons token
208 (ebnf-token-sequence seq))))
211 ;;; exception = repeat [ "-" repeat].
213 (defun ebnf-exception (token)
214 (let ((term (ebnf-repeat token)))
215 (if (not (eq (car term) 'except))
216 ;; repeat
217 term
218 ;; repeat - repeat
219 (let ((exception (ebnf-repeat (ebnf-bnf-lex))))
220 (ebnf-no-non-terminal (cdr exception))
221 (ebnf-token-except (cdr term) exception)))))
224 (defun ebnf-no-non-terminal (node)
225 (and (vectorp node)
226 (let ((kind (ebnf-node-kind node)))
227 (cond
228 ((eq kind 'ebnf-generate-non-terminal)
229 (error "Exception sequence should not contain a non-terminal"))
230 ((eq kind 'ebnf-generate-repeat)
231 (ebnf-no-non-terminal (ebnf-node-separator node)))
232 ((memq kind '(ebnf-generate-optional ebnf-generate-except))
233 (ebnf-no-non-terminal (ebnf-node-list node)))
234 ((memq kind '(ebnf-generate-one-or-more ebnf-generate-zero-or-more))
235 (ebnf-no-non-terminal (ebnf-node-list node))
236 (ebnf-no-non-terminal (ebnf-node-separator node)))
237 ((memq kind '(ebnf-generate-alternative ebnf-generate-sequence))
238 (let ((seq (ebnf-node-list node)))
239 (while seq
240 (ebnf-no-non-terminal (car seq))
241 (setq seq (cdr seq)))))
242 ))))
245 ;;; repeat = [ integer "*" [ integer ]] term.
247 (defun ebnf-repeat (token)
248 (if (not (eq token 'integer))
249 (ebnf-term token)
250 (let ((times ebnf-bnf-lex)
251 upper)
252 (or (eq (ebnf-bnf-lex) 'repeat)
253 (error "Missing `*'"))
254 (setq token (ebnf-bnf-lex))
255 (when (eq token 'integer)
256 (setq upper ebnf-bnf-lex
257 token (ebnf-bnf-lex)))
258 (ebnf-token-repeat times (ebnf-term token) upper))))
261 ;;; term = factor
262 ;;; | [factor] "+" ;; one-or-more
263 ;;; | [factor] "/" [factor] ;; one-or-more
264 ;;; .
266 (defun ebnf-term (token)
267 (let ((factor (ebnf-factor token)))
268 (and factor
269 (setq token (ebnf-bnf-lex)))
270 (cond
271 ;; [factor] +
272 ((eq token 'one-or-more)
273 (cons (ebnf-bnf-lex)
274 (and factor
275 (let ((kind (ebnf-node-kind factor)))
276 (cond
277 ;; { A }+ + ==> { A }+
278 ;; { A }* + ==> { A }*
279 ((memq kind '(ebnf-generate-zero-or-more
280 ebnf-generate-one-or-more))
281 factor)
282 ;; [ A ] + ==> { A }*
283 ((eq kind 'ebnf-generate-optional)
284 (ebnf-make-zero-or-more (list factor)))
285 ;; A +
287 (ebnf-make-one-or-more (list factor)))
288 )))))
289 ;; [factor] / [factor]
290 ((eq token 'list)
291 (setq token (ebnf-bnf-lex))
292 (let ((sep (ebnf-factor token)))
293 (and sep
294 (setq factor (or factor (ebnf-make-empty))))
295 (cons (if sep
296 (ebnf-bnf-lex)
297 token)
298 (and factor
299 (ebnf-make-one-or-more factor sep)))))
300 ;; factor
302 (cons token factor))
306 ;;; factor = [ "$" ] "\"" terminal "\"" ;; terminal
307 ;;; | [ "$" ] non_terminal ;; non-terminal
308 ;;; | [ "$" ] "?" special "?" ;; special
309 ;;; | "(" body ")" ;; group
310 ;;; | "[" body "]" ;; zero-or-one
311 ;;; | "{" body [ "||" body ] "}+" ;; one-or-more
312 ;;; | "{" body [ "||" body ] "}*" ;; zero-or-more
313 ;;; | "{" body [ "||" body ] "}" ;; zero-or-more
314 ;;; .
316 (defun ebnf-factor (token)
317 (cond
318 ;; terminal
319 ((eq token 'terminal)
320 (ebnf-make-terminal ebnf-bnf-lex))
321 ;; non-terminal
322 ((eq token 'non-terminal)
323 (ebnf-make-non-terminal ebnf-bnf-lex))
324 ;; special
325 ((eq token 'special)
326 (ebnf-make-special ebnf-bnf-lex))
327 ;; group
328 ((eq token 'begin-group)
329 (let ((body (ebnf-body)))
330 (or (eq (car body) 'end-group)
331 (error "Missing `)'"))
332 (cdr body)))
333 ;; optional
334 ((eq token 'begin-optional)
335 (let ((body (ebnf-body)))
336 (or (eq (car body) 'end-optional)
337 (error "Missing `]'"))
338 (ebnf-token-optional (cdr body))))
339 ;; list
340 ((eq token 'begin-list)
341 (let* ((body (ebnf-body))
342 (token (car body))
343 (list-part (cdr body))
344 sep-part)
345 (and (eq token 'list-separator)
346 ;; { A || B }
347 (setq body (ebnf-body) ; get separator
348 token (car body)
349 sep-part (cdr body)))
350 (cond
351 ;; { A }+
352 ((eq token 'end-one-or-more)
353 (ebnf-make-one-or-more list-part sep-part))
354 ;; { A }*
355 ((eq token 'end-zero-or-more)
356 (ebnf-make-zero-or-more list-part sep-part))
358 (error "Missing `}+', `}*' or `}'"))
360 ;; no term
362 nil)
366 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
367 ;; Lexical analyzer
370 (defconst ebnf-bnf-token-table (make-vector 256 'error)
371 "Vector used to map characters to a lexical token.")
374 (defun ebnf-bnf-initialize ()
375 "Initialize EBNF token table."
376 ;; control character & control 8-bit character are set to `error'
377 (let ((char ?\040))
378 ;; printable character:
379 (while (< char ?\060)
380 (aset ebnf-bnf-token-table char 'non-terminal)
381 (setq char (1+ char)))
382 ;; digits:
383 (while (< char ?\072)
384 (aset ebnf-bnf-token-table char 'integer)
385 (setq char (1+ char)))
386 ;; printable character:
387 (while (< char ?\177)
388 (aset ebnf-bnf-token-table char 'non-terminal)
389 (setq char (1+ char)))
390 ;; European 8-bit accentuated characters:
391 (setq char ?\240)
392 (while (< char ?\400)
393 (aset ebnf-bnf-token-table char 'non-terminal)
394 (setq char (1+ char)))
395 ;; Override space characters:
396 (aset ebnf-bnf-token-table ?\013 'space) ; [VT] vertical tab
397 (aset ebnf-bnf-token-table ?\n 'space) ; [NL] linefeed
398 (aset ebnf-bnf-token-table ?\r 'space) ; [CR] carriage return
399 (aset ebnf-bnf-token-table ?\t 'space) ; [HT] horizontal tab
400 (aset ebnf-bnf-token-table ?\ 'space) ; [SP] space
401 ;; Override form feed character:
402 (aset ebnf-bnf-token-table ?\f 'form-feed) ; [FF] form feed
403 ;; Override other lexical characters:
404 (aset ebnf-bnf-token-table ?\" 'terminal)
405 (aset ebnf-bnf-token-table ?\? 'special)
406 (aset ebnf-bnf-token-table ?\( 'begin-group)
407 (aset ebnf-bnf-token-table ?\) 'end-group)
408 (aset ebnf-bnf-token-table ?* 'repeat)
409 (aset ebnf-bnf-token-table ?- 'except)
410 (aset ebnf-bnf-token-table ?= 'equal)
411 (aset ebnf-bnf-token-table ?\[ 'begin-optional)
412 (aset ebnf-bnf-token-table ?\] 'end-optional)
413 (aset ebnf-bnf-token-table ?\{ 'begin-list)
414 (aset ebnf-bnf-token-table ?| 'alternative)
415 (aset ebnf-bnf-token-table ?\} 'end-list)
416 (aset ebnf-bnf-token-table ?/ 'list)
417 (aset ebnf-bnf-token-table ?+ 'one-or-more)
418 (aset ebnf-bnf-token-table ?$ 'default)
419 ;; Override comment character:
420 (aset ebnf-bnf-token-table ebnf-lex-comment-char 'comment)
421 ;; Override end of production character:
422 (aset ebnf-bnf-token-table ebnf-lex-eop-char 'period)))
425 ;; replace the range "\240-\377" (see `ebnf-range-regexp').
426 (defconst ebnf-bnf-non-terminal-chars
427 (ebnf-range-regexp "!#%&'*-,0-:<>@-Z\\\\^-z~" ?\240 ?\377))
430 (defun ebnf-bnf-lex ()
431 "Lexical analyzer for EBNF.
433 Return a lexical token.
435 See documentation for variable `ebnf-bnf-lex'."
436 (if (>= (point) ebnf-limit)
437 'end-of-input
438 (let (token)
439 ;; skip spaces and comments
440 (while (if (> (following-char) 255)
441 (progn
442 (setq token 'error)
443 nil)
444 (setq token (aref ebnf-bnf-token-table (following-char)))
445 (cond
446 ((eq token 'space)
447 (skip-chars-forward " \013\n\r\t" ebnf-limit)
448 (< (point) ebnf-limit))
449 ((eq token 'comment)
450 (ebnf-bnf-skip-comment))
451 ((eq token 'form-feed)
452 (forward-char)
453 (setq ebnf-action 'form-feed))
454 (t nil)
456 (setq ebnf-default-p nil)
457 (cond
458 ;; end of input
459 ((>= (point) ebnf-limit)
460 'end-of-input)
461 ;; error
462 ((eq token 'error)
463 (error "Invalid character"))
464 ;; default
465 ((eq token 'default)
466 (forward-char)
467 (if (memq (aref ebnf-bnf-token-table (following-char))
468 '(terminal non-terminal special))
469 (prog1
470 (ebnf-bnf-lex)
471 (setq ebnf-default-p t))
472 (error "Invalid `default' element")))
473 ;; integer
474 ((eq token 'integer)
475 (setq ebnf-bnf-lex (ebnf-buffer-substring "0-9"))
476 'integer)
477 ;; special: ?special?
478 ((eq token 'special)
479 (setq ebnf-bnf-lex (concat (and ebnf-special-show-delimiter "?")
480 (ebnf-string " ->@-~" ?\? "special")
481 (and ebnf-special-show-delimiter "?")))
482 'special)
483 ;; terminal: "string"
484 ((eq token 'terminal)
485 (setq ebnf-bnf-lex (ebnf-unescape-string (ebnf-get-string)))
486 'terminal)
487 ;; non-terminal or terminal
488 ((eq token 'non-terminal)
489 (setq ebnf-bnf-lex (ebnf-buffer-substring ebnf-bnf-non-terminal-chars))
490 (let ((case-fold-search ebnf-case-fold-search)
491 match)
492 (if (and ebnf-terminal-regexp
493 (setq match (string-match ebnf-terminal-regexp
494 ebnf-bnf-lex))
495 (zerop match)
496 (= (match-end 0) (length ebnf-bnf-lex)))
497 'terminal
498 'non-terminal)))
499 ;; end of list: }+, }*, }
500 ((eq token 'end-list)
501 (forward-char)
502 (cond
503 ((= (following-char) ?+)
504 (forward-char)
505 'end-one-or-more)
506 ((= (following-char) ?*)
507 (forward-char)
508 'end-zero-or-more)
510 'end-zero-or-more)
512 ;; alternative: |, ||
513 ((eq token 'alternative)
514 (forward-char)
515 (if (/= (following-char) ?|)
516 'alternative
517 (forward-char)
518 'list-separator))
519 ;; miscellaneous: {, (, ), [, ], ., =, /, +, -, *
521 (forward-char)
522 token)
523 ))))
526 ;; replace the range "\177-\237" (see `ebnf-range-regexp').
527 (defconst ebnf-bnf-comment-chars
528 (ebnf-range-regexp "^\n\000-\010\016-\037" ?\177 ?\237))
531 (defun ebnf-bnf-skip-comment ()
532 (forward-char)
533 (cond
534 ;; open EPS file
535 ((and ebnf-eps-executing (= (following-char) ?\[))
536 (ebnf-eps-add-context (ebnf-bnf-eps-filename)))
537 ;; close EPS file
538 ((and ebnf-eps-executing (= (following-char) ?\]))
539 (ebnf-eps-remove-context (ebnf-bnf-eps-filename)))
540 ;; EPS header
541 ((and ebnf-eps-executing (= (following-char) ?H))
542 (ebnf-eps-header-comment (ebnf-bnf-eps-filename)))
543 ;; EPS footer
544 ((and ebnf-eps-executing (= (following-char) ?F))
545 (ebnf-eps-footer-comment (ebnf-bnf-eps-filename)))
546 ;; any other action in comment
548 (setq ebnf-action (aref ebnf-comment-table (following-char)))
549 (skip-chars-forward ebnf-bnf-comment-chars ebnf-limit))
551 ;; check for a valid end of comment
552 (cond ((>= (point) ebnf-limit)
553 nil)
554 ((= (following-char) ?\n)
555 (forward-char)
558 (error "Invalid character"))
562 (defun ebnf-bnf-eps-filename ()
563 (forward-char)
564 (ebnf-buffer-substring ebnf-bnf-comment-chars))
567 (defun ebnf-unescape-string (str)
568 (let* ((len (length str))
569 (size (1- len))
570 (istr 0)
571 (n-esc 0))
572 ;; count number of escapes
573 (while (< istr size)
574 (setq istr (+ istr
575 (if (= (aref str istr) ?\\)
576 (progn
577 (setq n-esc (1+ n-esc))
579 1))))
580 (if (zerop n-esc)
581 ;; no escapes
583 ;; at least one escape
584 (let ((new (make-string (- len n-esc) ?\ ))
585 (inew 0))
586 ;; eliminate all escapes
587 (setq istr 0)
588 (while (> n-esc 0)
589 (and (= (aref str istr) ?\\)
590 (setq istr (1+ istr)
591 n-esc (1- n-esc)))
592 (aset new inew (aref str istr))
593 (setq inew (1+ inew)
594 istr (1+ istr)))
595 ;; remaining string has no escape
596 (while (< istr len)
597 (aset new inew (aref str istr))
598 (setq inew (1+ inew)
599 istr (1+ istr)))
600 new))))
603 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
606 (provide 'ebnf-bnf)
609 ;;; arch-tag: 3b1834d3-8367-475b-80d5-8e0bbd00ce50
610 ;;; ebnf-bnf.el ends here