1 ;;; ebnf-bnf.el --- parser for EBNF
3 ;; Copyright (C) 1999, 2000, 2001 Free Software Foundation, Inc.
5 ;; Author: Vinicius Jose Latorre <vinicius@cpqd.com.br>
6 ;; Maintainer: Vinicius Jose Latorre <vinicius@cpqd.com.br>
7 ;; Keywords: wp, ebnf, PostScript
8 ;; Time-stamp: <2003-02-10 10:29:48 jbarranquero>
11 ;; This file is part of GNU Emacs.
13 ;; GNU Emacs is free software; you can redistribute it and/or modify
14 ;; it under the terms of the GNU General Public License as published by
15 ;; the Free Software Foundation; either version 2, or (at your option)
;; any later version.
18 ;; GNU Emacs is distributed in the hope that it will be useful,
19 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
20 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 ;; GNU General Public License for more details.
23 ;; You should have received a copy of the GNU General Public License
24 ;; along with GNU Emacs; see the file COPYING. If not, write to the
25 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
26 ;; Boston, MA 02111-1307, USA.
30 ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
33 ;; This is part of ebnf2ps package.
35 ;; This package defines a parser for EBNF.
37 ;; See ebnf2ps.el for documentation.
43 ;; The current EBNF that ebnf2ps accepts has the following constructions:
45 ;; ; comment (until end of line)
49 ;; $A default non-terminal
50 ;; $"C" default terminal
51 ;; $?C? default special
52 ;; A = B. production (A is the header and B the body)
53 ;; C D sequence (C occurs before D)
54 ;; C | D alternative (C or D occurs)
55 ;; A - B exception (A excluding B, B without any non-terminal)
56 ;; n * A repetition (A repeats n (integer) times)
57 ;; (C) group (expression C is grouped together)
58 ;; [C] optional (C may or may not occur)
59 ;; C+ one or more occurrences of C
60 ;; {C}+ one or more occurrences of C
61 ;; {C}* zero or more occurrences of C
62 ;; {C} zero or more occurrences of C
63 ;; C / D equivalent to: C {D C}*
64 ;; {C || D}+ equivalent to: C {D C}*
65 ;; {C || D}* equivalent to: [C {D C}*]
66 ;; {C || D} equivalent to: [C {D C}*]
68 ;; The EBNF syntax written using the notation above is:
70 ;; EBNF = {production}+.
72 ;; production = non_terminal "=" body ".". ;; production
74 ;; body = {sequence || "|"}*. ;; alternative
76 ;; sequence = {exception}*. ;; sequence
78 ;; exception = repeat [ "-" repeat]. ;; exception
80 ;; repeat = [ integer "*" ] term. ;; repetition
;; term = factor
83 ;; | [factor] "+" ;; one-or-more
84 ;; | [factor] "/" [factor] ;; one-or-more
;; .
87 ;; factor = [ "$" ] "\"" terminal "\"" ;; terminal
88 ;; | [ "$" ] non_terminal ;; non-terminal
89 ;; | [ "$" ] "?" special "?" ;; special
90 ;; | "(" body ")" ;; group
91 ;; | "[" body "]" ;; zero-or-one
92 ;; | "{" body [ "||" body ] "}+" ;; one-or-more
93 ;; | "{" body [ "||" body ] "}*" ;; zero-or-more
94 ;; | "{" body [ "||" body ] "}" ;; zero-or-more
97 ;; non_terminal = "[!#%&'*-,0-:<>@-Z\\\\^-z~\\240-\\377]+".
99 ;; terminal = "\\([^\"\\]\\|\\\\[ -~\\240-\\377]\\)+".
101 ;; special = "[^?\\n\\000-\\010\\016-\\037\\177-\\237]*".
103 ;; integer = "[0-9]+".
105 ;; comment = ";" "[^\\n\\000-\\010\\016-\\037\\177-\\237]*" "\\n".
108 ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Holds the text of the most recently scanned token (integer, terminal,
;; non-terminal or special); the parser functions read it right after
;; calling the function `ebnf-bnf-lex'.
(defvar ebnf-bnf-lex nil
  "Value returned by `ebnf-bnf-lex' function.")
120 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
121 ;; Syntactic analyzer
124 ;;; EBNF = {production}+.
(defun ebnf-bnf-parser (start)
  "EBNF parser.

Parse the productions found between buffer position START and
`ebnf-limit'.  Return the list of parsed productions, the most
recently parsed one first.  Signal an error when the region holds no
token at all.  Point is moved back to where it was on entry."
  (let ((total (+ (- ebnf-limit start) 1))
        ;; BIAS makes (- (point) bias) a 1-based offset into the region.
        (bias (1- start))
        (origin (point))
        prod-list token rule)
    (goto-char start)
    (setq token (ebnf-bnf-lex))
    (and (eq token 'end-of-input)
         (error "Invalid EBNF file format"))
    (while (not (eq token 'end-of-input))
      ;; Report progress as a percentage of the region consumed so far.
      (ebnf-message-float
       "Parsing...%s%%"
       (/ (* (- (point) bias) 100.0) total))
      ;; `ebnf-production' returns (NEXT-TOKEN . RULE).
      (setq token (ebnf-production token)
            rule  (cdr token)
            token (car token))
      ;; Keep RULE unless `ebnf-add-empty-rule-list' returns non-nil.
      (or (ebnf-add-empty-rule-list rule)
          (setq prod-list (cons rule prod-list))))
    (goto-char origin)
    prod-list))
149 ;;; production = non-terminal "=" body ".".
(defun ebnf-production (token)
  "Parse one production: non_terminal \"=\" body \".\".

TOKEN is the token already read by the caller; it must be
`non-terminal', with the header text in variable `ebnf-bnf-lex'.
Return a cons (NEXT-TOKEN . PRODUCTION), where NEXT-TOKEN is the token
following the closing period.  Signal an error when the header, the
`=' or the final `.' is missing."
  (let ((header ebnf-bnf-lex)
        ;; Capture the pending comment action before it is reset below.
        (action ebnf-action)
        body)
    (setq ebnf-action nil)
    (or (eq token 'non-terminal)
        (error "Invalid header production"))
    (or (eq (ebnf-bnf-lex) 'equal)
        (error "Invalid production: missing `='"))
    ;; `ebnf-body' returns (TOKEN . NODE); TOKEN must be the period.
    (setq body (ebnf-body))
    (or (eq (car body) 'period)
        (error "Invalid production: missing `.'"))
    (setq body (cdr body))
    (ebnf-eps-add-production header)
    (cons (ebnf-bnf-lex)
          (ebnf-make-production header body action))))
169 ;;; body = {sequence || "|"}*.
(defun ebnf-body ()
  "Parse a body: {sequence || \"|\"}*.

Read sequences for as long as each one is followed by the
`alternative' token, then hand the collected sequences (most recent
first) and the last sequence to `ebnf-token-alternative' and return
its result.  NOTE(review): callers take the car of the result as the
token that ended the body and the cdr as the node - confirm against
`ebnf-token-alternative'."
  (let (body sequence)
    (while (eq (car (setq sequence (ebnf-sequence))) 'alternative)
      (setq sequence (cdr sequence)
            body     (cons sequence body)))
    (ebnf-token-alternative body sequence)))
179 ;;; sequence = {exception}*.
(defun ebnf-sequence ()
  "Parse a sequence: {exception}*.

Return a cons (TOKEN . NODE), where TOKEN is the first token that does
not start an exception and NODE is an empty node, the single element,
or a sequence node built from the elements in source order."
  (let ((token (ebnf-bnf-lex))
        seq term)
    ;; `ebnf-exception' returns (NEXT-TOKEN . NODE); a nil NODE ends the
    ;; sequence.
    (while (setq term  (ebnf-exception token)
                 token (car term)
                 term  (cdr term))
      (setq seq (cons term seq)))
    (cons token
          (cond
           ;; null sequence
           ((null seq)
            (ebnf-make-empty))
           ;; sequence with only one element
           ((= (length seq) 1)
            (car seq))
           ;; a real sequence
           (t
            (ebnf-make-sequence (nreverse seq)))))))
202 ;;; exception = repeat [ "-" repeat].
(defun ebnf-exception (token)
  "Parse an exception: repeat [ \"-\" repeat ].

TOKEN is the current token.  When the parsed repeat is not followed by
the `except' token, return that repeat's (NEXT-TOKEN . NODE) pair
unchanged.  Otherwise parse the excluded part, check with
`ebnf-no-non-terminal' that it holds no non-terminal, and return the
result of `ebnf-token-except'."
  (let ((term (ebnf-repeat token)))
    (if (not (eq (car term) 'except))
        ;; repeat
        term
      ;; repeat - repeat
      (let ((exception (ebnf-repeat (ebnf-bnf-lex))))
        (ebnf-no-non-terminal (cdr exception))
        (ebnf-token-except (cdr term) exception)))))
(defun ebnf-no-non-terminal (node)
  "Signal an error if NODE contains a non-terminal.

NODE is walked recursively through its list and separator parts,
depending on its kind.  Anything that is not a vector is ignored."
  (and (vectorp node)
       (let ((kind (ebnf-node-kind node)))
         (cond
          ((eq kind 'ebnf-generate-non-terminal)
           (error "Exception sequence should not contain a non-terminal"))
          ((eq kind 'ebnf-generate-repeat)
           (ebnf-no-non-terminal (ebnf-node-separator node)))
          ((memq kind '(ebnf-generate-optional ebnf-generate-except))
           (ebnf-no-non-terminal (ebnf-node-list node)))
          ((memq kind '(ebnf-generate-one-or-more ebnf-generate-zero-or-more))
           (ebnf-no-non-terminal (ebnf-node-list node))
           (ebnf-no-non-terminal (ebnf-node-separator node)))
          ((memq kind '(ebnf-generate-alternative ebnf-generate-sequence))
           ;; For these kinds the node list is a Lisp list of nodes.
           (let ((seq (ebnf-node-list node)))
             (while seq
               (ebnf-no-non-terminal (car seq))
               (setq seq (cdr seq)))))))))
236 ;;; repeat = [ integer "*" ] term.
(defun ebnf-repeat (token)
  "Parse a repetition: [ integer \"*\" ] term.

TOKEN is the current token.  Without a leading integer this is just
`ebnf-term'.  With one, the integer text is taken from variable
`ebnf-bnf-lex', the next token must be `repeat' (the `*'), and the
following term is wrapped by `ebnf-token-repeat'."
  (if (not (eq token 'integer))
      (ebnf-term token)
    (let ((times ebnf-bnf-lex))
      (or (eq (ebnf-bnf-lex) 'repeat)
          (error "Missing `*'"))
      (ebnf-token-repeat times (ebnf-term (ebnf-bnf-lex))))))
;;; term = factor
;;;      | [factor] "+"            ;; one-or-more
;;;      | [factor] "/" [factor]   ;; one-or-more
;;;      .

(defun ebnf-term (token)
  "Parse a term; see the grammar comment above.

TOKEN is the current token.  Return a cons (NEXT-TOKEN . NODE); NODE is
nil when no factor could be parsed."
  (let ((factor (ebnf-factor token)))
    ;; Only advance when TOKEN was actually consumed by a factor.
    (and factor
         (setq token (ebnf-bnf-lex)))
    (cond
     ;; [factor] +
     ((eq token 'one-or-more)
      (cons (ebnf-bnf-lex)
            (and factor
                 (let ((kind (ebnf-node-kind factor)))
                   (cond
                    ;; { A }+ + ==> { A }+
                    ;; { A }* + ==> { A }*
                    ((memq kind '(ebnf-generate-zero-or-more
                                  ebnf-generate-one-or-more))
                     factor)
                    ;; [ A ] + ==> { A }*
                    ((eq kind 'ebnf-generate-optional)
                     (ebnf-make-zero-or-more (list factor)))
                    ;; A +
                    (t
                     (ebnf-make-one-or-more (list factor))))))))
     ;; [factor] / [factor]
     ((eq token 'list)
      (setq token (ebnf-bnf-lex))
      (let ((sep (ebnf-factor token)))
        ;; A separator without a preceding factor lists an empty node.
        (and sep
             (setq factor (or factor (ebnf-make-empty))))
        (cons (if sep
                  (ebnf-bnf-lex)
                token)
              (and factor
                   (ebnf-make-one-or-more factor sep)))))
     ;; factor
     (t
      (cons token factor)))))
292 ;;; factor = [ "$" ] "\"" terminal "\"" ;; terminal
293 ;;; | [ "$" ] non_terminal ;; non-terminal
294 ;;; | [ "$" ] "?" special "?" ;; special
295 ;;; | "(" body ")" ;; group
296 ;;; | "[" body "]" ;; zero-or-one
297 ;;; | "{" body [ "||" body ] "}+" ;; one-or-more
298 ;;; | "{" body [ "||" body ] "}*" ;; zero-or-more
299 ;;; | "{" body [ "||" body ] "}" ;; zero-or-more
(defun ebnf-factor (token)
  "Parse a factor starting at TOKEN.

A factor is a terminal, a non-terminal, a special, a group `(...)', an
optional `[...]' or a list `{...}', `{...}+', `{...}*' (each list form
may carry a `||' separator part).  Return the node built for it, or nil
when TOKEN does not start a factor.  Signal an error when a closing
delimiter is missing."
  (cond
   ;; terminal
   ((eq token 'terminal)
    (ebnf-make-terminal ebnf-bnf-lex))
   ;; non-terminal
   ((eq token 'non-terminal)
    (ebnf-make-non-terminal ebnf-bnf-lex))
   ;; special
   ((eq token 'special)
    (ebnf-make-special ebnf-bnf-lex))
   ;; group
   ((eq token 'begin-group)
    (let ((body (ebnf-body)))
      (or (eq (car body) 'end-group)
          (error "Missing `)'"))
      (cdr body)))
   ;; optional
   ((eq token 'begin-optional)
    (let ((body (ebnf-body)))
      (or (eq (car body) 'end-optional)
          (error "Missing `]'"))
      (ebnf-token-optional (cdr body))))
   ;; list
   ((eq token 'begin-list)
    (let* ((body      (ebnf-body))
           (token     (car body))
           (list-part (cdr body))
           sep-part)
      (and (eq token 'list-separator)
           ;; { A || B }
           (setq body     (ebnf-body)   ; get separator
                 token    (car body)
                 sep-part (cdr body)))
      (cond
       ;; { A }+
       ((eq token 'end-one-or-more)
        (ebnf-make-one-or-more list-part sep-part))
       ;; { A }*
       ((eq token 'end-zero-or-more)
        (ebnf-make-zero-or-more list-part sep-part))
       (t
        (error "Missing `}+', `}*' or `}'")))))
   ;; no term
   (t
    nil)))
352 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Every slot starts as `error'; `ebnf-bnf-initialize' overwrites the
;; slots of the characters that are valid in EBNF source.
(defconst ebnf-bnf-token-table (make-vector 256 'error)
  "Vector used to map characters to a lexical token.")
(defun ebnf-bnf-initialize ()
  "Initialize EBNF token table."
  ;; control character & control 8-bit character are set to `error'
  ;; NOTE(review): the starting code ?\040 and the jump to ?\240 were
  ;; restored from the surrounding comments and the \240-\377 range used
  ;; elsewhere in this file - confirm.
  (let ((char ?\040))
    ;; printable character:
    (while (< char ?\060)
      (aset ebnf-bnf-token-table char 'non-terminal)
      (setq char (1+ char)))
    ;; digits:
    (while (< char ?\072)
      (aset ebnf-bnf-token-table char 'integer)
      (setq char (1+ char)))
    ;; printable character:
    (while (< char ?\177)
      (aset ebnf-bnf-token-table char 'non-terminal)
      (setq char (1+ char)))
    ;; European 8-bit accentuated characters:
    (setq char ?\240)
    (while (< char ?\400)
      (aset ebnf-bnf-token-table char 'non-terminal)
      (setq char (1+ char)))
    ;; Override space characters:
    (aset ebnf-bnf-token-table ?\013 'space) ; [VT] vertical tab
    (aset ebnf-bnf-token-table ?\n   'space) ; [NL] linefeed
    (aset ebnf-bnf-token-table ?\r   'space) ; [CR] carriage return
    (aset ebnf-bnf-token-table ?\t   'space) ; [HT] horizontal tab
    (aset ebnf-bnf-token-table ?\040 'space) ; [SP] space
    ;; Override form feed character:
    (aset ebnf-bnf-token-table ?\f 'form-feed) ; [FF] form feed
    ;; Override other lexical characters:
    (aset ebnf-bnf-token-table ?\" 'terminal)
    (aset ebnf-bnf-token-table ?\? 'special)
    (aset ebnf-bnf-token-table ?\( 'begin-group)
    (aset ebnf-bnf-token-table ?\) 'end-group)
    (aset ebnf-bnf-token-table ?*  'repeat)
    (aset ebnf-bnf-token-table ?-  'except)
    (aset ebnf-bnf-token-table ?=  'equal)
    (aset ebnf-bnf-token-table ?\[ 'begin-optional)
    (aset ebnf-bnf-token-table ?\] 'end-optional)
    (aset ebnf-bnf-token-table ?\{ 'begin-list)
    (aset ebnf-bnf-token-table ?|  'alternative)
    (aset ebnf-bnf-token-table ?\} 'end-list)
    (aset ebnf-bnf-token-table ?/  'list)
    (aset ebnf-bnf-token-table ?+  'one-or-more)
    (aset ebnf-bnf-token-table ?$  'default)
    ;; Override comment character:
    (aset ebnf-bnf-token-table ebnf-lex-comment-char 'comment)
    ;; Override end of production character:
    (aset ebnf-bnf-token-table ebnf-lex-eop-char     'period)))
411 ;; replace the range "\240-\377" (see `ebnf-range-regexp').
;; Character set (in `skip-chars-forward' syntax) used by the function
;; `ebnf-bnf-lex' to scan a non-terminal name.
(defconst ebnf-bnf-non-terminal-chars
  (ebnf-range-regexp "!#%&'*-,0-:<>@-Z\\\\^-z~" ?\240 ?\377))
(defun ebnf-bnf-lex ()
  "Lexical analyser for EBNF.

Return a lexical token.

See documentation for variable `ebnf-bnf-lex'."
  (if (>= (point) ebnf-limit)
      'end-of-input
    (let (token)
      ;; skip spaces and comments
      (while (if (> (following-char) 255)
                 ;; Outside the 256-entry token table: stop and report.
                 (progn
                   (setq token 'error)
                   nil)
               (setq token (aref ebnf-bnf-token-table (following-char)))
               (cond
                ((eq token 'space)
                 (skip-chars-forward " \013\n\r\t" ebnf-limit)
                 (< (point) ebnf-limit))
                ((eq token 'comment)
                 (ebnf-bnf-skip-comment))
                ((eq token 'form-feed)
                 (forward-char)
                 (setq ebnf-action 'form-feed))
                (t nil))))
      (setq ebnf-default-p nil)
      (cond
       ;; end of input
       ((>= (point) ebnf-limit)
        'end-of-input)
       ;; error
       ((eq token 'error)
        (error "Illegal character"))
       ;; default
       ((eq token 'default)
        (forward-char)
        (if (memq (aref ebnf-bnf-token-table (following-char))
                  '(terminal non-terminal special))
            ;; The recursive call clears `ebnf-default-p', so set it
            ;; only after the element itself has been scanned.
            (prog1
                (ebnf-bnf-lex)
              (setq ebnf-default-p t))
          (error "Illegal `default' element")))
       ;; integer
       ((eq token 'integer)
        (setq ebnf-bnf-lex (ebnf-buffer-substring "0-9"))
        'integer)
       ;; special: ?special?
       ((eq token 'special)
        (setq ebnf-bnf-lex (concat "?"
                                   (ebnf-string " ->@-~" ?\? "special")
                                   "?"))
        'special)
       ;; terminal: "string"
       ((eq token 'terminal)
        (setq ebnf-bnf-lex (ebnf-unescape-string (ebnf-get-string)))
        'terminal)
       ;; non-terminal or terminal
       ((eq token 'non-terminal)
        (setq ebnf-bnf-lex (ebnf-buffer-substring ebnf-bnf-non-terminal-chars))
        (let ((case-fold-search ebnf-case-fold-search)
              match)
          ;; A name matched in full by `ebnf-terminal-regexp' is
          ;; reported as a terminal.
          (if (and ebnf-terminal-regexp
                   (setq match (string-match ebnf-terminal-regexp
                                             ebnf-bnf-lex))
                   (zerop match)
                   (= (match-end 0) (length ebnf-bnf-lex)))
              'terminal
            'non-terminal)))
       ;; end of list: }+, }*, }
       ((eq token 'end-list)
        (forward-char)
        (cond
         ((= (following-char) ?+)
          (forward-char)
          'end-one-or-more)
         ((= (following-char) ?*)
          (forward-char)
          'end-zero-or-more)
         (t
          'end-zero-or-more)))
       ;; alternative: |, ||
       ((eq token 'alternative)
        (forward-char)
        (if (/= (following-char) ?|)
            'alternative
          (forward-char)
          'list-separator))
       ;; miscellaneous: {, (, ), [, ], ., =, /, +, -, *
       (t
        (forward-char)
        token)))))
512 ;; replace the range "\177-\237" (see `ebnf-range-regexp').
;; Character set (in `skip-chars-forward' syntax) of the characters
;; allowed inside a comment; it is a negated set, so newline and most
;; control characters end the scan.
(defconst ebnf-bnf-comment-chars
  (ebnf-range-regexp "^\n\000-\010\016-\037" ?\177 ?\237))
(defun ebnf-bnf-skip-comment ()
  "Skip a comment; point is at the comment character on entry.

The character after the comment character selects an action: while
`ebnf-eps-executing' is non-nil, `[' adds and `]' removes an EPS
context named by the rest of the line; any other character is looked
up in `ebnf-comment-table' and stored in `ebnf-action'.  Return t when
the comment ended with a newline (which is skipped), nil when the
limit `ebnf-limit' was reached.  Signal an error on any other
character."
  (forward-char)
  (cond
   ;; open EPS file
   ((and ebnf-eps-executing (= (following-char) ?\[))
    (ebnf-eps-add-context (ebnf-bnf-eps-filename)))
   ;; close EPS file
   ((and ebnf-eps-executing (= (following-char) ?\]))
    (ebnf-eps-remove-context (ebnf-bnf-eps-filename)))
   ;; any other action in comment
   (t
    (setq ebnf-action (aref ebnf-comment-table (following-char)))
    (skip-chars-forward ebnf-bnf-comment-chars ebnf-limit)))
  ;; check for a valid end of comment
  (cond ((>= (point) ebnf-limit)
         nil)
        ((= (following-char) ?\n)
         (forward-char)
         t)
        (t
         (error "Illegal character"))))
(defun ebnf-bnf-eps-filename ()
  "Return the EPS file name given in a comment.

Point is on the `[' or `]' marker when `ebnf-bnf-skip-comment' calls
this; the marker is stepped over so that it is not part of the name,
then the run of comment characters after it is returned."
  (forward-char)
  (ebnf-buffer-substring ebnf-bnf-comment-chars))
(defun ebnf-unescape-string (str)
  "Return STR with each backslash escape replaced by the escaped character.

A backslash followed by a character yields that character alone.  A
lone backslash in the last position is kept as is.  When STR holds no
escape, STR itself is returned (not a copy)."
  (let* ((len (length str))
         ;; Last index at which a backslash still has a follower.
         (size (1- len))
         (istr 0)
         (n-esc 0))
    ;; count number of escapes
    (while (< istr size)
      (setq istr (+ istr
                    (if (= (aref str istr) ?\\)
                        (progn
                          (setq n-esc (1+ n-esc))
                          2)
                      1))))
    (if (zerop n-esc)
        ;; no escapes
        str
      ;; at least one escape
      (let ((new (make-string (- len n-esc) ?\040))
            (inew 0))
        ;; eliminate all escapes
        (setq istr 0)
        (while (> n-esc 0)
          (and (= (aref str istr) ?\\)
               (setq istr  (1+ istr)
                     n-esc (1- n-esc)))
          (aset new inew (aref str istr))
          (setq inew (1+ inew)
                istr (1+ istr)))
        ;; remaining string has no escape
        (while (< istr len)
          (aset new inew (aref str istr))
          (setq inew (1+ inew)
                istr (1+ istr)))
        new))))
583 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
589 ;;; ebnf-bnf.el ends here