Update for MH-E release 8.0.2.
[emacs.git] / lisp / progmodes / ebnf-bnf.el
blobfd58ec096b37b4f738b7155f90f2445ca9f20a1f
1 ;;; ebnf-bnf.el --- parser for EBNF
3 ;; Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006
4 ;; Free Software Foundation, Inc.
6 ;; Author: Vinicius Jose Latorre <viniciusjl@ig.com.br>
7 ;; Maintainer: Vinicius Jose Latorre <viniciusjl@ig.com.br>
8 ;; Time-stamp: <2004/04/03 16:42:18 vinicius>
9 ;; Keywords: wp, ebnf, PostScript
10 ;; Version: 1.9
12 ;; This file is part of GNU Emacs.
14 ;; GNU Emacs is free software; you can redistribute it and/or modify
15 ;; it under the terms of the GNU General Public License as published by
16 ;; the Free Software Foundation; either version 2, or (at your option)
17 ;; any later version.
19 ;; GNU Emacs is distributed in the hope that it will be useful,
20 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
21 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 ;; GNU General Public License for more details.
24 ;; You should have received a copy of the GNU General Public License
25 ;; along with GNU Emacs; see the file COPYING. If not, write to the
26 ;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
27 ;; Boston, MA 02110-1301, USA.
29 ;;; Commentary:
31 ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
34 ;; This is part of ebnf2ps package.
36 ;; This package defines a parser for EBNF.
38 ;; See ebnf2ps.el for documentation.
41 ;; EBNF Syntax
42 ;; -----------
44 ;; The current EBNF that ebnf2ps accepts has the following constructions:
46 ;; ; comment (until end of line)
47 ;; A non-terminal
48 ;; "C" terminal
49 ;; ?C? special
50 ;; $A default non-terminal
51 ;; $"C" default terminal
52 ;; $?C? default special
53 ;; A = B. production (A is the header and B the body)
54 ;; C D sequence (C occurs before D)
55 ;; C | D alternative (C or D occurs)
56 ;; A - B exception (A excluding B, B without any non-terminal)
57 ;; n * A repetition (A repeats at least n (integer) times)
58 ;; n * n A repetition (A repeats exactly n (integer) times)
59 ;; n * m A repetition (A repeats at least n (integer) and at most
60 ;; m (integer) times)
61 ;; (C) group (expression C is grouped together)
62 ;; [C] optional (C may or may not occur)
63 ;; C+ one or more occurrences of C
64 ;; {C}+ one or more occurrences of C
65 ;; {C}* zero or more occurrences of C
66 ;; {C} zero or more occurrences of C
67 ;; C / D equivalent to: C {D C}*
68 ;; {C || D}+ equivalent to: C {D C}*
69 ;; {C || D}* equivalent to: [C {D C}*]
70 ;; {C || D} equivalent to: [C {D C}*]
72 ;; The EBNF syntax written using the notation above is:
74 ;; EBNF = {production}+.
76 ;; production = non_terminal "=" body ".". ;; production
78 ;; body = {sequence || "|"}*. ;; alternative
80 ;; sequence = {exception}*. ;; sequence
82 ;; exception = repeat [ "-" repeat]. ;; exception
84 ;; repeat = [ integer "*" [ integer ]] term. ;; repetition
86 ;; term = factor
87 ;; | [factor] "+" ;; one-or-more
88 ;; | [factor] "/" [factor] ;; one-or-more
89 ;; .
91 ;; factor = [ "$" ] "\"" terminal "\"" ;; terminal
92 ;; | [ "$" ] non_terminal ;; non-terminal
93 ;; | [ "$" ] "?" special "?" ;; special
94 ;; | "(" body ")" ;; group
95 ;; | "[" body "]" ;; zero-or-one
96 ;; | "{" body [ "||" body ] "}+" ;; one-or-more
97 ;; | "{" body [ "||" body ] "}*" ;; zero-or-more
98 ;; | "{" body [ "||" body ] "}" ;; zero-or-more
99 ;; .
101 ;; non_terminal = "[!#%&'*-,0-:<>@-Z\\\\^-z~\\240-\\377]+".
102 ;; ;; that is, a valid non_terminal accepts decimal digits, letters (upper
103 ;; ;; and lower), 8-bit accentuated characters,
104 ;; ;; "!", "#", "%", "&", "'", "*", "+", ",", ":",
105 ;; ;; "<", ">", "@", "\", "^", "_", "`" and "~".
107 ;; terminal = "\\([^\"\\]\\|\\\\[ -~\\240-\\377]\\)+".
108 ;; ;; that is, a valid terminal accepts any printable character (including
109 ;; ;; 8-bit accentuated characters) except `"', as `"' is used to delimit a
110 ;; ;; terminal. Also, accepts escaped characters, that is, a character
111 ;; ;; pair starting with `\' followed by a printable character, for
112 ;; ;; example: \", \\.
114 ;; special = "[^?\\000-\\010\\012-\\037\\177-\\237]*".
115 ;; ;; that is, a valid special accepts any printable character (including
116 ;; ;; 8-bit accentuated characters) and tabs except `?', as `?' is used to
117 ;; ;; delimit a special.
119 ;; integer = "[0-9]+".
120 ;; ;; that is, an integer is a sequence of one or more decimal digits.
122 ;; comment = ";" "[^\\n\\000-\\010\\016-\\037\\177-\\237]*" "\\n".
123 ;; ;; that is, a comment starts with the character `;' and terminates at end
124 ;; ;; of line. Also, it only accepts printable characters (including 8-bit
125 ;; ;; accentuated characters) and tabs.
128 ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
130 ;;; Code:
133 (require 'ebnf-otz)
(defvar ebnf-bnf-lex nil
  "Lexeme of the last token read by the function `ebnf-bnf-lex'.
That function returns only the token kind; for integer, special,
terminal and non-terminal tokens it stores the token text here.")
140 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
141 ;; Syntactic analyzer
144 ;;; EBNF = {production}+.
(defun ebnf-bnf-parser (start)
  "Parse the EBNF text from START up to `ebnf-limit'.
Return the list of rules produced by `ebnf-production', latest first,
omitting every rule for which `ebnf-add-empty-rule-list' returns non-nil.
Signal an error when no token at all is found.
On normal return point is moved back to where it was on entry."
  (let* ((origin (point))
         (offset (1- start))
         (span (1+ (- ebnf-limit start)))
         (productions nil)
         (token nil)
         (parsed nil))
    (goto-char start)
    (setq token (ebnf-bnf-lex))
    (when (eq token 'end-of-input)
      (error "Invalid EBNF file format"))
    (while (not (eq token 'end-of-input))
      ;; Progress report: percentage of the region already consumed.
      (ebnf-message-float "Parsing...%s%%"
                          (/ (* (- (point) offset) 100.0) span))
      ;; `ebnf-production' yields (NEXT-TOKEN . RULE).
      (setq parsed (ebnf-production token)
            token (car parsed))
      (let ((rule (cdr parsed)))
        (unless (ebnf-add-empty-rule-list rule)
          (push rule productions))))
    (goto-char origin)
    productions))
169 ;;; production = non-terminal "=" body ".".
(defun ebnf-production (token)
  "Parse one production whose header token TOKEN was already read.
Grammar: production = non_terminal \"=\" body \".\".
Return (NEXT-TOKEN . PRODUCTION), where NEXT-TOKEN is the token read
after the closing period and PRODUCTION comes from `ebnf-make-production'.
The pending `ebnf-action' is captured for the production and reset to nil."
  (let ((name ebnf-bnf-lex)             ; lexeme of the header token
        (pending-action ebnf-action)
        parsed)
    (setq ebnf-action nil)
    (unless (eq token 'non-terminal)
      (error "Invalid header production"))
    (unless (eq (ebnf-bnf-lex) 'equal)
      (error "Invalid production: missing `='"))
    ;; `ebnf-body' yields (TERMINATING-TOKEN . BODY).
    (setq parsed (ebnf-body))
    (unless (eq (car parsed) 'period)
      (error "Invalid production: missing `.'"))
    (ebnf-eps-add-production name)
    ;; Read the look-ahead token before building the production node,
    ;; exactly in that order.
    (let ((next (ebnf-bnf-lex)))
      (cons next
            (ebnf-make-production name (cdr parsed) pending-action)))))
189 ;;; body = {sequence || "|"}*.
(defun ebnf-body ()
  "Parse a body: sequences separated by `|'.
Grammar: body = {sequence || \"|\"}*.
Collect, latest first, the node of every sequence that ended on an
`alternative' token, then hand that list together with the final
\(TOKEN . NODE) pair to `ebnf-token-alternative' and return its result."
  (let ((alternatives nil)
        (current (ebnf-sequence)))
    (while (eq (car current) 'alternative)
      (push (cdr current) alternatives)
      (setq current (ebnf-sequence)))
    (ebnf-token-alternative alternatives current)))
199 ;;; sequence = {exception}*.
(defun ebnf-sequence ()
  "Parse a sequence of exceptions.
Grammar: sequence = {exception}*.
Keep calling `ebnf-exception' until it yields a nil node.  Return
\(TOKEN . SEQ), where TOKEN is the token that stopped the sequence and
SEQ is `ebnf-token-sequence' applied to the collected nodes (latest first)."
  (let ((token (ebnf-bnf-lex))
        (nodes nil)
        pair
        node)
    (while (progn
             ;; `ebnf-exception' yields (NEXT-TOKEN . NODE).
             (setq pair (ebnf-exception token)
                   token (car pair)
                   node (cdr pair))
             node)
      (push node nodes))
    (cons token (ebnf-token-sequence nodes))))
212 ;;; exception = repeat [ "-" repeat].
(defun ebnf-exception (token)
  "Parse an exception starting at TOKEN.
Grammar: exception = repeat [ \"-\" repeat].
When the first repeat is not followed by an `except' token, return its
\(TOKEN . NODE) pair unchanged.  Otherwise parse the excluded repeat,
check it with `ebnf-no-non-terminal' and return the result of
`ebnf-token-except'."
  (let ((left (ebnf-repeat token)))
    (cond
     ;; repeat - repeat
     ((eq (car left) 'except)
      (let ((right (ebnf-repeat (ebnf-bnf-lex))))
        (ebnf-no-non-terminal (cdr right))
        (ebnf-token-except (cdr left) right)))
     ;; repeat
     (t
      left))))
(defun ebnf-no-non-terminal (node)
  "Signal an error if NODE contains a non-terminal.
NODE is inspected only when it is a vector; depending on its kind the
check recurses into the node list, the node separator, or both."
  (when (vectorp node)
    (let ((kind (ebnf-node-kind node)))
      (cond
       ((eq kind 'ebnf-generate-non-terminal)
        (error "Exception sequence should not contain a non-terminal"))
       ;; only the separator is checked for a repeat node
       ((eq kind 'ebnf-generate-repeat)
        (ebnf-no-non-terminal (ebnf-node-separator node)))
       ((memq kind '(ebnf-generate-optional ebnf-generate-except))
        (ebnf-no-non-terminal (ebnf-node-list node)))
       ((memq kind '(ebnf-generate-one-or-more ebnf-generate-zero-or-more))
        (ebnf-no-non-terminal (ebnf-node-list node))
        (ebnf-no-non-terminal (ebnf-node-separator node)))
       ;; here the node list is a list of nodes: check each element
       ((memq kind '(ebnf-generate-alternative ebnf-generate-sequence))
        (dolist (child (ebnf-node-list node))
          (ebnf-no-non-terminal child)))))))
246 ;;; repeat = [ integer "*" [ integer ]] term.
(defun ebnf-repeat (token)
  "Parse a repetition starting at TOKEN.
Grammar: repeat = [ integer \"*\" [ integer ]] term.
Without a leading integer this is just `ebnf-term'.  Otherwise the first
integer lexeme, the parsed term and the optional second integer lexeme
\(nil when absent) are passed to `ebnf-token-repeat'."
  (cond
   ((eq token 'integer)
    (let ((lower ebnf-bnf-lex)
          (upper nil))
      (unless (eq (ebnf-bnf-lex) 'repeat)
        (error "Missing `*'"))
      (setq token (ebnf-bnf-lex))
      ;; optional upper bound
      (when (eq token 'integer)
        (setq upper ebnf-bnf-lex)
        (setq token (ebnf-bnf-lex)))
      (ebnf-token-repeat lower (ebnf-term token) upper)))
   (t
    (ebnf-term token))))
262 ;;; term = factor
263 ;;; | [factor] "+" ;; one-or-more
264 ;;; | [factor] "/" [factor] ;; one-or-more
265 ;;; .
(defun ebnf-term (token)
  "Parse a term starting at TOKEN.
Grammar:
  term = factor
       | [factor] \"+\"            ;; one-or-more
       | [factor] \"/\" [factor]   ;; one-or-more
       .
Return (NEXT-TOKEN . NODE); NODE is nil when no term was found."
  (let ((factor (ebnf-factor token)))
    ;; A factor consumed TOKEN, so fetch the token that follows it.
    (and factor
         (setq token (ebnf-bnf-lex)))
    (cond
     ;; [factor] +
     ((eq token 'one-or-more)
      (cons (ebnf-bnf-lex)
            (and factor
                 (let ((kind (ebnf-node-kind factor)))
                   (cond
                    ;; { A }+ + ==> { A }+
                    ;; { A }* + ==> { A }*
                    ((memq kind '(ebnf-generate-zero-or-more
                                  ebnf-generate-one-or-more))
                     factor)
                    ;; [ A ] + ==> { A }*
                    ((eq kind 'ebnf-generate-optional)
                     (ebnf-make-zero-or-more (list factor)))
                    ;; A +
                    (t
                     (ebnf-make-one-or-more (list factor))))))))
     ;; [factor] / [factor]
     ((eq token 'list)
      (setq token (ebnf-bnf-lex))
      (let ((sep (ebnf-factor token)))
        ;; A separator without a leading factor gets an empty factor.
        (and sep
             (setq factor (or factor (ebnf-make-empty))))
        ;; If a separator was parsed its token is consumed, so read the
        ;; next one; otherwise TOKEN is still the look-ahead.
        (cons (if sep
                  (ebnf-bnf-lex)
                token)
              (and factor
                   (ebnf-make-one-or-more factor sep)))))
     ;; factor
     (t
      (cons token factor)))))
307 ;;; factor = [ "$" ] "\"" terminal "\"" ;; terminal
308 ;;; | [ "$" ] non_terminal ;; non-terminal
309 ;;; | [ "$" ] "?" special "?" ;; special
310 ;;; | "(" body ")" ;; group
311 ;;; | "[" body "]" ;; zero-or-one
312 ;;; | "{" body [ "||" body ] "}+" ;; one-or-more
313 ;;; | "{" body [ "||" body ] "}*" ;; zero-or-more
314 ;;; | "{" body [ "||" body ] "}" ;; zero-or-more
315 ;;; .
(defun ebnf-factor (token)
  "Parse a factor whose first token is TOKEN.
Grammar:
  factor = [ \"$\" ] \"\\\"\" terminal \"\\\"\"   ;; terminal
         | [ \"$\" ] non_terminal            ;; non-terminal
         | [ \"$\" ] \"?\" special \"?\"         ;; special
         | \"(\" body \")\"                    ;; group
         | \"[\" body \"]\"                    ;; zero-or-one
         | \"{\" body [ \"||\" body ] \"}+\"     ;; one-or-more
         | \"{\" body [ \"||\" body ] \"}*\"     ;; zero-or-more
         | \"{\" body [ \"||\" body ] \"}\"      ;; zero-or-more
         .
Return the node built for the factor, or nil when TOKEN does not start
a factor.  Signal an error when a closing delimiter is missing."
  (cond
   ;; terminal
   ((eq token 'terminal)
    (ebnf-make-terminal ebnf-bnf-lex))
   ;; non-terminal
   ((eq token 'non-terminal)
    (ebnf-make-non-terminal ebnf-bnf-lex))
   ;; special
   ((eq token 'special)
    (ebnf-make-special ebnf-bnf-lex))
   ;; group
   ((eq token 'begin-group)
    (let ((body (ebnf-body)))
      (or (eq (car body) 'end-group)
          (error "Missing `)'"))
      (cdr body)))
   ;; optional
   ((eq token 'begin-optional)
    (let ((body (ebnf-body)))
      (or (eq (car body) 'end-optional)
          (error "Missing `]'"))
      (ebnf-token-optional (cdr body))))
   ;; list
   ((eq token 'begin-list)
    (let* ((body (ebnf-body))
           (token (car body))
           (list-part (cdr body))
           sep-part)
      (and (eq token 'list-separator)
           ;; { A || B }
           (setq body (ebnf-body)       ; get separator
                 token (car body)
                 sep-part (cdr body)))
      (cond
       ;; { A }+
       ((eq token 'end-one-or-more)
        (ebnf-make-one-or-more list-part sep-part))
       ;; { A }*  (the lexer also reports a bare `}' as `end-zero-or-more')
       ((eq token 'end-zero-or-more)
        (ebnf-make-zero-or-more list-part sep-part))
       (t
        (error "Missing `}+', `}*' or `}'")))))
   ;; no term
   (t
    nil)))
367 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
368 ;; Lexical analyzer
(defconst ebnf-bnf-token-table (make-vector 256 'error)
  "Vector used to map characters to a lexical token.")


(defun ebnf-bnf-initialize ()
  "Fill `ebnf-bnf-token-table' with the token kind of every character.
Slots that are never written here keep the value `error'."
  ;; Character classes, as half-open ranges [FROM, TO).
  (dolist (range '((?\040 ?\060 non-terminal) ; printable characters
                   (?\060 ?\072 integer)      ; digits
                   (?\072 ?\177 non-terminal) ; printable characters
                   (?\240 ?\400 non-terminal))) ; European 8-bit accented
    (let ((char (nth 0 range))
          (end (nth 1 range))
          (kind (nth 2 range)))
      (while (< char end)
        (aset ebnf-bnf-token-table char kind)
        (setq char (1+ char)))))
  ;; Single-character overrides.
  (dolist (entry '((?\013 . space)      ; [VT] vertical tab
                   (?\n . space)        ; [NL] linefeed
                   (?\r . space)        ; [CR] carriage return
                   (?\t . space)        ; [HT] horizontal tab
                   (?\040 . space)      ; [SP] space
                   (?\f . form-feed)    ; [FF] form feed
                   (?\" . terminal)
                   (?\? . special)
                   (?\( . begin-group)
                   (?\) . end-group)
                   (?* . repeat)
                   (?- . except)
                   (?= . equal)
                   (?\[ . begin-optional)
                   (?\] . end-optional)
                   (?\{ . begin-list)
                   (?| . alternative)
                   (?\} . end-list)
                   (?/ . list)
                   (?+ . one-or-more)
                   (?$ . default)))
    (aset ebnf-bnf-token-table (car entry) (cdr entry)))
  ;; The user-configurable characters go last so they win over the
  ;; entries above.
  (aset ebnf-bnf-token-table ebnf-lex-comment-char 'comment)
  (aset ebnf-bnf-token-table ebnf-lex-eop-char 'period))
;; replace the range "\240-\377" (see `ebnf-range-regexp').
(defconst ebnf-bnf-non-terminal-chars
  (ebnf-range-regexp "!#%&'*-,0-:<>@-Z\\\\^-z~" ?\240 ?\377))


(defun ebnf-bnf-lex ()
  "Lexical analyzer for EBNF.

Return a lexical token.

Spaces, comments and form feeds before the token are skipped.  For
integer, special, terminal and non-terminal tokens the token text is
stored in the variable `ebnf-bnf-lex'.  `ebnf-default-p' is set to t
when the token was prefixed by `$', and to nil otherwise.

See documentation for variable `ebnf-bnf-lex'."
  (if (>= (point) ebnf-limit)
      'end-of-input
    (let (token)
      ;; skip spaces and comments
      (while (if (> (following-char) 255)
                 ;; the token table has only 256 slots
                 (progn
                   (setq token 'error)
                   nil)
               (setq token (aref ebnf-bnf-token-table (following-char)))
               (cond
                ((eq token 'space)
                 (skip-chars-forward " \013\n\r\t" ebnf-limit)
                 (< (point) ebnf-limit))
                ((eq token 'comment)
                 (ebnf-bnf-skip-comment))
                ((eq token 'form-feed)
                 (forward-char)
                 (setq ebnf-action 'form-feed))
                (t nil))))
      (setq ebnf-default-p nil)
      (cond
       ;; end of input
       ((>= (point) ebnf-limit)
        'end-of-input)
       ;; error
       ((eq token 'error)
        (error "Invalid character"))
       ;; default
       ((eq token 'default)
        (forward-char)
        ;; Guard the table lookup: a character above 255 must yield the
        ;; "Invalid `default' element" error, not `args-out-of-range'.
        (if (and (<= (following-char) 255)
                 (memq (aref ebnf-bnf-token-table (following-char))
                       '(terminal non-terminal special)))
            (prog1
                (ebnf-bnf-lex)
              ;; set after the recursive call, which resets the flag
              (setq ebnf-default-p t))
          (error "Invalid `default' element")))
       ;; integer
       ((eq token 'integer)
        (setq ebnf-bnf-lex (ebnf-buffer-substring "0-9"))
        'integer)
       ;; special: ?special?
       ((eq token 'special)
        (setq ebnf-bnf-lex (concat (and ebnf-special-show-delimiter "?")
                                   (ebnf-string " ->@-~" ?\? "special")
                                   (and ebnf-special-show-delimiter "?")))
        'special)
       ;; terminal: "string"
       ((eq token 'terminal)
        (setq ebnf-bnf-lex (ebnf-unescape-string (ebnf-get-string)))
        'terminal)
       ;; non-terminal or terminal
       ((eq token 'non-terminal)
        (setq ebnf-bnf-lex (ebnf-buffer-substring ebnf-bnf-non-terminal-chars))
        ;; A name that `ebnf-terminal-regexp' matches in full is
        ;; reported as a terminal.
        (let ((case-fold-search ebnf-case-fold-search)
              match)
          (if (and ebnf-terminal-regexp
                   (setq match (string-match ebnf-terminal-regexp
                                             ebnf-bnf-lex))
                   (zerop match)
                   (= (match-end 0) (length ebnf-bnf-lex)))
              'terminal
            'non-terminal)))
       ;; end of list: }+, }*, }
       ((eq token 'end-list)
        (forward-char)
        (cond
         ((= (following-char) ?+)
          (forward-char)
          'end-one-or-more)
         ((= (following-char) ?*)
          (forward-char)
          'end-zero-or-more)
         (t
          'end-zero-or-more)))
       ;; alternative: |, ||
       ((eq token 'alternative)
        (forward-char)
        (if (/= (following-char) ?|)
            'alternative
          (forward-char)
          'list-separator))
       ;; miscellaneous: {, (, ), [, ], ., =, /, +, -, *
       (t
        (forward-char)
        token)))))
;; replace the range "\177-\237" (see `ebnf-range-regexp').
(defconst ebnf-bnf-comment-chars
  (ebnf-range-regexp "^\n\000-\010\016-\037" ?\177 ?\237))


(defun ebnf-bnf-skip-comment ()
  "Skip the comment that starts at point.
Return t when the comment ends with a newline, which is also skipped,
and nil when `ebnf-limit' is reached first.  Signal an error when the
comment stops at any other character.

While `ebnf-eps-executing' is non-nil, a `[' or `]' right after the
comment character adds or removes an EPS context.  Otherwise
`ebnf-action' is set from `ebnf-comment-table', indexed by the
character right after the comment character."
  (forward-char)                        ; skip the comment character
  (cond
   ;; open EPS file
   ((and ebnf-eps-executing (= (following-char) ?\[))
    (ebnf-eps-add-context (ebnf-bnf-eps-filename)))
   ;; close EPS file
   ((and ebnf-eps-executing (= (following-char) ?\]))
    (ebnf-eps-remove-context (ebnf-bnf-eps-filename)))
   ;; any other action in comment
   (t
    (setq ebnf-action (aref ebnf-comment-table (following-char)))
    (skip-chars-forward ebnf-bnf-comment-chars ebnf-limit)))
  ;; check for a valid end of comment
  (cond ((>= (point) ebnf-limit)
         nil)
        ((= (following-char) ?\n)
         (forward-char)
         t)
        (t
         (error "Invalid character"))))
(defun ebnf-bnf-eps-filename ()
  "Skip the character at point and read what follows it.
Return the value of `ebnf-buffer-substring' called with
`ebnf-bnf-comment-chars'."
  ;; NOTE(review): presumably the skipped character is the `[' or `]'
  ;; marker and the text read is the EPS file name -- confirm with callers.
  (forward-char 1)
  (ebnf-buffer-substring ebnf-bnf-comment-chars))
(defun ebnf-unescape-string (str)
  "Return STR with each backslash escape replaced by the escaped character.
An escape is a backslash followed by any character; the backslash is
dropped and the following character is kept literally.  A lone backslash
at the very end of STR is kept.  When STR has no escape, STR itself is
returned, not a copy."
  (let* ((len (length str))
         (size (1- len))
         (istr 0)
         (n-esc 0))
    ;; count number of escapes
    (while (< istr size)
      (setq istr (+ istr
                    (if (= (aref str istr) ?\\)
                        (progn
                          (setq n-esc (1+ n-esc))
                          ;; step over the backslash and the escaped char
                          2)
                      1))))
    (if (zerop n-esc)
        ;; no escapes
        str
      ;; at least one escape
      (let ((new (make-string (- len n-esc) ?\ ))
            (inew 0))
        ;; eliminate all escapes
        (setq istr 0)
        (while (> n-esc 0)
          (and (= (aref str istr) ?\\)
               (setq istr (1+ istr)
                     n-esc (1- n-esc)))
          (aset new inew (aref str istr))
          (setq inew (1+ inew)
                istr (1+ istr)))
        ;; remaining string has no escape
        (while (< istr len)
          (aset new inew (aref str istr))
          (setq inew (1+ inew)
                istr (1+ istr)))
        new))))
598 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
601 (provide 'ebnf-bnf)
604 ;;; arch-tag: 3b1834d3-8367-475b-80d5-8e0bbd00ce50
605 ;;; ebnf-bnf.el ends here