Update copyright year to 2014 by running admin/update-copyright.
[emacs.git] / lisp / progmodes / ebnf-abn.el
blob5363f61853d6808f311568705f0a51256cd2d5ea
1 ;;; ebnf-abn.el --- parser for ABNF (Augmented BNF)
3 ;; Copyright (C) 2001-2014 Free Software Foundation, Inc.
5 ;; Author: Vinicius Jose Latorre <viniciusjl@ig.com.br>
6 ;; Maintainer: Vinicius Jose Latorre <viniciusjl@ig.com.br>
7 ;; Keywords: wp, ebnf, PostScript
8 ;; Old-Version: 1.2
9 ;; Package: ebnf2ps
11 ;; This file is part of GNU Emacs.
13 ;; GNU Emacs is free software: you can redistribute it and/or modify
14 ;; it under the terms of the GNU General Public License as published by
15 ;; the Free Software Foundation, either version 3 of the License, or
16 ;; (at your option) any later version.
18 ;; GNU Emacs is distributed in the hope that it will be useful,
19 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
20 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 ;; GNU General Public License for more details.
23 ;; You should have received a copy of the GNU General Public License
24 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
26 ;;; Commentary:
28 ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
31 ;; This is part of ebnf2ps package.
33 ;; This package defines a parser for ABNF (Augmented BNF).
35 ;; See ebnf2ps.el for documentation.
38 ;; ABNF Syntax
39 ;; -----------
41 ;; See the URL:
42 ;; `http://www.ietf.org/rfc/rfc2234.txt'
43 ;; or
44 ;; `http://www.faqs.org/rfcs/rfc2234.html'
45 ;; or
46 ;; `http://www.rnp.br/ietf/rfc/rfc2234.txt'
47 ;; ("Augmented BNF for Syntax Specifications: ABNF").
50 ;; rulelist = 1*( rule / (*c-wsp c-nl) )
52 ;; rule = rulename defined-as elements c-nl
53 ;; ; continues if next line starts with white space
55 ;; rulename = ALPHA *(ALPHA / DIGIT / "-")
57 ;; defined-as = *c-wsp ("=" / "=/") *c-wsp
58 ;; ; basic rules definition and incremental
59 ;; ; alternatives
61 ;; elements = alternation *c-wsp
63 ;; c-wsp = WSP / (c-nl WSP)
65 ;; c-nl = comment / CRLF
66 ;; ; comment or newline
68 ;; comment = ";" *(WSP / VCHAR) CRLF
70 ;; alternation = concatenation
71 ;; *(*c-wsp "/" *c-wsp concatenation)
73 ;; concatenation = repetition *(1*c-wsp repetition)
75 ;; repetition = [repeat] element
77 ;; repeat = 1*DIGIT / (*DIGIT "*" *DIGIT)
79 ;; element = rulename / group / option /
80 ;; char-val / num-val / prose-val
82 ;; group = "(" *c-wsp alternation *c-wsp ")"
84 ;; option = "[" *c-wsp alternation *c-wsp "]"
86 ;; char-val = DQUOTE *(%x20-21 / %x23-7E) DQUOTE
87 ;; ; quoted string of SP and VCHAR without DQUOTE
89 ;; num-val = "%" (bin-val / dec-val / hex-val)
91 ;; bin-val = "b" 1*BIT
92 ;; [ 1*("." 1*BIT) / ("-" 1*BIT) ]
93 ;; ; series of concatenated bit values
94 ;; ; or single ONEOF range
96 ;; dec-val = "d" 1*DIGIT
97 ;; [ 1*("." 1*DIGIT) / ("-" 1*DIGIT) ]
99 ;; hex-val = "x" 1*HEXDIG
100 ;; [ 1*("." 1*HEXDIG) / ("-" 1*HEXDIG) ]
102 ;; prose-val = "<" *(%x20-3D / %x3F-7E) ">"
103 ;; ; bracketed string of SP and VCHAR without
104 ;; ; angles
105 ;; ; prose description, to be used as last resort
107 ;; ; Core rules -- the coding depends on the system, here is used 7-bit ASCII
109 ;; ALPHA = %x41-5A / %x61-7A
110 ;; ; A-Z / a-z
112 ;; BIT = "0" / "1"
114 ;; CHAR = %x01-7F
115 ;; ; any 7-bit US-ASCII character, excluding NUL
117 ;; CR = %x0D
118 ;; ; carriage return
120 ;; CRLF = CR LF
121 ;; ; Internet standard newline
123 ;; CTL = %x00-1F / %x7F
124 ;; ; controls
126 ;; DIGIT = %x30-39
127 ;; ; 0-9
129 ;; DQUOTE = %x22
130 ;; ; " (Double Quote)
132 ;; HEXDIG = DIGIT / "A" / "B" / "C" / "D" / "E" / "F"
134 ;; HTAB = %x09
135 ;; ; horizontal tab
137 ;; LF = %x0A
138 ;; ; linefeed
140 ;; LWSP = *(WSP / CRLF WSP)
141 ;; ; linear white space (past newline)
143 ;; OCTET = %x00-FF
144 ;; ; 8 bits of data
146 ;; SP = %x20
147 ;; ; space
149 ;; VCHAR = %x21-7E
150 ;; ; visible (printing) characters
152 ;; WSP = SP / HTAB
153 ;; ; white space
156 ;; NOTES:
;; 1. Rule names and terminal strings are case INSENSITIVE.
;;    So, the following rule names are all equal:
;;       Rule-name, rule-Name, rule-name, RULE-NAME
;;    Also, the following strings are equal:
;;       "abc", "ABC", "aBc", "Abc", "aBC", etc.
164 ;; 2. To have a case SENSITIVE string, use the character notation.
165 ;; For example, to specify the lowercase string "abc", use:
166 ;; %d97.98.99
168 ;; 3. There are no implicit spaces between elements, for example, the
169 ;; following rules:
171 ;; foo = %x61 ; a
173 ;; bar = %x62 ; b
175 ;; mumble = foo bar foo
177 ;; Are equivalent to the following rule:
179 ;; mumble = %x61.62.61
;;    If spaces are needed, they should be explicitly specified, like:
183 ;; spaces = 1*(%x20 / %x09) ; one or more spaces or tabs
185 ;; mumble = foo spaces bar spaces foo
187 ;; 4. Lines starting with space or tab are considered a continuation line.
188 ;; For example, the rule:
190 ;; rule = foo
191 ;; bar
193 ;; Is equivalent to:
195 ;; rule = foo bar
198 ;; Differences Between ABNF And ebnf2ps ABNF
199 ;; -----------------------------------------
;; Besides the characters that ABNF accepts, ebnf2ps ABNF also accepts the
;; underscore (_) in rule names and European 8-bit accented characters (from
;; \240 to \377) in rule names, strings and comments.
206 ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
208 ;;; Code:
211 (require 'ebnf-otz)
(defvar ebnf-abn-lex nil
  "Text of the last token scanned by the function `ebnf-abn-lex'.

The function `ebnf-abn-lex' returns only the token type; for `integer',
`terminal' and `non-terminal' tokens it stores the matched text here.")
218 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
219 ;; Syntactic analyzer
222 ;;; rulelist = 1*( rule / (*c-wsp c-nl) )
(defun ebnf-abn-parser (start)
  "ABNF parser.

Scan the current buffer from START up to `ebnf-limit', one rule at a
time, reporting progress through `ebnf-message-float'.

Return the list of parsed rules, most recently parsed first.  A rule
for which `ebnf-add-empty-rule-list' returns non-nil is left out of
that list.  Signal an error if the very first token is already
`end-of-input'.  After a successful parse point is put back where it
was on entry."
  (let ((span (+ (- ebnf-limit start) 1))
        (offset (1- start))
        (saved-point (point))
        productions tok production)
    (goto-char start)
    (setq tok (ebnf-abn-lex))
    (when (eq tok 'end-of-input)
      (error "Invalid ABNF file format"))
    ;; A leading end-of-rule token carries no rule: step over it.
    (when (eq tok 'end-of-rule)
      (setq tok (ebnf-abn-lex)))
    (while (not (eq tok 'end-of-input))
      (ebnf-message-float
       "Parsing...%s%%"
       (/ (* (- (point) offset) 100.0) span))
      ;; `ebnf-abn-rule' returns (NEXT-TOKEN . PRODUCTION).
      (let ((result (ebnf-abn-rule tok)))
        (setq tok (car result)
              production (cdr result)))
      (or (ebnf-add-empty-rule-list production)
          (setq productions (cons production productions))))
    (goto-char saved-point)
    productions))
249 ;;; rule = rulename defined-as elements c-nl
250 ;;; ; continues if next line starts with white space
252 ;;; rulename = ALPHA *(ALPHA / DIGIT / "-")
254 ;;; defined-as = *c-wsp ("=" / "=/") *c-wsp
255 ;;; ; basic rules definition and incremental
256 ;;; ; alternatives
258 ;;; elements = alternation *c-wsp
260 ;;; c-wsp = WSP / (c-nl WSP)
262 ;;; c-nl = comment / CRLF
263 ;;; ; comment or newline
265 ;;; comment = ";" *(WSP / VCHAR) CRLF
(defun ebnf-abn-rule (token)
  "Parse one ABNF rule whose first token, TOKEN, was already scanned.

TOKEN must be `non-terminal'; the rule name is the text left in the
variable `ebnf-abn-lex' by that scan.  For an incremental alternative
\(`=/') the string \" =/\" is appended to the name.  The pending
`ebnf-action' is captured for the production and then reset to nil.

Return a cons (NEXT-TOKEN . PRODUCTION), where NEXT-TOKEN is the token
following the rule and PRODUCTION is built by `ebnf-make-production'.
Signal an error if the name, the `=' / `=/' sign or the end of rule is
missing."
  (let ((rule-name ebnf-abn-lex)
        (pending-action ebnf-action)
        body)
    (setq ebnf-action nil)
    (unless (eq token 'non-terminal)
      (error "Invalid rule name"))
    (setq token (ebnf-abn-lex))
    (cond
     ((eq token 'incremental-alternative)
      (setq rule-name (concat rule-name " =/")))
     ((not (eq token 'equal))
      (error "Invalid rule: missing `=' or `=/'")))
    ;; BODY is (TOKEN . ELEMENTS); TOKEN is what stopped the alternation.
    (setq body (ebnf-abn-alternation))
    (unless (memq (car body) '(end-of-rule end-of-input))
      (error "Invalid rule: there is no end of rule"))
    (ebnf-eps-add-production rule-name)
    (cons (ebnf-abn-lex)
          (ebnf-make-production rule-name (cdr body) pending-action))))
289 ;;; alternation = concatenation
290 ;;; *(*c-wsp "/" *c-wsp concatenation)
(defun ebnf-abn-alternation ()
  "Parse an ABNF alternation: concatenations separated by `/'.

Each concatenation is read with `ebnf-abn-concatenation', starting
from a freshly scanned token.  Concatenations that end on an
`alternative' token are collected (latest first); the first one that
ends on any other token finishes the alternation.

Return the value of `ebnf-token-alternative' called with the collected
branches and the final (TOKEN . SEQUENCE) pair; callers in this file
treat it as a cons (TOKEN . ELEMENT)."
  (let ((branches nil)
        (item (ebnf-abn-concatenation (ebnf-abn-lex))))
    (while (eq (car item) 'alternative)
      (setq branches (cons (cdr item) branches)
            item (ebnf-abn-concatenation (ebnf-abn-lex))))
    (ebnf-token-alternative branches item)))
302 ;;; concatenation = repetition *(1*c-wsp repetition)
(defun ebnf-abn-concatenation (token)
  "Parse an ABNF concatenation that starts at TOKEN.

Repetitions are read with `ebnf-abn-repetition' until one yields no
element.  At least one element is required, otherwise an error is
signaled.

Return a cons (NEXT-TOKEN . SEQUENCE), where NEXT-TOKEN is the token
that ended the concatenation and SEQUENCE is the value of
`ebnf-token-sequence' applied to the elements, latest first."
  (let* ((pair (ebnf-abn-repetition token))
         (item (cdr pair))
         items)
    (setq token (car pair))
    (unless item
      (error "Empty element"))
    (setq items (cons item items))
    ;; Keep reading until a repetition comes back without an element.
    (while (progn
             (setq pair (ebnf-abn-repetition token)
                   token (car pair)
                   item (cdr pair))
             item)
      (setq items (cons item items)))
    (cons token
          (ebnf-token-sequence items))))
320 ;;; repetition = [repeat] element
322 ;;; repeat = 1*DIGIT / (*DIGIT "*" *DIGIT)
(defun ebnf-abn-repetition (token)
  "Parse an optional repeat prefix followed by an element, from TOKEN.

The prefix is INTEGER, INTEGER \"*\" [INTEGER] or \"*\" [INTEGER].
A lone INTEGER sets both bounds to it; a lone \"*\" sets both bounds
to the empty string.

Return a cons whose car is the next token:
- (NEXT-TOKEN . ONE-OR-MORE) for \"1*\",
- (NEXT-TOKEN . ZERO-OR-MORE) for \"0*\" and \"*\",
- the value of `ebnf-token-repeat' for any other prefix,
- (NEXT-TOKEN . ELEMENT) when there is no prefix,
- (TOKEN . nil) when TOKEN starts no element, leaving TOKEN for the
  caller to handle.
Signal an error if a prefix is not followed by an element."
  (let (lower upper)
    ;; INTEGER [ "*" [ INTEGER ] ]
    (when (eq token 'integer)
      (setq lower ebnf-abn-lex
            token (ebnf-abn-lex))
      (or (eq token 'repeat)
          (setq upper lower)))
    ;; "*" [ INTEGER ]
    (when (eq token 'repeat)
      ;; only * ==> lower & upper are empty string
      (or lower
          (setq lower ""
                upper ""))
      (when (eq (setq token (ebnf-abn-lex)) 'integer)
        (setq upper ebnf-abn-lex
              token (ebnf-abn-lex))))
    (let ((element (ebnf-abn-element token)))
      (cond
       ;; there is a repetition
       (lower
        (or element
            (error "Missing element repetition"))
        (setq token (ebnf-abn-lex))
        (cond
         ;; one or more
         ((and (string= lower "1") (null upper))
          (cons token (ebnf-make-one-or-more element)))
         ;; zero or more
         ((or (and (string= lower "0") (null upper))
              (and (string= lower "") (string= upper "")))
          (cons token (ebnf-make-zero-or-more element)))
         ;; real repetition
         (t
          (ebnf-token-repeat lower (cons token element) upper))))
       ;; there is an element
       (element
        (cons (ebnf-abn-lex) element))
       ;; something that caller has to deal
       (t
        (cons token nil))))))
368 ;;; element = rulename / group / option /
369 ;;; char-val / num-val / prose-val
371 ;;; group = "(" *c-wsp alternation *c-wsp ")"
373 ;;; option = "[" *c-wsp alternation *c-wsp "]"
375 ;;; char-val = DQUOTE *(%x20-21 / %x23-7E) DQUOTE
376 ;;; ; quoted string of SP and VCHAR without DQUOTE
378 ;;; num-val = "%" (bin-val / dec-val / hex-val)
380 ;;; bin-val = "b" 1*BIT
381 ;;; [ 1*("." 1*BIT) / ("-" 1*BIT) ]
382 ;;; ; series of concatenated bit values
383 ;;; ; or single ONEOF range
385 ;;; dec-val = "d" 1*DIGIT
386 ;;; [ 1*("." 1*DIGIT) / ("-" 1*DIGIT) ]
388 ;;; hex-val = "x" 1*HEXDIG
389 ;;; [ 1*("." 1*HEXDIG) / ("-" 1*HEXDIG) ]
391 ;;; prose-val = "<" *(%x20-3D / %x3F-7E) ">"
392 ;;; ; bracketed string of SP and VCHAR without
393 ;;; ; angles
394 ;;; ; prose description, to be used as last resort
(defun ebnf-abn-element (token)
  "Build the syntax element introduced by TOKEN.

- `terminal' and `non-terminal' use the text in the variable
  `ebnf-abn-lex'.
- `begin-group' parses an alternation that must end on `end-group'
  and returns it as is.
- `begin-optional' parses an alternation that must end on
  `end-optional' and wraps it with `ebnf-token-optional'.

Return nil when TOKEN does not start an element.  Signal an error when
the closing `)' or `]' is missing."
  (cond
   ;; terminal
   ((eq token 'terminal)
    (ebnf-make-terminal ebnf-abn-lex))
   ;; non-terminal
   ((eq token 'non-terminal)
    (ebnf-make-non-terminal ebnf-abn-lex))
   ;; group
   ((eq token 'begin-group)
    (let ((body (ebnf-abn-alternation)))
      (or (eq (car body) 'end-group)
          (error "Missing `)'"))
      (cdr body)))
   ;; optional
   ((eq token 'begin-optional)
    (let ((body (ebnf-abn-alternation)))
      (or (eq (car body) 'end-optional)
          (error "Missing `]'"))
      (ebnf-token-optional (cdr body))))
   ;; no element
   (t
    nil)))
423 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
424 ;; Lexical analyzer
(defconst ebnf-abn-token-table
  (make-vector 256 'error)
  "Vector used to map characters to a lexical token.

It has one slot per character code from 0 to 255.  Every slot starts
as `error'; `ebnf-abn-initialize' overwrites the slots of the
characters that are valid in ABNF.")
(defun ebnf-abn-initialize ()
  "Fill `ebnf-abn-token-table' with the ABNF lexical token classes.

Slots that are not assigned here (control characters and 8-bit control
characters among them) keep whatever value they already hold, which is
`error' in a freshly created table."
  (let ((table ebnf-abn-token-table))
    ;; Character ranges, both bounds inclusive.
    (dolist (spec '((?0 ?9 integer)               ; digits
                    (?A ?Z non-terminal)          ; upper case letters
                    (?a ?z non-terminal)          ; lower case letters
                    (#o240 #o377 non-terminal)))  ; European 8-bit characters
      (let ((code (car spec))
            (last (nth 1 spec))
            (class (nth 2 spec)))
        (while (<= code last)
          (aset table code class)
          (setq code (1+ code)))))
    ;; Single characters.
    (dolist (entry '((?\n . end-of-rule)        ; [NL] linefeed
                     (?\r . end-of-rule)        ; [CR] carriage return
                     (?\013 . space)            ; [VT] vertical tab
                     (?\t . space)              ; [HT] horizontal tab
                     (?\s . space)              ; [SP] space
                     (?\f . form-feed)          ; [FF] form feed
                     (?< . non-terminal)
                     (?% . terminal)
                     (?\" . terminal)
                     (?\( . begin-group)
                     (?\) . end-group)
                     (?* . repeat)
                     (?= . equal)
                     (?\[ . begin-optional)
                     (?\] . end-optional)
                     (?/ . alternative)))
      (aset table (car entry) (cdr entry)))
    ;; The comment character goes last so that the function still
    ;; returns the value of this final `aset'.
    (aset table ?\; 'comment)))
;; The 8-bit range "\240-\377" is not written inline in these sets; it
;; is supplied through `ebnf-range-regexp' (see that function).
(defconst ebnf-abn-non-terminal-chars
  (ebnf-range-regexp "-_0-9A-Za-z" ?\240 ?\377)
  "Character set of an ABNF rule name.
`ebnf-abn-lex' passes it to `ebnf-buffer-substring' to collect a name.")

(defconst ebnf-abn-non-terminal-letter-chars
  (ebnf-range-regexp "A-Za-z" ?\240 ?\377)
  "Character set `ebnf-abn-lex' checks right after the `<' of a prose value.")
(defun ebnf-abn-lex ()
  "Lexical analyzer for ABNF.

Return a lexical token.

Spaces, comments, form feeds and line ends that do not finish a rule
are skipped first; a form feed sets `ebnf-action' to `form-feed'.
The token is then one of:

  `end-of-input'             point reached `ebnf-limit'
  `end-of-rule'              a rule ended
  `integer'                  digits
  `terminal'                 \"string\" or %[bdx]NNN...
  `non-terminal'             NAME or <NAME>
  `equal'                    =
  `incremental-alternative'  =/

or the `ebnf-abn-token-table' entry of a single character, which is
consumed: `begin-group', `end-group', `begin-optional', `end-optional',
`alternative' or `repeat'.

For `integer', `terminal' and `non-terminal' the matched text is stored
in the variable `ebnf-abn-lex'; a prose value keeps its angle brackets.
Signal an error on a character above 255 or one mapped to `error'.

See documentation for variable `ebnf-abn-lex'."
  (if (>= (point) ebnf-limit)
      'end-of-input
    (let (token)
      ;; skip spaces and comments
      (while (if (> (following-char) 255)
                 (progn
                   (setq token 'error)
                   nil)
               (setq token (aref ebnf-abn-token-table (following-char)))
               (cond
                ((eq token 'space)
                 (skip-chars-forward " \013\t" ebnf-limit)
                 (< (point) ebnf-limit))
                ((eq token 'comment)
                 (ebnf-abn-skip-comment))
                ((eq token 'form-feed)
                 (forward-char)
                 (setq ebnf-action 'form-feed))
                ((eq token 'end-of-rule)
                 (ebnf-abn-skip-end-of-rule))
                (t nil))))
      (cond
       ;; end of input
       ((>= (point) ebnf-limit)
        'end-of-input)
       ;; error
       ((eq token 'error)
        (error "Invalid character"))
       ;; end of rule
       ((eq token 'end-of-rule)
        'end-of-rule)
       ;; integer
       ((eq token 'integer)
        (setq ebnf-abn-lex (ebnf-buffer-substring "0-9"))
        'integer)
       ;; terminal: "string" or %[bdx]NNN((.NNN)+|-NNN)?
       ((eq token 'terminal)
        (setq ebnf-abn-lex
              (if (= (following-char) ?\")
                  (ebnf-abn-string)
                (ebnf-abn-character)))
        'terminal)
       ;; non-terminal: NAME or <NAME>
       ((eq token 'non-terminal)
        (let ((prose-p (= (following-char) ?<)))
          (when prose-p
            (forward-char)
            ;; `ebnf-abn-non-terminal-letter-chars' is built like the
            ;; other character sets of this file, which are handed to
            ;; `skip-chars-forward', so it is probed the same way
            ;; instead of being matched as a regexp.
            (or (save-excursion
                  (> (skip-chars-forward ebnf-abn-non-terminal-letter-chars
                                         (min ebnf-limit (1+ (point))))
                     0))
                (error "Invalid prose value")))
          (setq ebnf-abn-lex
                (ebnf-buffer-substring ebnf-abn-non-terminal-chars))
          (when prose-p
            (or (= (following-char) ?>)
                (error "Invalid prose value"))
            ;; Consume the closing `>'; left in place, the next call
            ;; would find it mapped to `error' in the token table.
            (forward-char)
            (setq ebnf-abn-lex (concat "<" ebnf-abn-lex ">"))))
        'non-terminal)
       ;; equal: =, =/
       ((eq token 'equal)
        (forward-char)
        (if (/= (following-char) ?/)
            'equal
          (forward-char)
          'incremental-alternative))
       ;; miscellaneous: (, ), [, ], /, *
       (t
        (forward-char)
        token)))))
(defun ebnf-abn-skip-end-of-rule ()
  "Skip line ends, spaces and comments starting at point.

A rule is considered ended when two or more consecutive line-end
characters (CR or LF) are skipped in one go.  Scanning continues for as
long as a comment follows and `ebnf-abn-skip-comment' returns non-nil.

Return nil if the rule ended, t otherwise."
  (let ((rule-ended nil)
        (scanning t))
    (while scanning
      ;; end of rule ==> 2 or more consecutive end of lines
      (when (> (skip-chars-forward "\r\n" ebnf-limit) 1)
        (setq rule-ended t))
      ;; skip spaces
      (skip-chars-forward " \013\t" ebnf-limit)
      ;; skip comments
      (setq scanning (and (= (following-char) ?\;)
                          (ebnf-abn-skip-comment))))
    (not rule-ended)))
;; The range "\177-\237" is not written inline in this set; it is
;; supplied through `ebnf-range-regexp' (see that function).
(defconst ebnf-abn-comment-chars
  (ebnf-range-regexp "^\n\000-\010\016-\037" ?\177 ?\237)
  "Character set used to scan over the text of an ABNF comment.
It is handed to `skip-chars-forward' by `ebnf-abn-skip-comment' and to
`ebnf-buffer-substring' by `ebnf-abn-eps-filename'.")
(defun ebnf-abn-skip-comment ()
  "Skip the ABNF comment whose `;' is at point.

When `ebnf-eps-executing' is non-nil, a comment whose first character
is `[', `]', `H' or `F' is passed to `ebnf-eps-add-context',
`ebnf-eps-remove-context', `ebnf-eps-header-comment' or
`ebnf-eps-footer-comment' respectively, with the text returned by
`ebnf-abn-eps-filename'.  Any other comment sets `ebnf-action' to the
`ebnf-comment-table' entry of its first character and is skipped with
`ebnf-abn-comment-chars'.

Return nil if point ends up at `ebnf-limit' and t if it ends up on a
newline, which is left for the caller to consume.  Signal an error if
the comment stops on any other character."
  (forward-char)
  (cond
   ;; open EPS file
   ((and ebnf-eps-executing (= (following-char) ?\[))
    (ebnf-eps-add-context (ebnf-abn-eps-filename)))
   ;; close EPS file
   ((and ebnf-eps-executing (= (following-char) ?\]))
    (ebnf-eps-remove-context (ebnf-abn-eps-filename)))
   ;; EPS header
   ((and ebnf-eps-executing (= (following-char) ?H))
    (ebnf-eps-header-comment (ebnf-abn-eps-filename)))
   ;; EPS footer
   ((and ebnf-eps-executing (= (following-char) ?F))
    (ebnf-eps-footer-comment (ebnf-abn-eps-filename)))
   ;; any other action in comment
   (t
    ;; NOTE(review): assumes `ebnf-comment-table' has a slot for every
    ;; code `following-char' can return here -- confirm its size.
    (setq ebnf-action (aref ebnf-comment-table (following-char)))
    (skip-chars-forward ebnf-abn-comment-chars ebnf-limit)))
  ;; check for a valid end of comment
  (cond ((>= (point) ebnf-limit)
         nil)
        ((= (following-char) ?\n)
         t)
        (t
         (error "Invalid character"))))
(defun ebnf-abn-eps-filename ()
  "Step over the character at point and return the comment text after it.
The text is collected by `ebnf-buffer-substring' using
`ebnf-abn-comment-chars'."
  (forward-char 1)
  (ebnf-buffer-substring ebnf-abn-comment-chars))
;; The 8-bit range "\240-\377" is not written inline in this set; it is
;; supplied through `ebnf-range-regexp' (see that function).
(defconst ebnf-abn-string-chars
  (ebnf-range-regexp " -!#-~" ?\240 ?\377)
  "Character set allowed between the double quotes of an ABNF string.
`ebnf-abn-string' hands it to `skip-chars-forward'.")
(defun ebnf-abn-string ()
  "Scan the double-quoted string that starts at point.

Point must be on the opening double quote.  Return the text between
the quotes, without text properties, and leave point just after the
closing quote.  Signal an error if the characters allowed by
`ebnf-abn-string-chars' are not followed by a closing double quote."
  (forward-char)                        ; step over the opening quote
  (let ((beg (point)))
    (skip-chars-forward ebnf-abn-string-chars ebnf-limit)
    (or (= (following-char) ?\")
        (error "Missing `\"'"))
    (let ((end (point)))
      (forward-char)                    ; step over the closing quote
      (buffer-substring-no-properties beg end))))
(defun ebnf-abn-character ()
  "Scan the numeric terminal value that starts at point.

The accepted form is %[bdx]NNN((-NNN)|(.NNN)+)? where the base letter,
in either case, selects binary, decimal or hexadecimal digits.  Point
must be on the `%'.

Return the whole matched text, `%' included and without text
properties, and leave point just after it.  Signal an error if the
base letter is unknown or a digit group is empty."
  (let ((beg (point))
        digits)
    (forward-char)                      ; step over the `%'
    (setq digits
          (let ((base (following-char)))
            (cond ((memq base '(?B ?b)) "01")
                  ((memq base '(?D ?d)) "0-9")
                  ((memq base '(?X ?x)) "0-9A-Fa-f")
                  (t (error "Invalid terminal value")))))
    (forward-char)                      ; step over the base letter
    (or (> (skip-chars-forward digits ebnf-limit) 0)
        (error "Invalid terminal value"))
    (if (= (following-char) ?-)
        ;; a single range: NNN-NNN
        (progn
          (forward-char)
          (or (> (skip-chars-forward digits ebnf-limit) 0)
              (error "Invalid terminal value range")))
      ;; a series of concatenated values: NNN.NNN.NNN
      (while (= (following-char) ?.)
        (forward-char)
        (or (> (skip-chars-forward digits ebnf-limit) 0)
            (error "Invalid terminal value"))))
    (buffer-substring-no-properties beg (point))))
661 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
664 (provide 'ebnf-abn)
666 ;;; ebnf-abn.el ends here