Renamed xhtml-target.rnc to xhtml-tgt.rnc; all users changed.
[emacs.git] / lisp / progmodes / ebnf-abn.el
blob44034bde3d57a1768c8eb2e6d48675e4287b1825
1 ;;; ebnf-abn.el --- parser for ABNF (Augmented BNF)
3 ;; Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007
4 ;; Free Software Foundation, Inc.
6 ;; Author: Vinicius Jose Latorre <viniciusjl@ig.com.br>
7 ;; Maintainer: Vinicius Jose Latorre <viniciusjl@ig.com.br>
8 ;; Keywords: wp, ebnf, PostScript
9 ;; Version: 1.2
11 ;; This file is part of GNU Emacs.
13 ;; GNU Emacs is free software; you can redistribute it and/or modify
14 ;; it under the terms of the GNU General Public License as published by
15 ;; the Free Software Foundation; either version 3, or (at your option)
16 ;; any later version.
18 ;; GNU Emacs is distributed in the hope that it will be useful,
19 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
20 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 ;; GNU General Public License for more details.
23 ;; You should have received a copy of the GNU General Public License
24 ;; along with GNU Emacs; see the file COPYING. If not, write to the
25 ;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
26 ;; Boston, MA 02110-1301, USA.
28 ;;; Commentary:
30 ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
33 ;; This is part of ebnf2ps package.
35 ;; This package defines a parser for ABNF (Augmented BNF).
37 ;; See ebnf2ps.el for documentation.
40 ;; ABNF Syntax
41 ;; -----------
43 ;; See the URL:
44 ;; `http://www.ietf.org/rfc/rfc2234.txt'
45 ;; or
46 ;; `http://www.faqs.org/rfcs/rfc2234.html'
47 ;; or
48 ;; `http://www.rnp.br/ietf/rfc/rfc2234.txt'
49 ;; ("Augmented BNF for Syntax Specifications: ABNF").
52 ;; rulelist = 1*( rule / (*c-wsp c-nl) )
54 ;; rule = rulename defined-as elements c-nl
55 ;; ; continues if next line starts with white space
57 ;; rulename = ALPHA *(ALPHA / DIGIT / "-")
59 ;; defined-as = *c-wsp ("=" / "=/") *c-wsp
60 ;; ; basic rules definition and incremental
61 ;; ; alternatives
63 ;; elements = alternation *c-wsp
65 ;; c-wsp = WSP / (c-nl WSP)
67 ;; c-nl = comment / CRLF
68 ;; ; comment or newline
70 ;; comment = ";" *(WSP / VCHAR) CRLF
72 ;; alternation = concatenation
73 ;; *(*c-wsp "/" *c-wsp concatenation)
75 ;; concatenation = repetition *(1*c-wsp repetition)
77 ;; repetition = [repeat] element
79 ;; repeat = 1*DIGIT / (*DIGIT "*" *DIGIT)
81 ;; element = rulename / group / option /
82 ;; char-val / num-val / prose-val
84 ;; group = "(" *c-wsp alternation *c-wsp ")"
86 ;; option = "[" *c-wsp alternation *c-wsp "]"
88 ;; char-val = DQUOTE *(%x20-21 / %x23-7E) DQUOTE
89 ;; ; quoted string of SP and VCHAR without DQUOTE
91 ;; num-val = "%" (bin-val / dec-val / hex-val)
93 ;; bin-val = "b" 1*BIT
94 ;; [ 1*("." 1*BIT) / ("-" 1*BIT) ]
95 ;; ; series of concatenated bit values
96 ;; ; or single ONEOF range
98 ;; dec-val = "d" 1*DIGIT
99 ;; [ 1*("." 1*DIGIT) / ("-" 1*DIGIT) ]
101 ;; hex-val = "x" 1*HEXDIG
102 ;; [ 1*("." 1*HEXDIG) / ("-" 1*HEXDIG) ]
104 ;; prose-val = "<" *(%x20-3D / %x3F-7E) ">"
105 ;; ; bracketed string of SP and VCHAR without
106 ;; ; angles
107 ;; ; prose description, to be used as last resort
109 ;; ; Core rules -- the coding depends on the system, here is used 7-bit ASCII
111 ;; ALPHA = %x41-5A / %x61-7A
112 ;; ; A-Z / a-z
114 ;; BIT = "0" / "1"
116 ;; CHAR = %x01-7F
117 ;; ; any 7-bit US-ASCII character, excluding NUL
119 ;; CR = %x0D
120 ;; ; carriage return
122 ;; CRLF = CR LF
123 ;; ; Internet standard newline
125 ;; CTL = %x00-1F / %x7F
126 ;; ; controls
128 ;; DIGIT = %x30-39
129 ;; ; 0-9
131 ;; DQUOTE = %x22
132 ;; ; " (Double Quote)
134 ;; HEXDIG = DIGIT / "A" / "B" / "C" / "D" / "E" / "F"
136 ;; HTAB = %x09
137 ;; ; horizontal tab
139 ;; LF = %x0A
140 ;; ; linefeed
142 ;; LWSP = *(WSP / CRLF WSP)
143 ;; ; linear white space (past newline)
145 ;; OCTET = %x00-FF
146 ;; ; 8 bits of data
148 ;; SP = %x20
149 ;; ; space
151 ;; VCHAR = %x21-7E
152 ;; ; visible (printing) characters
154 ;; WSP = SP / HTAB
155 ;; ; white space
158 ;; NOTES:
160 ;; 1. Rule names and terminal strings are case INSENSITIVE.
161 ;; So, the following rule names are all equal:
162 ;; Rule-name, rule-Name, rule-name, RULE-NAME
163 ;; Also, the following strings are equal:
164 ;; "abc", "ABC", "aBc", "Abc", "aBC", etc.
166 ;; 2. To have a case SENSITIVE string, use the character notation.
167 ;; For example, to specify the lowercase string "abc", use:
168 ;; %d97.98.99
170 ;; 3. There are no implicit spaces between elements, for example, the
171 ;; following rules:
173 ;; foo = %x61 ; a
175 ;; bar = %x62 ; b
177 ;; mumble = foo bar foo
179 ;; Are equivalent to the following rule:
181 ;; mumble = %x61.62.61
183 ;; If spaces are needed, they should be explicitly specified, like:
185 ;; spaces = 1*(%x20 / %x09) ; one or more spaces or tabs
187 ;; mumble = foo spaces bar spaces foo
189 ;; 4. Lines starting with space or tab are considered a continuation line.
190 ;; For example, the rule:
192 ;; rule = foo
193 ;; bar
195 ;; Is equivalent to:
197 ;; rule = foo bar
200 ;; Differences Between ABNF And ebnf2ps ABNF
201 ;; -----------------------------------------
203 ;; Besides the characters that ABNF accepts, ebnf2ps ABNF also accepts the
204 ;; underscore (_) in rule names and European 8-bit accented characters (from
205 ;; \240 to \377) in rule names, strings and comments.
208 ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
210 ;;; Code:
213 (require 'ebnf-otz)
(defvar ebnf-abn-lex nil
  "Text of the last token scanned by the function `ebnf-abn-lex'.
The lexer stores here the string of each integer, terminal and
non-terminal token it returns; the parser reads it afterwards.")
220 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
221 ;; Syntactic analyzer
224 ;;; rulelist = 1*( rule / (*c-wsp c-nl) )
(defun ebnf-abn-parser (start)
  "Parse the ABNF text between START and `ebnf-limit'.
Return the list of productions parsed, most recently parsed
first; a production for which `ebnf-add-empty-rule-list' returns
non-nil is left out of that list.
On normal return point is back where it was on entry.
Signal an error if the very first token is the end of input."
  (let* ((saved-point (point))
         (offset (1- start))
         (span (+ (- ebnf-limit start) 1))
         (token (progn
                  (goto-char start)
                  (ebnf-abn-lex)))
         productions parsed)
    (when (eq token 'end-of-input)
      (error "Invalid ABNF file format"))
    ;; a leading end of rule is simply dropped
    (when (eq token 'end-of-rule)
      (setq token (ebnf-abn-lex)))
    (while (not (eq token 'end-of-input))
      ;; progress report: percentage of the region already scanned
      (ebnf-message-float "Parsing...%s%%"
                          (/ (* (- (point) offset) 100.0) span))
      ;; `ebnf-abn-rule' returns (NEXT-TOKEN . PRODUCTION)
      (setq parsed (ebnf-abn-rule token)
            token (car parsed))
      (unless (ebnf-add-empty-rule-list (cdr parsed))
        (setq productions (cons (cdr parsed) productions))))
    (goto-char saved-point)
    productions))
251 ;;; rule = rulename defined-as elements c-nl
252 ;;; ; continues if next line starts with white space
254 ;;; rulename = ALPHA *(ALPHA / DIGIT / "-")
256 ;;; defined-as = *c-wsp ("=" / "=/") *c-wsp
257 ;;; ; basic rules definition and incremental
258 ;;; ; alternatives
260 ;;; elements = alternation *c-wsp
262 ;;; c-wsp = WSP / (c-nl WSP)
264 ;;; c-nl = comment / CRLF
265 ;;; ; comment or newline
267 ;;; comment = ";" *(WSP / VCHAR) CRLF
(defun ebnf-abn-rule (token)
  "Parse one rule whose first token is TOKEN.
TOKEN must be `non-terminal'; the rule name is the text found in
the variable `ebnf-abn-lex' on entry.  For an incremental
alternative (`=/') the production name gets \" =/\" appended.
The value of `ebnf-action' on entry is attached to the production
and the variable is reset to nil.
Return (NEXT-TOKEN . PRODUCTION), where NEXT-TOKEN is the token
scanned after the end of the rule.
Signal an error on a bad rule name, a missing `=' or `=/', or a
missing end of rule."
  (let ((rule-name ebnf-abn-lex)
        (pending-action ebnf-action)
        body)
    (setq ebnf-action nil)
    (unless (eq token 'non-terminal)
      (error "Invalid rule name"))
    (setq token (ebnf-abn-lex))
    (cond
     ((eq token 'incremental-alternative)
      (setq rule-name (concat rule-name " =/")))
     ((not (eq token 'equal))
      (error "Invalid rule: missing `=' or `=/'")))
    ;; BODY is (TOKEN . ELEMENTS); TOKEN is what stopped the alternation
    (setq body (ebnf-abn-alternation))
    (unless (memq (car body) '(end-of-rule end-of-input))
      (error "Invalid rule: there is no end of rule"))
    (ebnf-eps-add-production rule-name)
    (cons (ebnf-abn-lex)
          (ebnf-make-production rule-name (cdr body) pending-action))))
291 ;;; alternation = concatenation
292 ;;; *(*c-wsp "/" *c-wsp concatenation)
(defun ebnf-abn-alternation ()
  "Parse an alternation: concatenations separated by `alternative' tokens.
Every concatenation that is followed by an `alternative' token is
accumulated, latest first.  That list and the final
\(TOKEN . CONCATENATION) pair are handed to `ebnf-token-alternative',
whose value is returned."
  (let ((branch (ebnf-abn-concatenation (ebnf-abn-lex)))
        branches)
    (while (eq (car branch) 'alternative)
      (setq branches (cons (cdr branch) branches)
            branch (ebnf-abn-concatenation (ebnf-abn-lex))))
    (ebnf-token-alternative branches branch)))
304 ;;; concatenation = repetition *(1*c-wsp repetition)
(defun ebnf-abn-concatenation (token)
  "Parse a concatenation of repetitions starting at TOKEN.
Return (NEXT-TOKEN . SEQUENCE).  SEQUENCE is the value of
`ebnf-token-sequence' applied to the parsed elements (collected
latest first); NEXT-TOKEN is the token at which no further
element could be parsed.
Signal an error if not even one element is found."
  (let* ((pair (ebnf-abn-repetition token))
         (item (cdr pair))
         items)
    (setq token (car pair))
    (unless item
      (error "Empty element"))
    (setq items (list item))
    ;; keep collecting while `ebnf-abn-repetition' yields an element
    (while (progn
             (setq pair (ebnf-abn-repetition token)
                   token (car pair)
                   item (cdr pair))
             item)
      (setq items (cons item items)))
    (cons token (ebnf-token-sequence items))))
322 ;;; repetition = [repeat] element
324 ;;; repeat = 1*DIGIT / (*DIGIT "*" *DIGIT)
(defun ebnf-abn-repetition (token)
  "Parse `[repeat] element' with TOKEN as the current token.
A repeat prefix is INTEGER, INTEGER \"*\" [INTEGER] or \"*\" [INTEGER].
Return:
  (NEXT-TOKEN . ELEMENT) when an element was parsed, wrapped by
      `ebnf-make-one-or-more' for \"1*\" and by
      `ebnf-make-zero-or-more' for \"0*\" and \"*\";
  the value of `ebnf-token-repeat' for any other repeat prefix;
  (TOKEN) when TOKEN starts neither a repeat prefix nor an
      element, so that the caller can deal with TOKEN.
Signal an error if a repeat prefix is not followed by an element."
  (let (lower upper)
    ;; INTEGER [ "*" [ INTEGER ] ]
    (when (eq token 'integer)
      (setq lower ebnf-abn-lex
            token (ebnf-abn-lex))
      ;; a lone INTEGER means exactly that many times
      (or (eq token 'repeat)
          (setq upper lower)))
    ;; "*" [ INTEGER ]
    (when (eq token 'repeat)
      ;; only * ==> lower & upper are empty string
      (or lower
          (setq lower ""
                upper ""))
      (when (eq (setq token (ebnf-abn-lex)) 'integer)
        (setq upper ebnf-abn-lex
              token (ebnf-abn-lex))))
    (let ((element (ebnf-abn-element token)))
      (cond
       ;; there is a repetition
       (lower
        (or element
            (error "Missing element repetition"))
        (setq token (ebnf-abn-lex))
        (cond
         ;; one or more
         ((and (string= lower "1") (null upper))
          (cons token (ebnf-make-one-or-more element)))
         ;; zero or more
         ((or (and (string= lower "0") (null upper))
              (and (string= lower "") (string= upper "")))
          (cons token (ebnf-make-zero-or-more element)))
         ;; real repetition
         (t
          (ebnf-token-repeat lower (cons token element) upper))))
       ;; there is an element
       (element
        (cons (ebnf-abn-lex) element))
       ;; something that caller has to deal
       (t
        (cons token nil))))))
370 ;;; element = rulename / group / option /
371 ;;; char-val / num-val / prose-val
373 ;;; group = "(" *c-wsp alternation *c-wsp ")"
375 ;;; option = "[" *c-wsp alternation *c-wsp "]"
377 ;;; char-val = DQUOTE *(%x20-21 / %x23-7E) DQUOTE
378 ;;; ; quoted string of SP and VCHAR without DQUOTE
380 ;;; num-val = "%" (bin-val / dec-val / hex-val)
382 ;;; bin-val = "b" 1*BIT
383 ;;; [ 1*("." 1*BIT) / ("-" 1*BIT) ]
384 ;;; ; series of concatenated bit values
385 ;;; ; or single ONEOF range
387 ;;; dec-val = "d" 1*DIGIT
388 ;;; [ 1*("." 1*DIGIT) / ("-" 1*DIGIT) ]
390 ;;; hex-val = "x" 1*HEXDIG
391 ;;; [ 1*("." 1*HEXDIG) / ("-" 1*HEXDIG) ]
393 ;;; prose-val = "<" *(%x20-3D / %x3F-7E) ">"
394 ;;; ; bracketed string of SP and VCHAR without
395 ;;; ; angles
396 ;;; ; prose description, to be used as last resort
(defun ebnf-abn-element (token)
  "Parse the element that starts with TOKEN.
For `terminal' and `non-terminal' the node is built from the text
in the variable `ebnf-abn-lex'.  For `begin-group' and
`begin-optional' a nested alternation is parsed, and it must be
closed by `end-group' or `end-optional' respectively, otherwise
an error is signaled.
Return the element, or nil if TOKEN does not start an element."
  (cond
   ;; terminal
   ((eq token 'terminal)
    (ebnf-make-terminal ebnf-abn-lex))
   ;; non-terminal
   ((eq token 'non-terminal)
    (ebnf-make-non-terminal ebnf-abn-lex))
   ;; group
   ((eq token 'begin-group)
    (let ((body (ebnf-abn-alternation)))
      (or (eq (car body) 'end-group)
          (error "Missing `)'"))
      (cdr body)))
   ;; optional
   ((eq token 'begin-optional)
    (let ((body (ebnf-abn-alternation)))
      (or (eq (car body) 'end-optional)
          (error "Missing `]'"))
      (ebnf-token-optional (cdr body))))
   ;; no element
   (t
    nil)))
425 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
426 ;; Lexical analyzer
(defconst ebnf-abn-token-table (make-vector 256 'error)
  "Vector of 256 entries mapping a character code to a lexical token.
Every entry starts out as `error'; `ebnf-abn-initialize' fills in
the other tokens.")
(defun ebnf-abn-initialize ()
  "Fill `ebnf-abn-token-table' with the ABNF lexical tokens.
Entries that are not assigned here (control characters, 8-bit
control characters, ...) are left untouched."
  (let ((ranges
         ;; (FROM TO TOKEN): codes FROM up to, but excluding, TO
         '((?\060 ?\072 integer)        ; digits: 0-9
           (?\101 ?\133 non-terminal)   ; printable character: A-Z
           (?\141 ?\173 non-terminal)   ; printable character: a-z
           (?\240 ?\400 non-terminal))) ; European 8-bit accentuated characters
        (overrides
         '(;; end of line characters
           (?\n . end-of-rule)          ; [NL] linefeed
           (?\r . end-of-rule)          ; [CR] carriage return
           ;; space characters
           (?\013 . space)              ; [VT] vertical tab
           (?\t . space)                ; [HT] horizontal tab
           (?\  . space)                ; [SP] space
           ;; form feed character
           (?\f . form-feed)            ; [FF] form feed
           ;; other lexical characters
           (?< . non-terminal)
           (?% . terminal)
           (?\" . terminal)
           (?\( . begin-group)
           (?\) . end-group)
           (?* . repeat)
           (?= . equal)
           (?\[ . begin-optional)
           (?\] . end-optional)
           (?/ . alternative)))
        code stop class)
    (while ranges
      (setq code (nth 0 (car ranges))
            stop (nth 1 (car ranges))
            class (nth 2 (car ranges))
            ranges (cdr ranges))
      (while (< code stop)
        (aset ebnf-abn-token-table code class)
        (setq code (1+ code))))
    (while overrides
      (aset ebnf-abn-token-table (car (car overrides)) (cdr (car overrides)))
      (setq overrides (cdr overrides)))
    ;; comment character; kept as the last form so that the function
    ;; still returns the value of this `aset'
    (aset ebnf-abn-token-table ?\; 'comment)))
;; replace the range "\240-\377" (see `ebnf-range-regexp').
(defconst ebnf-abn-non-terminal-chars
  (ebnf-range-regexp "-_0-9A-Za-z" ?\240 ?\377)
  "Character set `ebnf-abn-lex' gives to `ebnf-buffer-substring'
to scan the text of a non-terminal.")
(defconst ebnf-abn-non-terminal-letter-chars
  (ebnf-range-regexp "A-Za-z" ?\240 ?\377)
  "Value `ebnf-abn-lex' matches with `looking-at' right after the
`<' that opens a prose value.")
(defun ebnf-abn-lex ()
  "Lexical analyzer for ABNF.

Return a lexical token: `end-of-input', `end-of-rule', `integer',
`terminal', `non-terminal', `equal', `incremental-alternative' or
one of the single character tokens of `ebnf-abn-token-table'.
For `integer', `terminal' and `non-terminal' the token text is
stored in the variable `ebnf-abn-lex'.

Spaces, comments and form feeds are skipped; a form feed sets
`ebnf-action' to `form-feed'.

Signal an error on a character that has no token, or on a
malformed prose value.

See documentation for variable `ebnf-abn-lex'."
  (if (>= (point) ebnf-limit)
      'end-of-input
    (let (token)
      ;; skip spaces and comments
      (while (if (> (following-char) 255)
                 ;; outside the token table: report it below
                 (progn
                   (setq token 'error)
                   nil)
               (setq token (aref ebnf-abn-token-table (following-char)))
               (cond
                ((eq token 'space)
                 (skip-chars-forward " \013\t" ebnf-limit)
                 (< (point) ebnf-limit))
                ((eq token 'comment)
                 (ebnf-abn-skip-comment))
                ((eq token 'form-feed)
                 (forward-char)
                 (setq ebnf-action 'form-feed))
                ((eq token 'end-of-rule)
                 (ebnf-abn-skip-end-of-rule))
                (t nil))))
      (cond
       ;; end of input
       ((>= (point) ebnf-limit)
        'end-of-input)
       ;; error
       ((eq token 'error)
        (error "Invalid character"))
       ;; end of rule
       ((eq token 'end-of-rule)
        'end-of-rule)
       ;; integer
       ((eq token 'integer)
        (setq ebnf-abn-lex (ebnf-buffer-substring "0-9"))
        'integer)
       ;; terminal: "string" or %[bdx]NNN((.NNN)+|-NNN)?
       ((eq token 'terminal)
        (setq ebnf-abn-lex
              (if (= (following-char) ?\")
                  (ebnf-abn-string)
                (ebnf-abn-character)))
        'terminal)
       ;; non-terminal: NAME or <NAME>
       ((eq token 'non-terminal)
        (let ((prose-p (= (following-char) ?<)))
          (when prose-p
            (forward-char)
            ;; NOTE(review): `looking-at' takes a regexp, while the
            ;; sibling constant `ebnf-abn-non-terminal-chars' is used
            ;; as a character set -- confirm this check matches what
            ;; is intended.
            (or (looking-at ebnf-abn-non-terminal-letter-chars)
                (error "Invalid prose value")))
          (setq ebnf-abn-lex
                (ebnf-buffer-substring ebnf-abn-non-terminal-chars))
          (when prose-p
            (or (= (following-char) ?>)
                (error "Invalid prose value"))
            ;; consume the closing `>': it has no entry of its own in
            ;; the token table, so leaving it at point would make the
            ;; next call signal "Invalid character"
            (forward-char)
            (setq ebnf-abn-lex (concat "<" ebnf-abn-lex ">"))))
        'non-terminal)
       ;; equal: =, =/
       ((eq token 'equal)
        (forward-char)
        (if (/= (following-char) ?/)
            'equal
          (forward-char)
          'incremental-alternative))
       ;; miscellaneous: (, ), [, ], /, *
       (t
        (forward-char)
        token)))))
(defun ebnf-abn-skip-end-of-rule ()
  "Skip end of line characters, white space and comments.
Return nil if some run of two or more consecutive end of line
characters was skipped, which is what marks the end of a rule;
return t otherwise."
  (let ((rule-ended nil)
        (more t))
    (while more
      ;; end of rule ==> 2 or more consecutive end of lines
      (when (> (skip-chars-forward "\r\n" ebnf-limit) 1)
        (setq rule-ended t))
      ;; skip spaces
      (skip-chars-forward " \013\t" ebnf-limit)
      ;; skip a comment, and go around again if it was skipped
      (setq more (and (= (following-char) ?\;)
                      (ebnf-abn-skip-comment))))
    (not rule-ended)))
;; replace the range "\177-\237" (see `ebnf-range-regexp').
(defconst ebnf-abn-comment-chars
  (ebnf-range-regexp "^\n\000-\010\016-\037" ?\177 ?\237)
  "Character set used by `ebnf-abn-skip-comment' to skip comment
text and by `ebnf-abn-eps-filename' to scan a file name.")
(defun ebnf-abn-skip-comment ()
  "Skip the comment whose first character is at point.
When `ebnf-eps-executing' is non-nil, a comment whose second
character is `[', `]', `H' or `F' is handed, with the text that
follows that character, to the matching `ebnf-eps-...' function.
Any other comment sets `ebnf-action' from `ebnf-comment-table'
and is skipped.
Return t if a newline follows the comment, nil if `ebnf-limit'
was reached; signal an error on any other character."
  (forward-char)
  (cond
   ;; open EPS file
   ((and ebnf-eps-executing (= (following-char) ?\[))
    (ebnf-eps-add-context (ebnf-abn-eps-filename)))
   ;; close EPS file
   ((and ebnf-eps-executing (= (following-char) ?\]))
    (ebnf-eps-remove-context (ebnf-abn-eps-filename)))
   ;; EPS header
   ((and ebnf-eps-executing (= (following-char) ?H))
    (ebnf-eps-header-comment (ebnf-abn-eps-filename)))
   ;; EPS footer
   ((and ebnf-eps-executing (= (following-char) ?F))
    (ebnf-eps-footer-comment (ebnf-abn-eps-filename)))
   ;; any other action in comment
   (t
    (setq ebnf-action (aref ebnf-comment-table (following-char)))
    (skip-chars-forward ebnf-abn-comment-chars ebnf-limit)))
  ;; check for a valid end of comment
  (cond ((>= (point) ebnf-limit)
         nil)
        ((= (following-char) ?\n)
         t)
        (t
         (error "Invalid character"))))
(defun ebnf-abn-eps-filename ()
  "Step over the character at point and return the text after it.
The text is what `ebnf-buffer-substring' returns for
`ebnf-abn-comment-chars'."
  (forward-char 1)
  (let ((text (ebnf-buffer-substring ebnf-abn-comment-chars)))
    text))
;; replace the range "\240-\377" (see `ebnf-range-regexp').
(defconst ebnf-abn-string-chars
  (ebnf-range-regexp " -!#-~" ?\240 ?\377)
  "Character set `ebnf-abn-string' passes to `skip-chars-forward'
to skip the inside of a quoted string.")
(defun ebnf-abn-string ()
  "Scan the quoted string whose opening quote is at point.
Return the text between the quotes, without text properties, and
leave point after the closing quote.
Signal an error if the characters of `ebnf-abn-string-chars' are
not followed by a double quote."
  ;; step over the opening quote
  (forward-char)
  (let ((beg (point)))
    (skip-chars-forward ebnf-abn-string-chars ebnf-limit)
    (unless (= (following-char) ?\")
      (error "Missing `\"'"))
    (prog1
        (buffer-substring-no-properties beg (point))
      ;; step over the closing quote
      (forward-char))))
(defun ebnf-abn-character ()
  "Scan a numeric terminal value that starts at point.
The character after point selects the digits accepted: B or b
for \"01\", D or d for \"0-9\", X or x for \"0-9A-Fa-f\".  At least one
digit must follow, then either `-' and more digits, or any number
of `.' each followed by digits.
Return the text scanned, from the starting point on, without text
properties.  Signal an error on a malformed value."
  ;; %[bdx]NNN((-NNN)|(.NNN)+)?
  (let ((beg (point))
        (base (progn
                (forward-char)
                (following-char)))
        digits)
    (setq digits
          (cond ((memq base '(?B ?b)) "01")
                ((memq base '(?D ?d)) "0-9")
                ((memq base '(?X ?x)) "0-9A-Fa-f")
                (t (error "Invalid terminal value"))))
    (forward-char)
    (unless (> (skip-chars-forward digits ebnf-limit) 0)
      (error "Invalid terminal value"))
    (cond
     ;; range: NNN-NNN
     ((= (following-char) ?-)
      (forward-char)
      (unless (> (skip-chars-forward digits ebnf-limit) 0)
        (error "Invalid terminal value range")))
     ;; series: NNN(.NNN)*
     (t
      (while (= (following-char) ?.)
        (forward-char)
        (unless (> (skip-chars-forward digits ebnf-limit) 0)
          (error "Invalid terminal value")))))
    (buffer-substring-no-properties beg (point))))
663 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
666 (provide 'ebnf-abn)
668 ;;; arch-tag: 8d1b3c4d-4226-4393-b9ae-b7ccf07cf779
669 ;;; ebnf-abn.el ends here