1 ;;; ebnf-iso.el --- parser for ISO EBNF
3 ;; Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
4 ;; Free Software Foundation, Inc.
6 ;; Author: Vinicius Jose Latorre <viniciusjl@ig.com.br>
7 ;; Maintainer: Vinicius Jose Latorre <viniciusjl@ig.com.br>
8 ;; Keywords: wp, ebnf, PostScript
11 ;; This file is part of GNU Emacs.
13 ;; GNU Emacs is free software: you can redistribute it and/or modify
14 ;; it under the terms of the GNU General Public License as published by
15 ;; the Free Software Foundation, either version 3 of the License, or
16 ;; (at your option) any later version.
18 ;; GNU Emacs is distributed in the hope that it will be useful,
19 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
20 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 ;; GNU General Public License for more details.
23 ;; You should have received a copy of the GNU General Public License
24 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
28 ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
31 ;; This is part of ebnf2ps package.
33 ;; This package defines a parser for ISO EBNF.
35 ;; See ebnf2ps.el for documentation.
42 ;; `http://www.cl.cam.ac.uk/~mgk25/iso-ebnf.html'
43 ;; ("International Standard of the ISO EBNF Notation").
46 ;; ISO EBNF = syntax rule, {syntax rule};
48 ;; syntax rule = meta identifier, '=', definition list, ';';
50 ;; definition list = single definition, {'|', single definition};
52 ;; single definition = term, {',', term};
54 ;; term = factor, ['-', exception];
56 ;; exception = factor (* without <meta identifier> *);
58 ;; factor = [integer, '*'], primary;
60 ;; primary = optional sequence | repeated sequence | special sequence
61 ;; | grouped sequence | meta identifier | terminal string
66 ;; optional sequence = '[', definition list, ']';
68 ;; repeated sequence = '{', definition list, '}';
70 ;; grouped sequence = '(', definition list, ')';
72 ;; terminal string = "'", character - "'", {character - "'"}, "'"
73 ;; | '"', character - '"', {character - '"'}, '"';
75 ;; special sequence = '?', {character - '?'}, '?';
77 ;; meta identifier = letter, { letter | decimal digit | ' ' };
79 ;; integer = decimal digit, {decimal digit};
81 ;; comment = '(*', {comment symbol}, '*)';
83 ;; comment symbol = comment (* <== NESTED COMMENT *)
84 ;; | terminal string | special sequence | character;
86 ;; letter = ? A-Z a-z ?;
88 ;; decimal digit = ? 0-9 ?;
90 ;; character = letter | decimal digit
91 ;; | ',' | '=' | '|' | '/' | '!' | '*' | '(' | ')' | '[' | ']' | '{'
92 ;; | '}' | "'" | '"' | '?' | '-' | ';' | '.' | ' ' | ':' | '+' | '_'
93 ;; | '%' | '@' | '&' | '#' | '$' | '<' | '>' | '\' | '^' | '`' | '~';
96 ;; There is also the following alternative representation:
98 ;; STANDARD ALTERNATIVE
107 ;; Differences Between ISO EBNF And ebnf2ps ISO EBNF
108 ;; -------------------------------------------------
110 ;; ISO EBNF accepts the characters given by <character> production above,
111 ;; HORIZONTAL TAB (^I), VERTICAL TAB (^K), NEWLINE (^J or ^M) and FORM FEED
112 ;; (^L); any other character is invalid. However, ebnf2ps also accepts the
113 ;; European 8-bit accented characters (from \240 to \377) and underscore
117 ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Written by the lexer function `ebnf-iso-lex' with the text of the token
;; it has just scanned (digits, terminal string, special sequence, ...) and
;; read back by the parser functions in this file right after they receive
;; the corresponding token symbol.
125 (defvar ebnf-iso-lex nil
126 "Value returned by `ebnf-iso-lex' function.")
;; Flag let-bound to t by `ebnf-iso-term' while it parses the exception
;; part of a term; while it is non-nil the lexer signals an error instead
;; of accepting a meta identifier.
129 (defvar ebnf-no-meta-identifier nil
130 "Used by `ebnf-iso-term' and `ebnf-iso-lex' functions.")
133 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
134 ;; Syntactic analyzer
137 ;;; ISO EBNF = syntax rule, {syntax rule};
;; Entry point of the ISO EBNF parser: fetches tokens with `ebnf-iso-lex'
;; and parses one syntax rule after another with `ebnf-iso-syntax-rule'
;; until the `end-of-input' token is seen.
;; START is subtracted from `ebnf-limit' to size the input (presumably the
;; buffer position where parsing begins -- TODO confirm against caller).
;; NOTE(review): several lines of this definition are not visible in this
;; excerpt; the comments below describe only the lines that are shown.
139 (defun ebnf-iso-parser (start)
;; TOTAL = ebnf-limit - START + 1.
141 (let ((total (+ (- ebnf-limit start
) 1))
144 syntax-list token rule
)
146 (setq token
(ebnf-iso-lex))
;; An input that yields no token at all is rejected.
147 (and (eq token
'end-of-input
)
148 (error "Invalid ISO EBNF file format"))
;; One syntax rule is consumed per iteration.
149 (while (not (eq token
'end-of-input
))
;; Progress in percent: (point - bias) * 100 / TOTAL.
152 (/ (* (- (point) bias
) 100.0) total
))
153 (setq token
(ebnf-iso-syntax-rule token
)
;; RULE is pushed onto SYNTAX-LIST unless `ebnf-add-empty-rule-list'
;; returns non-nil for it.
156 (or (ebnf-add-empty-rule-list rule
)
157 (setq syntax-list
(cons rule syntax-list
))))
162 ;;; syntax rule = meta identifier, '=', definition list, ';';
;; Parse one syntax rule.  TOKEN is the token already read by the caller
;; and must be `non-terminal' (the rule's meta identifier).
;; Signals an error when the meta identifier, the `=' or the terminating
;; `period' token is missing.
;; NOTE(review): some lines of this definition are not visible in this
;; excerpt (e.g. where `action' is bound).
164 (defun ebnf-iso-syntax-rule (token)
;; HEADER = text of the meta identifier, as left in variable `ebnf-iso-lex'.
165 (let ((header ebnf-iso-lex
)
168 (setq ebnf-action nil
)
169 (or (eq token
'non-terminal
)
170 (error "Invalid meta identifier syntax rule"))
171 (or (eq (ebnf-iso-lex) 'equal
)
172 (error "Invalid syntax rule: missing `='"))
;; BODY is a cons: its car is the token that ended the definition list,
;; its cdr the parsed definition list itself.
173 (setq body
(ebnf-iso-definition-list))
174 (or (eq (car body
) 'period
)
175 (error "Invalid syntax rule: missing `;' or `.'"))
176 (setq body
(cdr body
))
177 (ebnf-eps-add-production header
)
179 (ebnf-make-production header body action
))))
182 ;;; definition list = single definition, {'|', single definition};
;; Parse a definition list: single definitions are read with
;; `ebnf-iso-single-definition' and accumulated in BODY, then handed to
;; `ebnf-token-alternative' together with the last SEQUENCE.
;; NOTE(review): the line holding the token that the loop compares against
;; is not visible in this excerpt (presumably `alternative' -- TODO confirm).
184 (defun ebnf-iso-definition-list ()
;; SEQUENCE is a cons whose car is tested by the loop condition.
186 (while (eq (car (setq sequence
(ebnf-iso-single-definition)))
;; Keep only the cdr of SEQUENCE and push it onto BODY.
188 (setq sequence
(cdr sequence
)
189 body
(cons sequence body
)))
190 (ebnf-token-alternative body sequence
)))
193 ;;; single definition = term, {',', term};
;; Parse a single definition: terms are read with `ebnf-iso-term' and
;; pushed onto SEQ for as long as a term was read and TOKEN is `catenate';
;; the result is built by `ebnf-token-sequence'.
;; NOTE(review): this definition is truncated in this excerpt; how TOKEN is
;; assigned and what is passed to `ebnf-token-sequence' is not visible.
195 (defun ebnf-iso-single-definition ()
196 (let (token seq term
)
197 (while (and (setq term
(ebnf-iso-term (ebnf-iso-lex))
200 (eq token
'catenate
))
201 (setq seq
(cons term seq
)))
203 (ebnf-token-sequence (if term
208 ;;; term = factor, ['-', exception];
210 ;;; exception = factor (* without <meta identifier> *);
;; Parse a term starting at TOKEN.  A factor is parsed first; FACTOR is a
;; cons whose car is compared with `except' to detect `factor - exception'.
;; NOTE(review): the branch taken when no exception follows is not visible
;; in this excerpt.
212 (defun ebnf-iso-term (token)
213 (let ((factor (ebnf-iso-factor token
)))
214 (if (not (eq (car factor
) 'except
))
217 ;; factor - exception
;; `ebnf-no-meta-identifier' is bound to t while the exception factor is
;; parsed, so the lexer rejects meta identifiers inside the exception.
218 (let ((ebnf-no-meta-identifier t
))
219 (ebnf-token-except (cdr factor
) (ebnf-iso-factor (ebnf-iso-lex)))))))
222 ;;; factor = [integer, '*'], primary;
(defun ebnf-iso-factor (token)
  ;; Parse one factor starting at TOKEN.
  ;; Without a leading `integer' token the factor is just a primary.
  ;; With one, the integer text (left in variable `ebnf-iso-lex' by the
  ;; lexer) is the repetition count; it must be followed by a `repeat'
  ;; token (`*') and then by the primary to repeat.
  (cond
   ((not (eq token 'integer))
    (ebnf-iso-primary token))
   (t
    (let ((count ebnf-iso-lex))
      (unless (eq (ebnf-iso-lex) 'repeat)
        (error "Missing `*'"))
      (ebnf-token-repeat count
                         (ebnf-iso-primary (ebnf-iso-lex)))))))
233 ;;; primary = optional sequence | repeated sequence | special sequence
234 ;;; | grouped sequence | meta identifier | terminal string
239 ;;; optional sequence = '[', definition list, ']';
241 ;;; repeated sequence = '{', definition list, '}';
243 ;;; grouped sequence = '(', definition list, ')';
245 ;;; terminal string = "'", character - "'", {character - "'"}, "'"
246 ;;; | '"', character - '"', {character - '"'}, '"';
248 ;;; special sequence = '?', {character - '?'}, '?';
250 ;;; meta identifier = letter, {letter | decimal digit | ' '};
;; Parse a primary starting at TOKEN by dispatching on the token symbol.
;; Terminals, non-terminals and special sequences are built from the text
;; in variable `ebnf-iso-lex'; groups, optionals and repetitions parse a
;; nested definition list and check its closing token.
;; NOTE(review): several lines of this definition are not visible in this
;; excerpt (the `cond' head, some clause tests and clause results).
252 (defun ebnf-iso-primary (token)
256 ((eq token
'terminal
)
257 (ebnf-make-terminal ebnf-iso-lex
))
259 ((eq token
'non-terminal
)
260 (ebnf-make-non-terminal ebnf-iso-lex
))
263 (ebnf-make-special ebnf-iso-lex
))
;; Grouped sequence: the car of BODY is the token that ended the nested
;; definition list and must be `end-group'.
265 ((eq token
'begin-group
)
266 (let ((body (ebnf-iso-definition-list)))
267 (or (eq (car body
) 'end-group
)
268 (error "Missing `)'"))
;; Optional sequence: same check against `end-optional'.
271 ((eq token
'begin-optional
)
272 (let ((body (ebnf-iso-definition-list)))
273 (or (eq (car body
) 'end-optional
)
274 (error "Missing `]' or `/)'"))
275 (ebnf-token-optional (cdr body
))))
;; Repeated sequence: same check against `end-zero-or-more'.
277 ((eq token
'begin-zero-or-more
)
278 (let* ((body (ebnf-iso-definition-list))
280 (or (eq (car body
) 'end-zero-or-more
)
281 (error "Missing `}' or `:)'"))
282 (ebnf-make-zero-or-more repeat
)))
293 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; 256-entry vector indexed by character code; each slot holds the symbol
;; of the lexical token that the character starts.  Every slot starts as
;; `error'; ranges are then filled in ascending order and individual
;; characters are overridden afterwards.
;; NOTE(review): some lines of this definition (e.g. where `char' is
;; initialized) are not visible in this excerpt.
297 (defconst ebnf-iso-token-table
298 ;; control character & 8-bit character are set to `error'
299 (let ((table (make-vector 256 'error
))
301 ;; printable character
302 (while (< char ?
\060)
303 (aset table char
'character
)
304 (setq char
(1+ char
)))
;; up to (excluding) \072: integer
306 (while (< char ?
\072)
307 (aset table char
'integer
)
308 (setq char
(1+ char
)))
;; up to (excluding) \101: plain characters
309 (while (< char ?
\101)
310 (aset table char
'character
)
311 (setq char
(1+ char
)))
312 ;; upper case letters:
313 (while (< char ?
\133)
314 (aset table char
'non-terminal
)
315 (setq char
(1+ char
)))
;; up to (excluding) \141: plain characters
316 (while (< char ?
\141)
317 (aset table char
'character
)
318 (setq char
(1+ char
)))
319 ;; lower case letters:
320 (while (< char ?
\173)
321 (aset table char
'non-terminal
)
322 (setq char
(1+ char
)))
;; up to (excluding) \177: plain characters
323 (while (< char ?
\177)
324 (aset table char
'character
)
325 (setq char
(1+ char
)))
326 ;; European 8-bit accented characters:
328 (while (< char ?
\400)
329 (aset table char
'non-terminal
)
330 (setq char
(1+ char
)))
331 ;; Override space characters:
332 (aset table ?
\013 'space
) ; [VT] vertical tab
333 (aset table ?
\n 'space
) ; [NL] linefeed
334 (aset table ?
\r 'space
) ; [CR] carriage return
335 (aset table ?
\t 'space
) ; [HT] horizontal tab
336 (aset table ?\
'space
) ; [SP] space
337 ;; Override form feed character:
338 (aset table ?
\f 'form-feed
) ; [FF] form feed
339 ;; Override other lexical characters:
340 (aset table ?_
'non-terminal
)
341 (aset table ?
\" 'double-terminal
)
342 (aset table ?
\' 'single-terminal
)
343 (aset table ?
\? 'special
)
344 (aset table ?
* 'repeat
)
345 (aset table ?
, 'catenate
)
346 (aset table ?-
'except
)
347 (aset table ?
= 'equal
)
348 (aset table ?\
) 'end-group
)
350 "Vector used to map characters to a lexical token.")
;; Patch `ebnf-iso-token-table' in place for the representation selected
;; by `ebnf-iso-alternative-p'.  Both branches assign the same eleven
;; characters, so calling this again with the other setting fully switches
;; the table back.
;; NOTE(review): one line of this definition is not visible in this
;; excerpt (presumably the form grouping the first branch -- TODO confirm).
353 (defun ebnf-iso-initialize ()
354 "Initialize ISO EBNF token table."
355 (if ebnf-iso-alternative-p
356 ;; Override alternative lexical characters:
;; `(' becomes `left-parenthesis'; brackets, braces, `|' and `;' become
;; plain characters; `/', `!', `:' and `.' get tokens of their own.
358 (aset ebnf-iso-token-table ?\
( 'left-parenthesis
)
359 (aset ebnf-iso-token-table ?\
[ 'character
)
360 (aset ebnf-iso-token-table ?\
] 'character
)
361 (aset ebnf-iso-token-table ?\
{ 'character
)
362 (aset ebnf-iso-token-table ?\
} 'character
)
363 (aset ebnf-iso-token-table ?|
'character
)
364 (aset ebnf-iso-token-table ?\
; 'character)
365 (aset ebnf-iso-token-table ?
/ 'slash
)
366 (aset ebnf-iso-token-table ?
! 'alternative
)
367 (aset ebnf-iso-token-table ?
: 'colon
)
368 (aset ebnf-iso-token-table ?.
'period
))
369 ;; Override standard lexical characters:
;; `(' becomes `begin-parenthesis'; brackets, braces, `|' and `;' get
;; their structural tokens; `/', `!', `:' and `.' become plain characters.
370 (aset ebnf-iso-token-table ?\
( 'begin-parenthesis
)
371 (aset ebnf-iso-token-table ?\
[ 'begin-optional
)
372 (aset ebnf-iso-token-table ?\
] 'end-optional
)
373 (aset ebnf-iso-token-table ?\
{ 'begin-zero-or-more
)
374 (aset ebnf-iso-token-table ?\
} 'end-zero-or-more
)
375 (aset ebnf-iso-token-table ?|
'alternative
)
376 (aset ebnf-iso-token-table ?\
; 'period)
377 (aset ebnf-iso-token-table ?
/ 'character
)
378 (aset ebnf-iso-token-table ?
! 'character
)
379 (aset ebnf-iso-token-table ?
: 'character
)
380 (aset ebnf-iso-token-table ?.
'character
)))
383 ;; replace the range "\240-\377" (see `ebnf-range-regexp').
(defconst ebnf-iso-non-terminal-chars
  ;; Character set handed to `ebnf-buffer-substring' when a meta identifier
  ;; is scanned: space, decimal digits, ASCII letters and underscore, with
  ;; the 8-bit range \240-\377 supplied through `ebnf-range-regexp'.
  (ebnf-range-regexp " 0-9A-Za-z_"
                     ?\240
                     ?\377))
;; Lexer: skips spaces and comments, classifies the character at point
;; through `ebnf-iso-token-table', and stores the token text in variable
;; `ebnf-iso-lex' for integers, special sequences, terminal strings and
;; meta identifiers.
;; NOTE(review): many lines of this definition are not visible in this
;; excerpt (including the values returned), so the comments below describe
;; only the lines that are shown.
388 (defun ebnf-iso-lex ()
389 "Lexical analyzer for ISO EBNF.
391 Return a lexical token.
393 See documentation for variable `ebnf-iso-lex'."
;; Nothing left to scan once point has reached `ebnf-limit'.
394 (if (>= (point) ebnf-limit
)
397 ;; skip spaces and comments
;; Characters above 255 are outside the 256-entry token table and are
;; tested for before the table lookup.
398 (while (if (> (following-char) 255)
402 (setq token
(aref ebnf-iso-token-table
(following-char)))
405 (skip-chars-forward " \013\n\r\t" ebnf-limit
)
406 (< (point) ebnf-limit
))
;; An opening parenthesis (standard or alternative representation) is
;; checked for a following `*', in which case a comment is skipped.
407 ((or (eq token
'begin-parenthesis
)
408 (eq token
'left-parenthesis
))
410 (if (/= (following-char) ?
*)
414 (ebnf-iso-skip-comment)
;; A form feed is recorded as the pending action.
416 ((eq token
'form-feed
)
418 (setq ebnf-action
'form-feed
))
423 ((>= (point) ebnf-limit
)
427 (error "Invalid character"))
;; Integer: the token text is the run of decimal digits.
430 (setq ebnf-iso-lex
(ebnf-buffer-substring "0-9"))
432 ;; special: ?special?
;; The `?' delimiters are kept in the text only when
;; `ebnf-special-show-delimiter' is non-nil.
434 (setq ebnf-iso-lex
(concat (and ebnf-special-show-delimiter
"?")
435 (ebnf-string " ->@-~" ?
\? "special")
436 (and ebnf-special-show-delimiter
"?")))
438 ;; terminal: "string"
439 ((eq token
'double-terminal
)
440 (setq ebnf-iso-lex
(ebnf-string " !#-~" ?
\" "terminal"))
442 ;; terminal: 'string'
443 ((eq token
'single-terminal
)
444 (setq ebnf-iso-lex
(ebnf-string " -&(-~" ?
\' "terminal"))
;; Meta identifier: scanned with `ebnf-iso-non-terminal-chars'.
447 ((eq token
'non-terminal
)
451 (ebnf-buffer-substring ebnf-iso-non-terminal-chars
))))
;; Meta identifiers are forbidden while `ebnf-no-meta-identifier' is non-nil.
452 (and ebnf-no-meta-identifier
453 (error "Exception sequence should not contain a meta identifier"))
455 ;; begin optional, begin list or begin group
;; Alternative representation: the character after `(' is compared with
;; `/' and `:'.
456 ((eq token
'left-parenthesis
)
458 (cond ((= (following-char) ?
/)
461 ((= (following-char) ?
:)
467 ;; end optional or alternative
470 (if (/= (following-char) ?\
))
477 (if (/= (following-char) ?\
))
482 ((eq token
'begin-parenthesis
)
491 ;; replace the range "\177-\237" (see `ebnf-range-regexp').
(defconst ebnf-iso-comment-chars
  ;; Character set for `skip-chars-forward' while inside a comment.  The
  ;; leading `^' negates the set, so skipping stops at `*', `(', the control
  ;; characters \000-\010 and \016-\037, and the range \177-\237 supplied
  ;; through `ebnf-range-regexp'.
  (ebnf-range-regexp "^*(\000-\010\016-\037"
                     ?\177
                     ?\237))
;; Skip a `(* ... *)' comment.  The character at point is first inspected
;; for an action marker; the comment body is then skipped while PAIR is
;; adjusted at nested comment openers and closers.
;; NOTE(review): several lines of this definition are not visible in this
;; excerpt (including the loop head and the initial value of PAIR).
496 (defun ebnf-iso-skip-comment ()
;; While `ebnf-eps-executing' is non-nil, the markers `[', `]', `H' and `F'
;; are each followed by a name read with `ebnf-iso-eps-filename'.
500 ((and ebnf-eps-executing
(= (following-char) ?\
[))
501 (ebnf-eps-add-context (ebnf-iso-eps-filename)))
503 ((and ebnf-eps-executing
(= (following-char) ?\
]))
504 (ebnf-eps-remove-context (ebnf-iso-eps-filename)))
506 ((and ebnf-eps-executing
(= (following-char) ?H
))
507 (ebnf-eps-header-comment (ebnf-iso-eps-filename)))
509 ((and ebnf-eps-executing
(= (following-char) ?F
))
510 (ebnf-eps-footer-comment (ebnf-iso-eps-filename)))
511 ;; any other action in comment
513 (setq ebnf-action
(aref ebnf-comment-table
(following-char))))
;; Skip ordinary comment text, then look at the character that stopped
;; the skip.
517 (skip-chars-forward ebnf-iso-comment-chars ebnf-limit
)
518 (cond ((>= (point) ebnf-limit
)
519 (error "Missing end of comment: `*)'"))
;; A run of `*' followed by `)' decrements PAIR.
520 ((= (following-char) ?
*)
521 (skip-chars-forward "*" ebnf-limit
)
522 (when (= (following-char) ?\
))
525 (setq pair
(1- pair
))))
;; A run of `(' followed by `*' increments PAIR.
526 ((= (following-char) ?\
()
527 (skip-chars-forward "(" ebnf-limit
)
528 (when (= (following-char) ?
*)
529 ;; beginning of comment
531 (setq pair
(1+ pair
))))
;; Anything else that stopped the skip is not allowed in a comment.
533 (error "Invalid character"))
;; Extract text from the buffer with `buffer-substring-no-properties';
;; the callers in `ebnf-iso-skip-comment' pass the result to the
;; `ebnf-eps-...' functions.
;; NOTE(review): most of this definition is not visible in this excerpt,
;; including how the start and end of the extracted text are computed.
537 (defun ebnf-iso-eps-filename ()
539 (buffer-substring-no-properties
;; CHARS is the comment character set extended with newline.
541 (let ((chars (concat ebnf-iso-comment-chars
"\n"))
544 (skip-chars-forward chars ebnf-limit
)
;; The character that stopped the skip is examined: end of input, a run
;; of `*' tested for a following `)', or a `(' tested for a following `*'.
546 (cond ((>= (point) ebnf-limit
)
548 ((= (following-char) ?
*)
549 (skip-chars-forward "*" ebnf-limit
)
550 (if (/= (following-char) ?\
))
554 ((= (following-char) ?\
()
556 (if (/= (following-char) ?
*)
;; Normalize spaces in STR, controlled by `ebnf-iso-normalize-p'.
;; A first pass counts the "exceeding" spaces in SPACES; a second pass
;; copies STR into a new string that is SPACES characters shorter.
;; NOTE(review): many lines of this definition are not visible in this
;; excerpt (loop heads, index initialization, return values); it appears to
;; collapse each run of consecutive spaces into one -- TODO confirm.
566 (defun ebnf-iso-normalize (str)
567 (if (not ebnf-iso-normalize-p
)
569 (let ((len (length str
))
572 ;; count exceeding spaces
;; A non-space only advances STRI; a space advances STRI and is followed
;; by a loop over the spaces that come directly after it.
574 (if (/= (aref str stri
) ?\
)
575 (setq stri
(1+ stri
))
576 (setq stri
(1+ stri
))
577 (while (and (< stri len
) (= (aref str stri
) ?\
))
579 spaces
(1+ spaces
)))))
581 ;; no exceeding space
583 ;; at least one exceeding space
;; NEW is allocated at the final length, filled with spaces.
584 (let ((new (make-string (- len spaces
) ?\
))
586 ;; eliminate exceeding spaces
589 (if (/= (aref str stri
) ?\
)
591 (aset new newi
(aref str stri
))
594 (aset new newi
(aref str stri
))
;; SPACES counts down here; the loop ends when it reaches zero.
597 (while (and (> spaces
0) (= (aref str stri
) ?\
))
599 spaces
(1- spaces
)))))
600 ;; remaining is normalized
602 (aset new newi
(aref str stri
))
608 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
614 ;; arch-tag: 03315eef-8f64-404a-bf9d-256d42442ee3
615 ;;; ebnf-iso.el ends here