1 ;;; ebnf-iso.el --- parser for ISO EBNF
3 ;; Copyright (C) 1999, 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
5 ;; Author: Vinicius Jose Latorre <vinicius@cpqd.com.br>
6 ;; Maintainer: Vinicius Jose Latorre <vinicius@cpqd.com.br>
7 ;; Keywords: wp, ebnf, PostScript
8 ;; Time-stamp: <2003/08/12 21:29:14 vinicius>
11 ;; This file is part of GNU Emacs.
13 ;; GNU Emacs is free software; you can redistribute it and/or modify
14 ;; it under the terms of the GNU General Public License as published by
15 ;; the Free Software Foundation; either version 2, or (at your option)
18 ;; GNU Emacs is distributed in the hope that it will be useful,
19 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
20 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 ;; GNU General Public License for more details.
23 ;; You should have received a copy of the GNU General Public License
24 ;; along with GNU Emacs; see the file COPYING. If not, write to the
25 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
26 ;; Boston, MA 02111-1307, USA.
30 ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
33 ;; This is part of ebnf2ps package.
35 ;; This package defines a parser for ISO EBNF.
37 ;; See ebnf2ps.el for documentation.
44 ;; `http://www.cl.cam.ac.uk/~mgk25/iso-ebnf.html'
45 ;; ("International Standard of the ISO EBNF Notation").
48 ;; ISO EBNF = syntax rule, {syntax rule};
50 ;; syntax rule = meta identifier, '=', definition list, ';';
52 ;; definition list = single definition, {'|', single definition};
54 ;; single definition = term, {',', term};
56 ;; term = factor, ['-', exception];
58 ;; exception = factor (* without <meta identifier> *);
60 ;; factor = [integer, '*'], primary;
62 ;; primary = optional sequence | repeated sequence | special sequence
63 ;; | grouped sequence | meta identifier | terminal string
68 ;; optional sequence = '[', definition list, ']';
70 ;; repeated sequence = '{', definition list, '}';
72 ;; grouped sequence = '(', definition list, ')';
74 ;; terminal string = "'", character - "'", {character - "'"}, "'"
75 ;; | '"', character - '"', {character - '"'}, '"';
77 ;; special sequence = '?', {character - '?'}, '?';
79 ;; meta identifier = letter, { letter | decimal digit | ' ' };
81 ;; integer = decimal digit, {decimal digit};
83 ;; comment = '(*', {comment symbol}, '*)';
85 ;; comment symbol = comment (* <== NESTED COMMENT *)
86 ;; | terminal string | special sequence | character;
88 ;; letter = ? A-Z a-z ?;
90 ;; decimal digit = ? 0-9 ?;
92 ;; character = letter | decimal digit
93 ;; | ',' | '=' | '|' | '/' | '!' | '*' | '(' | ')' | '[' | ']' | '{'
94 ;; | '}' | "'" | '"' | '?' | '-' | ';' | '.' | ' ' | ':' | '+' | '_'
95 ;; | '%' | '@' | '&' | '#' | '$' | '<' | '>' | '\' | '^' | '`' | '~';
98 ;; There is also the following alternative representation:
100 ;; STANDARD ALTERNATIVE
109 ;; Differences Between ISO EBNF And ebnf2ps ISO EBNF
110 ;; -------------------------------------------------
112 ;; ISO EBNF accepts the characters given by the <character> production above,
113 ;; plus HORIZONTAL TAB (^I), VERTICAL TAB (^K), NEWLINE (^J or ^M) and FORM
114 ;; FEED (^L); any other character is illegal.  ebnf2ps additionally accepts
115 ;; the European 8-bit accented characters (from \240 to \377).
118 ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Shares its name with the lexer function; the parser routines read this
;; variable right after a token has been scanned.
(defvar ebnf-iso-lex nil
  "Text of the token most recently scanned by the function `ebnf-iso-lex'.")
;; `ebnf-iso-term' binds this to t around the exception part of a term;
;; `ebnf-iso-lex' signals an error for a meta identifier while it is set.
(defvar ebnf-no-meta-identifier nil
  "Non-nil means a meta identifier is not allowed in the input being scanned.
Shared between the functions `ebnf-iso-term' and `ebnf-iso-lex'.")
134 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
135 ;; Syntactic analyzer
138 ;;; ISO EBNF = syntax rule, {syntax rule};
;; Top-level driver of the syntactic analyzer.
;; START is combined with `ebnf-limit' to obtain TOTAL, the size used as the
;; denominator of the progress percentage (presumably START is a buffer
;; position -- confirm against the caller).
;; Visible flow: read the first token with `ebnf-iso-lex'; signal
;; "Invalid ISO EBNF file format" if the input is already at `end-of-input';
;; then, until `end-of-input', parse one rule per iteration with
;; `ebnf-iso-syntax-rule' and push RULE onto `syntax-list' unless
;; `ebnf-add-empty-rule-list' returns non-nil for it.
;; NOTE(review): several lines of this definition (among them the binding of
;; `bias' and the closing forms) are not present in this copy -- confirm
;; against the complete file before relying on this description.
140 (defun ebnf-iso-parser (start)
142 (let ((total (+ (- ebnf-limit start
) 1))
145 syntax-list token rule
)
;; Prime the loop with the first token; empty input is an error.
147 (setq token
(ebnf-iso-lex))
148 (and (eq token
'end-of-input
)
149 (error "Invalid ISO EBNF file format"))
150 (while (not (eq token
'end-of-input
))
153 (/ (* (- (point) bias
) 100.0) total
))
154 (setq token
(ebnf-iso-syntax-rule token
)
157 (or (ebnf-add-empty-rule-list rule
)
158 (setq syntax-list
(cons rule syntax-list
))))
163 ;;; syntax rule = meta identifier, '=', definition list, ';';
;; Parse one syntax rule whose first token, TOKEN, has already been read.
;; HEADER is taken from the variable `ebnf-iso-lex' before any further call
;; to the lexer, i.e. it is the text belonging to TOKEN.
;; Errors signalled by the visible code:
;;   - TOKEN is not `non-terminal';
;;   - the next token is not `equal';
;;   - the definition list is not terminated by a `period' token.
;; On success the production is registered with `ebnf-eps-add-production'
;; and built with `ebnf-make-production'.
;; NOTE(review): the bindings of `body' and `action' and the form wrapping
;; the final call are not present in this copy -- confirm against the
;; complete file.
165 (defun ebnf-iso-syntax-rule (token)
166 (let ((header ebnf-iso-lex
)
169 (setq ebnf-action nil
)
170 (or (eq token
'non-terminal
)
171 (error "Invalid meta identifier syntax rule"))
172 (or (eq (ebnf-iso-lex) 'equal
)
173 (error "Invalid syntax rule: missing `='"))
174 (setq body
(ebnf-iso-definition-list))
175 (or (eq (car body
) 'period
)
176 (error "Invalid syntax rule: missing `;' or `.'"))
;; Drop the terminating token; only the parsed definition list remains.
177 (setq body
(cdr body
))
178 (ebnf-eps-add-production header
)
180 (ebnf-make-production header body action
))))
183 ;;; definition list = single definition, {'|', single definition};
;; Parse a definition list.
;; Each call to `ebnf-iso-single-definition' yields a cons whose car is
;; compared against a token symbol and whose cdr is the parsed sequence.
;; While the loop test holds, the cdr is pushed onto BODY; afterwards BODY
;; and the last SEQUENCE are handed to `ebnf-token-alternative'.
;; NOTE(review): the `let' that binds `body' and `sequence', and the symbol
;; the car is compared with, are not present in this copy (the grammar
;; comment above suggests the `|' token) -- confirm against the complete file.
185 (defun ebnf-iso-definition-list ()
187 (while (eq (car (setq sequence
(ebnf-iso-single-definition)))
189 (setq sequence
(cdr sequence
)
190 body
(cons sequence body
)))
191 (ebnf-token-alternative body sequence
)))
194 ;;; single definition = term, {',', term};
;; Parse a single definition: terms separated by `catenate' tokens.
;; Each term comes from `ebnf-iso-term' applied to the next token from
;; `ebnf-iso-lex'; while the token that follows is `catenate', the term is
;; pushed onto SEQ (so SEQ is accumulated in reverse order).
;; The visible tail distinguishes a one-element sequence from the general
;; case, which is built with `ebnf-make-sequence' on the re-reversed list.
;; NOTE(review): the lines that set `token' inside the loop and most of the
;; final `cond' are not present in this copy -- confirm against the complete
;; file.
196 (defun ebnf-iso-single-definition ()
197 (let (token seq term
)
198 (while (and (setq term
(ebnf-iso-term (ebnf-iso-lex))
201 (eq token
'catenate
))
202 (setq seq
(cons term seq
)))
208 ;; sequence with only one element
209 ((and (null term
) (= (length seq
) 1))
213 (ebnf-make-sequence (nreverse (cons term seq
))))
217 ;;; term = factor, ['-', exception];
219 ;;; exception = factor (* without <meta identifier> *);
;; Parse a term starting at TOKEN.
;; The factor is parsed first; its car is the token that followed it.
;; When that token is `except', a second factor is parsed with
;; `ebnf-no-meta-identifier' bound to t (so the lexer rejects meta
;; identifiers inside the exception) and both parts are combined by
;; `ebnf-token-except'.
;; NOTE(review): the branch taken when no `except' follows is not present in
;; this copy -- confirm against the complete file.
221 (defun ebnf-iso-term (token)
222 (let ((factor (ebnf-iso-factor token
)))
223 (if (not (eq (car factor
) 'except
))
226 ;; factor - exception
227 (let ((ebnf-no-meta-identifier t
))
228 (ebnf-token-except (cdr factor
) (ebnf-iso-factor (ebnf-iso-lex)))))))
231 ;;; factor = [integer, '*'], primary;
(defun ebnf-iso-factor (token)
  ;; Parse a factor whose first token is TOKEN.
  ;; Without a leading integer the factor is just a primary.  With one, the
  ;; integer's text must be saved from the variable `ebnf-iso-lex' *before*
  ;; the lexer runs again, because scanning the next token overwrites it.
  (cond
   ((not (eq token 'integer))
    (ebnf-iso-primary token))
   (t
    (let ((count ebnf-iso-lex))
      ;; The integer must be followed by the `*' token.
      (unless (eq (ebnf-iso-lex) 'repeat)
        (error "Missing `*'"))
      (ebnf-token-repeat count (ebnf-iso-primary (ebnf-iso-lex)))))))
242 ;;; primary = optional sequence | repeated sequence | special sequence
243 ;;; | grouped sequence | meta identifier | terminal string
248 ;;; optional sequence = '[', definition list, ']';
250 ;;; repeated sequence = '{', definition list, '}';
252 ;;; grouped sequence = '(', definition list, ')';
254 ;;; terminal string = "'", character - "'", {character - "'"}, "'"
255 ;;; | '"', character - '"', {character - '"'}, '"';
257 ;;; special sequence = '?', {character - '?'}, '?';
259 ;;; meta identifier = letter, {letter | decimal digit | ' '};
;; Parse a primary selected by TOKEN.
;; Visible dispatch:
;;   `terminal'            -> `ebnf-make-terminal' on the token text
;;   `non-terminal'        -> `ebnf-make-non-terminal' on the token text
;;   (clause head missing) -> `ebnf-make-special' on the token text
;;   `begin-group'         -> definition list, must end with `end-group'
;;   `begin-optional'      -> definition list, must end with `end-optional',
;;                            wrapped by `ebnf-token-optional'
;;   `begin-zero-or-more'  -> definition list, must end with
;;                            `end-zero-or-more', built with
;;                            `ebnf-make-zero-or-more'
;; The token text is read from the variable `ebnf-iso-lex'.
;; NOTE(review): the opening form, several clause bodies (including the
;; binding of `repeat') and the tail of the function are not present in this
;; copy -- confirm against the complete file.
261 (defun ebnf-iso-primary (token)
265 ((eq token
'terminal
)
266 (ebnf-make-terminal ebnf-iso-lex
))
268 ((eq token
'non-terminal
)
269 (ebnf-make-non-terminal ebnf-iso-lex
))
272 (ebnf-make-special ebnf-iso-lex
))
274 ((eq token
'begin-group
)
275 (let ((body (ebnf-iso-definition-list)))
276 (or (eq (car body
) 'end-group
)
277 (error "Missing `)'"))
280 ((eq token
'begin-optional
)
281 (let ((body (ebnf-iso-definition-list)))
282 (or (eq (car body
) 'end-optional
)
283 (error "Missing `]' or `/)'"))
284 (ebnf-token-optional (cdr body
))))
286 ((eq token
'begin-zero-or-more
)
287 (let* ((body (ebnf-iso-definition-list))
289 (or (eq (car body
) 'end-zero-or-more
)
290 (error "Missing `}' or `:)'"))
291 (ebnf-make-zero-or-more repeat
)))
302 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Character-code -> token-symbol table, indexed with `aref' by the lexer.
;; Every one of the 256 entries starts as `error'.  Successive `while' loops
;; then walk `char' upward and tag the entries below each octal bound:
;;   < \060 `character',  < \072 `integer',      < \101 `character',
;;   < \133 `non-terminal' (upper case),         < \141 `character',
;;   < \173 `non-terminal' (lower case),         < \177 `character',
;;   < \400 `non-terminal' (8-bit characters).
;; Individual entries are finally overridden for white space, form feed and
;; the lexical characters `"', `'', `?', `*', `,', `-', `=' and `)'.
;; NOTE(review): the initial value of `char', the value it is reset to before
;; the 8-bit loop, and the form returning the table are not present in this
;; copy -- confirm against the complete file.
306 (defconst ebnf-iso-token-table
307 ;; control character & 8-bit character are set to `error'
308 (let ((table (make-vector 256 'error
))
310 ;; printable character
311 (while (< char ?
\060)
312 (aset table char
'character
)
313 (setq char
(1+ char
)))
315 (while (< char ?
\072)
316 (aset table char
'integer
)
317 (setq char
(1+ char
)))
318 (while (< char ?
\101)
319 (aset table char
'character
)
320 (setq char
(1+ char
)))
321 ;; upper case letters:
322 (while (< char ?
\133)
323 (aset table char
'non-terminal
)
324 (setq char
(1+ char
)))
325 (while (< char ?
\141)
326 (aset table char
'character
)
327 (setq char
(1+ char
)))
328 ;; lower case letters:
329 (while (< char ?
\173)
330 (aset table char
'non-terminal
)
331 (setq char
(1+ char
)))
332 (while (< char ?
\177)
333 (aset table char
'character
)
334 (setq char
(1+ char
)))
335 ;; European 8-bit accentuated characters:
337 (while (< char ?
\400)
338 (aset table char
'non-terminal
)
339 (setq char
(1+ char
)))
340 ;; Override space characters:
341 (aset table ?
\013 'space
) ; [VT] vertical tab
342 (aset table ?
\n 'space
) ; [NL] linefeed
343 (aset table ?
\r 'space
) ; [CR] carriage return
344 (aset table ?
\t 'space
) ; [HT] horizontal tab
345 (aset table ?\
'space
) ; [SP] space
346 ;; Override form feed character:
347 (aset table ?
\f 'form-feed
) ; [FF] form feed
348 ;; Override other lexical characters:
349 (aset table ?
\" 'double-terminal
)
350 (aset table ?
\' 'single-terminal
)
351 (aset table ?
\? 'special
)
352 (aset table ?
* 'repeat
)
353 (aset table ?
, 'catenate
)
354 (aset table ?-
'except
)
355 (aset table ?
= 'equal
)
356 (aset table ?\
) 'end-group
)
358 "Vector used to map characters to a lexical token.")
;; Patch `ebnf-iso-token-table' in place for the representation selected by
;; `ebnf-iso-alternative-p'.
;; Alternative (non-nil): `(' -> `left-parenthesis'; `[', `]', `{', `}', `|'
;;   and `;' -> `character'; `/' -> `slash'; `!' -> `alternative';
;;   `:' -> `colon'; `.' -> `period'.
;; Standard (nil): `(' -> `begin-parenthesis'; `[' -> `begin-optional';
;;   `]' -> `end-optional'; `{' -> `begin-zero-or-more';
;;   `}' -> `end-zero-or-more'; `|' -> `alternative'; `;' -> `period';
;;   `/', `!', `:' and `.' -> `character'.
;; Both branches assign the same eleven entries, so calling this again after
;; the option changes resets every entry touched by the other branch.
;; NOTE(review): the form grouping the first branch is not present in this
;; copy -- confirm against the complete file.
361 (defun ebnf-iso-initialize ()
362 "Initialize ISO EBNF token table."
363 (if ebnf-iso-alternative-p
364 ;; Override alternative lexical characters:
366 (aset ebnf-iso-token-table ?\
( 'left-parenthesis
)
367 (aset ebnf-iso-token-table ?\
[ 'character
)
368 (aset ebnf-iso-token-table ?\
] 'character
)
369 (aset ebnf-iso-token-table ?\
{ 'character
)
370 (aset ebnf-iso-token-table ?\
} 'character
)
371 (aset ebnf-iso-token-table ?|
'character
)
372 (aset ebnf-iso-token-table ?\
; 'character)
373 (aset ebnf-iso-token-table ?
/ 'slash
)
374 (aset ebnf-iso-token-table ?
! 'alternative
)
375 (aset ebnf-iso-token-table ?
: 'colon
)
376 (aset ebnf-iso-token-table ?.
'period
))
377 ;; Override standard lexical characters:
378 (aset ebnf-iso-token-table ?\
( 'begin-parenthesis
)
379 (aset ebnf-iso-token-table ?\
[ 'begin-optional
)
380 (aset ebnf-iso-token-table ?\
] 'end-optional
)
381 (aset ebnf-iso-token-table ?\
{ 'begin-zero-or-more
)
382 (aset ebnf-iso-token-table ?\
} 'end-zero-or-more
)
383 (aset ebnf-iso-token-table ?|
'alternative
)
384 (aset ebnf-iso-token-table ?\
; 'period)
385 (aset ebnf-iso-token-table ?
/ 'character
)
386 (aset ebnf-iso-token-table ?
! 'character
)
387 (aset ebnf-iso-token-table ?
: 'character
)
388 (aset ebnf-iso-token-table ?.
'character
)))
391 ;; replace the range "\240-\377" (see `ebnf-range-regexp').
(defconst ebnf-iso-non-terminal-chars
  ;; The ASCII part of the set is given literally; the 8-bit range is passed
  ;; to `ebnf-range-regexp' as its two end points, \240 and \377.
  (let ((range-start ?\240)
        (range-end   ?\377))
    (ebnf-range-regexp " 0-9A-Za-z" range-start range-end))
  "Character set `ebnf-iso-lex' uses to scan the text of a meta identifier.")
;; Lexer shared by all the parsing routines above.
;; What the visible code shows:
;;   - input is bounded by `ebnf-limit'; point is compared with it both on
;;     entry and after skipping white space;
;;   - the token class of the next character comes from
;;     `ebnf-iso-token-table', with characters above 255 tested separately;
;;   - white space (" \013\n\r\t") is skipped; a `(' token followed by `*'
;;     starts a comment, consumed by `ebnf-iso-skip-comment'; a form feed
;;     sets `ebnf-action' to `form-feed';
;;   - the variable `ebnf-iso-lex' receives the token text: digits for an
;;     integer, "?" concatenated with the special sequence, the string body
;;     for both kinds of terminal (via `ebnf-string');
;;   - a meta identifier is scanned with `ebnf-iso-non-terminal-chars'; if
;;     `ebnf-no-meta-identifier' is non-nil this signals "Exception sequence
;;     should not contain a meta identifier";
;;   - in the alternative representation, `(' followed by `/' or `:' is
;;     examined to recognise two-character brackets;
;;   - "Illegal character" is signalled for input that maps to no token.
;; NOTE(review): a large part of this definition (binding of `token', the
;; value returned at end of input, most `cond' clause bodies and the tail) is
;; not present in this copy -- confirm against the complete file.
396 (defun ebnf-iso-lex ()
397 "Lexical analyser for ISO EBNF.
399 Return a lexical token.
401 See documentation for variable `ebnf-iso-lex'."
402 (if (>= (point) ebnf-limit
)
405 ;; skip spaces and comments
406 (while (if (> (following-char) 255)
410 (setq token
(aref ebnf-iso-token-table
(following-char)))
413 (skip-chars-forward " \013\n\r\t" ebnf-limit
)
414 (< (point) ebnf-limit
))
415 ((or (eq token
'begin-parenthesis
)
416 (eq token
'left-parenthesis
))
418 (if (/= (following-char) ?
*)
422 (ebnf-iso-skip-comment)
424 ((eq token
'form-feed
)
426 (setq ebnf-action
'form-feed
))
431 ((>= (point) ebnf-limit
)
435 (error "Illegal character"))
438 (setq ebnf-iso-lex
(ebnf-buffer-substring "0-9"))
440 ;; special: ?special?
442 (setq ebnf-iso-lex
(concat "?"
443 (ebnf-string " ->@-~" ?
\? "special")
446 ;; terminal: "string"
447 ((eq token
'double-terminal
)
448 (setq ebnf-iso-lex
(ebnf-string " !#-~" ?
\" "terminal"))
450 ;; terminal: 'string'
451 ((eq token
'single-terminal
)
452 (setq ebnf-iso-lex
(ebnf-string " -&(-~" ?
\' "terminal"))
455 ((eq token
'non-terminal
)
459 (ebnf-buffer-substring ebnf-iso-non-terminal-chars
))))
460 (and ebnf-no-meta-identifier
461 (error "Exception sequence should not contain a meta identifier"))
463 ;; begin optional, begin list or begin group
464 ((eq token
'left-parenthesis
)
466 (cond ((= (following-char) ?
/)
469 ((= (following-char) ?
:)
475 ;; end optional or alternative
478 (if (/= (following-char) ?\
))
485 (if (/= (following-char) ?\
))
490 ((eq token
'begin-parenthesis
)
499 ;; replace the range "\177-\237" (see `ebnf-range-regexp').
(defconst ebnf-iso-comment-chars
  ;; The literal part is a negated set (leading `^'): it excludes `*', `('
  ;; and the control characters \000-\010 and \016-\037.  The range
  ;; \177-\237 is passed to `ebnf-range-regexp' as its two end points.
  (let ((range-start ?\177)
        (range-end   ?\237))
    (ebnf-range-regexp "^*(\000-\010\016-\037" range-start range-end))
  "Character set given to `skip-chars-forward' while scanning a comment.
Used by `ebnf-iso-skip-comment' and `ebnf-iso-eps-filename'.")
;; Consume a `(* ... *)' comment, honouring nesting.
;; What the visible code shows:
;;   - when `ebnf-eps-executing' is non-nil, a `[' at the start of the
;;     comment adds an EPS context and a `]' removes one, the name being
;;     read by `ebnf-iso-eps-filename'; otherwise `ebnf-action' is looked up
;;     in `ebnf-comment-table' using the following character;
;;   - the body is skipped with `ebnf-iso-comment-chars'; scanning stops at
;;     `*' and `(' so that `*)' decrements and `(*' increments the nesting
;;     counter `pair';
;;   - reaching `ebnf-limit' signals "Missing end of comment: `*)'", and an
;;     unexpected character signals "Illegal character".
;; NOTE(review): the binding of `pair', the enclosing loop and several
;; `forward-char' style steps are not present in this copy -- confirm against
;; the complete file.
504 (defun ebnf-iso-skip-comment ()
508 ((and ebnf-eps-executing
(= (following-char) ?\
[))
509 (ebnf-eps-add-context (ebnf-iso-eps-filename)))
511 ((and ebnf-eps-executing
(= (following-char) ?\
]))
512 (ebnf-eps-remove-context (ebnf-iso-eps-filename)))
513 ;; any other action in comment
515 (setq ebnf-action
(aref ebnf-comment-table
(following-char))))
519 (skip-chars-forward ebnf-iso-comment-chars ebnf-limit
)
520 (cond ((>= (point) ebnf-limit
)
521 (error "Missing end of comment: `*)'"))
522 ((= (following-char) ?
*)
523 (skip-chars-forward "*" ebnf-limit
)
524 (when (= (following-char) ?\
))
527 (setq pair
(1- pair
))))
528 ((= (following-char) ?\
()
529 (skip-chars-forward "(" ebnf-limit
)
530 (when (= (following-char) ?
*)
531 ;; beginning of comment
533 (setq pair
(1+ pair
))))
535 (error "Illegal character"))
;; Extract, as a property-free string, the name that follows an EPS marker
;; inside a comment (called from `ebnf-iso-skip-comment').
;; The visible code scans forward with `ebnf-iso-comment-chars' extended by
;; "\n" (as written, the "\n" is appended after the generated set), bounded
;; by `ebnf-limit', and inspects `*' and `(' to decide whether they belong
;; to a `*)' or `(*' pair.
;; NOTE(review): the start/end arguments of `buffer-substring-no-properties'
;; and the loop around the `cond' are not present in this copy -- confirm
;; against the complete file.
539 (defun ebnf-iso-eps-filename ()
541 (buffer-substring-no-properties
543 (let ((chars (concat ebnf-iso-comment-chars
"\n"))
546 (skip-chars-forward chars ebnf-limit
)
548 (cond ((>= (point) ebnf-limit
)
550 ((= (following-char) ?
*)
551 (skip-chars-forward "*" ebnf-limit
)
552 (if (/= (following-char) ?\
))
556 ((= (following-char) ?\
()
558 (if (/= (following-char) ?
*)
;; Collapse runs of spaces in STR when `ebnf-iso-normalize-p' is non-nil.
;; The visible code works in two passes over STR:
;;   1. count the "exceeding" spaces, i.e. every space that directly follows
;;      another space;
;;   2. if there are any, allocate a new string of length
;;      (- len spaces) and copy characters across, skipping the counted
;;      spaces; once `spaces' reaches zero the remainder is copied verbatim.
;; NOTE(review): the bindings of `stri', `newi' and `spaces', the enclosing
;; loops and the returned values are not present in this copy -- confirm
;; against the complete file.
568 (defun ebnf-iso-normalize (str)
569 (if (not ebnf-iso-normalize-p
)
571 (let ((len (length str
))
574 ;; count exceeding spaces
576 (if (/= (aref str stri
) ?\
)
577 (setq stri
(1+ stri
))
578 (setq stri
(1+ stri
))
579 (while (and (< stri len
) (= (aref str stri
) ?\
))
581 spaces
(1+ spaces
)))))
583 ;; no exceeding space
585 ;; at least one exceeding space
586 (let ((new (make-string (- len spaces
) ?\
))
588 ;; eliminate exceeding spaces
591 (if (/= (aref str stri
) ?\
)
593 (aset new newi
(aref str stri
))
596 (aset new newi
(aref str stri
))
599 (while (and (> spaces
0) (= (aref str stri
) ?\
))
601 spaces
(1- spaces
)))))
602 ;; remaining is normalized
604 (aset new newi
(aref str stri
))
610 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
616 ;;; arch-tag: 03315eef-8f64-404a-bf9d-256d42442ee3
617 ;;; ebnf-iso.el ends here