1 ;;; ebnf-iso.el --- parser for ISO EBNF
3 ;; Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004
4 ;; Free Software Foundation, Inc.
6 ;; Author: Vinicius Jose Latorre <viniciusjl@ig.com.br>
7 ;; Maintainer: Vinicius Jose Latorre <viniciusjl@ig.com.br>
8 ;; Time-stamp: <2004/04/03 16:48:52 vinicius>
9 ;; Keywords: wp, ebnf, PostScript
12 ;; This file is part of GNU Emacs.
14 ;; GNU Emacs is free software; you can redistribute it and/or modify
15 ;; it under the terms of the GNU General Public License as published by
16 ;; the Free Software Foundation; either version 2, or (at your option)
19 ;; GNU Emacs is distributed in the hope that it will be useful,
20 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
21 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 ;; GNU General Public License for more details.
24 ;; You should have received a copy of the GNU General Public License
25 ;; along with GNU Emacs; see the file COPYING. If not, write to the
26 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
27 ;; Boston, MA 02111-1307, USA.
31 ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
34 ;; This is part of ebnf2ps package.
36 ;; This package defines a parser for ISO EBNF.
38 ;; See ebnf2ps.el for documentation.
45 ;; `http://www.cl.cam.ac.uk/~mgk25/iso-ebnf.html'
46 ;; ("International Standard of the ISO EBNF Notation").
49 ;; ISO EBNF = syntax rule, {syntax rule};
51 ;; syntax rule = meta identifier, '=', definition list, ';';
53 ;; definition list = single definition, {'|', single definition};
55 ;; single definition = term, {',', term};
57 ;; term = factor, ['-', exception];
59 ;; exception = factor (* without <meta identifier> *);
61 ;; factor = [integer, '*'], primary;
63 ;; primary = optional sequence | repeated sequence | special sequence
64 ;; | grouped sequence | meta identifier | terminal string
69 ;; optional sequence = '[', definition list, ']';
71 ;; repeated sequence = '{', definition list, '}';
73 ;; grouped sequence = '(', definition list, ')';
75 ;; terminal string = "'", character - "'", {character - "'"}, "'"
76 ;; | '"', character - '"', {character - '"'}, '"';
78 ;; special sequence = '?', {character - '?'}, '?';
80 ;; meta identifier = letter, { letter | decimal digit | ' ' };
82 ;; integer = decimal digit, {decimal digit};
84 ;; comment = '(*', {comment symbol}, '*)';
86 ;; comment symbol = comment (* <== NESTED COMMENT *)
87 ;; | terminal string | special sequence | character;
89 ;; letter = ? A-Z a-z ?;
91 ;; decimal digit = ? 0-9 ?;
93 ;; character = letter | decimal digit
94 ;; | ',' | '=' | '|' | '/' | '!' | '*' | '(' | ')' | '[' | ']' | '{'
95 ;; | '}' | "'" | '"' | '?' | '-' | ';' | '.' | ' ' | ':' | '+' | '_'
96 ;; | '%' | '@' | '&' | '#' | '$' | '<' | '>' | '\' | '^' | '`' | '~';
99 ;; There is also the following alternative representation:
101 ;; STANDARD ALTERNATIVE
110 ;; Differences Between ISO EBNF And ebnf2ps ISO EBNF
111 ;; -------------------------------------------------
113 ;; ISO EBNF accepts the characters given by <character> production above,
114 ;; HORIZONTAL TAB (^I), VERTICAL TAB (^K), NEWLINE (^J or ^M) and FORM FEED
115 ;; (^L), any other characters are illegal. But ebnf2ps accepts also the
116 ;; european 8-bit accentuated characters (from \240 to \377) and underscore
120 ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
128 (defvar ebnf-iso-lex nil
129 "Value returned by `ebnf-iso-lex' function.")
132 (defvar ebnf-no-meta-identifier nil
133 "Used by `ebnf-iso-term' and `ebnf-iso-lex' functions.")
136 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
137 ;; Syntactic analyzer
140 ;;; ISO EBNF = syntax rule, {syntax rule};
142 (defun ebnf-iso-parser (start)
144 (let ((total (+ (- ebnf-limit start
) 1))
147 syntax-list token rule
)
149 (setq token
(ebnf-iso-lex))
150 (and (eq token
'end-of-input
)
151 (error "Invalid ISO EBNF file format"))
152 (while (not (eq token
'end-of-input
))
155 (/ (* (- (point) bias
) 100.0) total
))
156 (setq token
(ebnf-iso-syntax-rule token
)
159 (or (ebnf-add-empty-rule-list rule
)
160 (setq syntax-list
(cons rule syntax-list
))))
165 ;;; syntax rule = meta identifier, '=', definition list, ';';
167 (defun ebnf-iso-syntax-rule (token)
168 (let ((header ebnf-iso-lex
)
171 (setq ebnf-action nil
)
172 (or (eq token
'non-terminal
)
173 (error "Invalid meta identifier syntax rule"))
174 (or (eq (ebnf-iso-lex) 'equal
)
175 (error "Invalid syntax rule: missing `='"))
176 (setq body
(ebnf-iso-definition-list))
177 (or (eq (car body
) 'period
)
178 (error "Invalid syntax rule: missing `;' or `.'"))
179 (setq body
(cdr body
))
180 (ebnf-eps-add-production header
)
182 (ebnf-make-production header body action
))))
185 ;;; definition list = single definition, {'|', single definition};
187 (defun ebnf-iso-definition-list ()
189 (while (eq (car (setq sequence
(ebnf-iso-single-definition)))
191 (setq sequence
(cdr sequence
)
192 body
(cons sequence body
)))
193 (ebnf-token-alternative body sequence
)))
196 ;;; single definition = term, {',', term};
198 (defun ebnf-iso-single-definition ()
199 (let (token seq term
)
200 (while (and (setq term
(ebnf-iso-term (ebnf-iso-lex))
203 (eq token
'catenate
))
204 (setq seq
(cons term seq
)))
206 (ebnf-token-sequence (if term
211 ;;; term = factor, ['-', exception];
213 ;;; exception = factor (* without <meta identifier> *);
215 (defun ebnf-iso-term (token)
216 (let ((factor (ebnf-iso-factor token
)))
217 (if (not (eq (car factor
) 'except
))
220 ;; factor - exception
221 (let ((ebnf-no-meta-identifier t
))
222 (ebnf-token-except (cdr factor
) (ebnf-iso-factor (ebnf-iso-lex)))))))
225 ;;; factor = [integer, '*'], primary;
227 (defun ebnf-iso-factor (token)
228 (if (eq token
'integer
)
229 (let ((times ebnf-iso-lex
))
230 (or (eq (ebnf-iso-lex) 'repeat
)
231 (error "Missing `*'"))
232 (ebnf-token-repeat times
(ebnf-iso-primary (ebnf-iso-lex))))
233 (ebnf-iso-primary token
)))
236 ;;; primary = optional sequence | repeated sequence | special sequence
237 ;;; | grouped sequence | meta identifier | terminal string
242 ;;; optional sequence = '[', definition list, ']';
244 ;;; repeated sequence = '{', definition list, '}';
246 ;;; grouped sequence = '(', definition list, ')';
248 ;;; terminal string = "'", character - "'", {character - "'"}, "'"
249 ;;; | '"', character - '"', {character - '"'}, '"';
251 ;;; special sequence = '?', {character - '?'}, '?';
253 ;;; meta identifier = letter, {letter | decimal digit};
255 (defun ebnf-iso-primary (token)
259 ((eq token
'terminal
)
260 (ebnf-make-terminal ebnf-iso-lex
))
262 ((eq token
'non-terminal
)
263 (ebnf-make-non-terminal ebnf-iso-lex
))
266 (ebnf-make-special ebnf-iso-lex
))
268 ((eq token
'begin-group
)
269 (let ((body (ebnf-iso-definition-list)))
270 (or (eq (car body
) 'end-group
)
271 (error "Missing `)'"))
274 ((eq token
'begin-optional
)
275 (let ((body (ebnf-iso-definition-list)))
276 (or (eq (car body
) 'end-optional
)
277 (error "Missing `]' or `/)'"))
278 (ebnf-token-optional (cdr body
))))
280 ((eq token
'begin-zero-or-more
)
281 (let* ((body (ebnf-iso-definition-list))
283 (or (eq (car body
) 'end-zero-or-more
)
284 (error "Missing `}' or `:)'"))
285 (ebnf-make-zero-or-more repeat
)))
296 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
300 (defconst ebnf-iso-token-table
301 ;; control character & 8-bit character are set to `error'
302 (let ((table (make-vector 256 'error
))
304 ;; printable character
305 (while (< char ?
\060)
306 (aset table char
'character
)
307 (setq char
(1+ char
)))
309 (while (< char ?
\072)
310 (aset table char
'integer
)
311 (setq char
(1+ char
)))
312 (while (< char ?
\101)
313 (aset table char
'character
)
314 (setq char
(1+ char
)))
315 ;; upper case letters:
316 (while (< char ?
\133)
317 (aset table char
'non-terminal
)
318 (setq char
(1+ char
)))
319 (while (< char ?
\141)
320 (aset table char
'character
)
321 (setq char
(1+ char
)))
322 ;; lower case letters:
323 (while (< char ?
\173)
324 (aset table char
'non-terminal
)
325 (setq char
(1+ char
)))
326 (while (< char ?
\177)
327 (aset table char
'character
)
328 (setq char
(1+ char
)))
329 ;; European 8-bit accentuated characters:
331 (while (< char ?
\400)
332 (aset table char
'non-terminal
)
333 (setq char
(1+ char
)))
334 ;; Override space characters:
335 (aset table ?
\013 'space
) ; [VT] vertical tab
336 (aset table ?
\n 'space
) ; [NL] linefeed
337 (aset table ?
\r 'space
) ; [CR] carriage return
338 (aset table ?
\t 'space
) ; [HT] horizontal tab
339 (aset table ?\
'space
) ; [SP] space
340 ;; Override form feed character:
341 (aset table ?
\f 'form-feed
) ; [FF] form feed
342 ;; Override other lexical characters:
343 (aset table ?_
'non-terminal
)
344 (aset table ?
\" 'double-terminal
)
345 (aset table ?
\' 'single-terminal
)
346 (aset table ?
\? 'special
)
347 (aset table ?
* 'repeat
)
348 (aset table ?
, 'catenate
)
349 (aset table ?-
'except
)
350 (aset table ?
= 'equal
)
351 (aset table ?\
) 'end-group
)
353 "Vector used to map characters to a lexical token.")
356 (defun ebnf-iso-initialize ()
357 "Initialize ISO EBNF token table."
358 (if ebnf-iso-alternative-p
359 ;; Override alternative lexical characters:
361 (aset ebnf-iso-token-table ?\
( 'left-parenthesis
)
362 (aset ebnf-iso-token-table ?\
[ 'character
)
363 (aset ebnf-iso-token-table ?\
] 'character
)
364 (aset ebnf-iso-token-table ?\
{ 'character
)
365 (aset ebnf-iso-token-table ?\
} 'character
)
366 (aset ebnf-iso-token-table ?|
'character
)
367 (aset ebnf-iso-token-table ?\
; 'character)
368 (aset ebnf-iso-token-table ?
/ 'slash
)
369 (aset ebnf-iso-token-table ?
! 'alternative
)
370 (aset ebnf-iso-token-table ?
: 'colon
)
371 (aset ebnf-iso-token-table ?.
'period
))
372 ;; Override standard lexical characters:
373 (aset ebnf-iso-token-table ?\
( 'begin-parenthesis
)
374 (aset ebnf-iso-token-table ?\
[ 'begin-optional
)
375 (aset ebnf-iso-token-table ?\
] 'end-optional
)
376 (aset ebnf-iso-token-table ?\
{ 'begin-zero-or-more
)
377 (aset ebnf-iso-token-table ?\
} 'end-zero-or-more
)
378 (aset ebnf-iso-token-table ?|
'alternative
)
379 (aset ebnf-iso-token-table ?\
; 'period)
380 (aset ebnf-iso-token-table ?
/ 'character
)
381 (aset ebnf-iso-token-table ?
! 'character
)
382 (aset ebnf-iso-token-table ?
: 'character
)
383 (aset ebnf-iso-token-table ?.
'character
)))
386 ;; replace the range "\240-\377" (see `ebnf-range-regexp').
387 (defconst ebnf-iso-non-terminal-chars
388 (ebnf-range-regexp " 0-9A-Za-z_" ?
\240 ?
\377))
391 (defun ebnf-iso-lex ()
392 "Lexical analyser for ISO EBNF.
394 Return a lexical token.
396 See documentation for variable `ebnf-iso-lex'."
397 (if (>= (point) ebnf-limit
)
400 ;; skip spaces and comments
401 (while (if (> (following-char) 255)
405 (setq token
(aref ebnf-iso-token-table
(following-char)))
408 (skip-chars-forward " \013\n\r\t" ebnf-limit
)
409 (< (point) ebnf-limit
))
410 ((or (eq token
'begin-parenthesis
)
411 (eq token
'left-parenthesis
))
413 (if (/= (following-char) ?
*)
417 (ebnf-iso-skip-comment)
419 ((eq token
'form-feed
)
421 (setq ebnf-action
'form-feed
))
426 ((>= (point) ebnf-limit
)
430 (error "Illegal character"))
433 (setq ebnf-iso-lex
(ebnf-buffer-substring "0-9"))
435 ;; special: ?special?
437 (setq ebnf-iso-lex
(concat (and ebnf-special-show-delimiter
"?")
438 (ebnf-string " ->@-~" ?
\? "special")
439 (and ebnf-special-show-delimiter
"?")))
441 ;; terminal: "string"
442 ((eq token
'double-terminal
)
443 (setq ebnf-iso-lex
(ebnf-string " !#-~" ?
\" "terminal"))
445 ;; terminal: 'string'
446 ((eq token
'single-terminal
)
447 (setq ebnf-iso-lex
(ebnf-string " -&(-~" ?
\' "terminal"))
450 ((eq token
'non-terminal
)
454 (ebnf-buffer-substring ebnf-iso-non-terminal-chars
))))
455 (and ebnf-no-meta-identifier
456 (error "Exception sequence should not contain a meta identifier"))
458 ;; begin optional, begin list or begin group
459 ((eq token
'left-parenthesis
)
461 (cond ((= (following-char) ?
/)
464 ((= (following-char) ?
:)
470 ;; end optional or alternative
473 (if (/= (following-char) ?\
))
480 (if (/= (following-char) ?\
))
485 ((eq token
'begin-parenthesis
)
494 ;; replace the range "\177-\237" (see `ebnf-range-regexp').
495 (defconst ebnf-iso-comment-chars
496 (ebnf-range-regexp "^*(\000-\010\016-\037" ?
\177 ?
\237))
499 (defun ebnf-iso-skip-comment ()
503 ((and ebnf-eps-executing
(= (following-char) ?\
[))
504 (ebnf-eps-add-context (ebnf-iso-eps-filename)))
506 ((and ebnf-eps-executing
(= (following-char) ?\
]))
507 (ebnf-eps-remove-context (ebnf-iso-eps-filename)))
508 ;; any other action in comment
510 (setq ebnf-action
(aref ebnf-comment-table
(following-char))))
514 (skip-chars-forward ebnf-iso-comment-chars ebnf-limit
)
515 (cond ((>= (point) ebnf-limit
)
516 (error "Missing end of comment: `*)'"))
517 ((= (following-char) ?
*)
518 (skip-chars-forward "*" ebnf-limit
)
519 (when (= (following-char) ?\
))
522 (setq pair
(1- pair
))))
523 ((= (following-char) ?\
()
524 (skip-chars-forward "(" ebnf-limit
)
525 (when (= (following-char) ?
*)
526 ;; beginning of comment
528 (setq pair
(1+ pair
))))
530 (error "Illegal character"))
534 (defun ebnf-iso-eps-filename ()
536 (buffer-substring-no-properties
538 (let ((chars (concat ebnf-iso-comment-chars
"\n"))
541 (skip-chars-forward chars ebnf-limit
)
543 (cond ((>= (point) ebnf-limit
)
545 ((= (following-char) ?
*)
546 (skip-chars-forward "*" ebnf-limit
)
547 (if (/= (following-char) ?\
))
551 ((= (following-char) ?\
()
553 (if (/= (following-char) ?
*)
563 (defun ebnf-iso-normalize (str)
564 (if (not ebnf-iso-normalize-p
)
566 (let ((len (length str
))
569 ;; count exceeding spaces
571 (if (/= (aref str stri
) ?\
)
572 (setq stri
(1+ stri
))
573 (setq stri
(1+ stri
))
574 (while (and (< stri len
) (= (aref str stri
) ?\
))
576 spaces
(1+ spaces
)))))
578 ;; no exceeding space
580 ;; at least one exceeding space
581 (let ((new (make-string (- len spaces
) ?\
))
583 ;; eliminate exceeding spaces
586 (if (/= (aref str stri
) ?\
)
588 (aset new newi
(aref str stri
))
591 (aset new newi
(aref str stri
))
594 (while (and (> spaces
0) (= (aref str stri
) ?\
))
596 spaces
(1- spaces
)))))
597 ;; remaining is normalized
599 (aset new newi
(aref str stri
))
605 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
611 ;;; arch-tag: 03315eef-8f64-404a-bf9d-256d42442ee3
612 ;;; ebnf-iso.el ends here