Update copyright year to 2014 by running admin/update-copyright.
[emacs.git] / lisp / progmodes / ebnf-iso.el
blobe1bd1a1d54fb9a7b89c7002fb3a9078b558f7b8a
1 ;;; ebnf-iso.el --- parser for ISO EBNF
3 ;; Copyright (C) 1999-2014 Free Software Foundation, Inc.
5 ;; Author: Vinicius Jose Latorre <viniciusjl@ig.com.br>
6 ;; Maintainer: Vinicius Jose Latorre <viniciusjl@ig.com.br>
7 ;; Keywords: wp, ebnf, PostScript
8 ;; Old-Version: 1.9
9 ;; Package: ebnf2ps
11 ;; This file is part of GNU Emacs.
13 ;; GNU Emacs is free software: you can redistribute it and/or modify
14 ;; it under the terms of the GNU General Public License as published by
15 ;; the Free Software Foundation, either version 3 of the License, or
16 ;; (at your option) any later version.
18 ;; GNU Emacs is distributed in the hope that it will be useful,
19 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
20 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 ;; GNU General Public License for more details.
23 ;; You should have received a copy of the GNU General Public License
24 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
26 ;;; Commentary:
28 ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
31 ;; This is part of ebnf2ps package.
33 ;; This package defines a parser for ISO EBNF.
35 ;; See ebnf2ps.el for documentation.
38 ;; ISO EBNF Syntax
39 ;; ---------------
41 ;; See the URL:
42 ;; `http://www.cl.cam.ac.uk/~mgk25/iso-ebnf.html'
43 ;; ("International Standard of the ISO EBNF Notation").
46 ;; ISO EBNF = syntax rule, {syntax rule};
48 ;; syntax rule = meta identifier, '=', definition list, ';';
50 ;; definition list = single definition, {'|', single definition};
52 ;; single definition = term, {',', term};
54 ;; term = factor, ['-', exception];
56 ;; exception = factor (* without <meta identifier> *);
58 ;; factor = [integer, '*'], primary;
60 ;; primary = optional sequence | repeated sequence | special sequence
61 ;; | grouped sequence | meta identifier | terminal string
62 ;; | empty;
64 ;; empty = ;
66 ;; optional sequence = '[', definition list, ']';
68 ;; repeated sequence = '{', definition list, '}';
70 ;; grouped sequence = '(', definition list, ')';
72 ;; terminal string = "'", character - "'", {character - "'"}, "'"
73 ;; | '"', character - '"', {character - '"'}, '"';
75 ;; special sequence = '?', {character - '?'}, '?';
77 ;; meta identifier = letter, { letter | decimal digit | ' ' };
79 ;; integer = decimal digit, {decimal digit};
81 ;; comment = '(*', {comment symbol}, '*)';
83 ;; comment symbol = comment (* <== NESTED COMMENT *)
84 ;; | terminal string | special sequence | character;
86 ;; letter = ? A-Z a-z ?;
88 ;; decimal digit = ? 0-9 ?;
90 ;; character = letter | decimal digit
91 ;; | ',' | '=' | '|' | '/' | '!' | '*' | '(' | ')' | '[' | ']' | '{'
92 ;; | '}' | "'" | '"' | '?' | '-' | ';' | '.' | ' ' | ':' | '+' | '_'
93 ;; | '%' | '@' | '&' | '#' | '$' | '<' | '>' | '\' | '^' | '`' | '~';
96 ;; There is also the following alternative representation:
98 ;; STANDARD ALTERNATIVE
99 ;; | ==> / or !
100 ;; [ ==> (/
101 ;; ] ==> /)
102 ;; { ==> (:
103 ;; } ==> :)
104 ;; ; ==> .
107 ;; Differences Between ISO EBNF And ebnf2ps ISO EBNF
108 ;; -------------------------------------------------
110 ;; ISO EBNF accepts the characters given by <character> production above,
111 ;; HORIZONTAL TAB (^I), VERTICAL TAB (^K), NEWLINE (^J or ^M) and FORM FEED
112 ;; (^L), any other characters are invalid. But ebnf2ps accepts also the
113 ;; european 8-bit accentuated characters (from \240 to \377) and underscore
114 ;; (_).
117 ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
119 ;;; Code:
122 (require 'ebnf-otz)
;; Side channel between the lexer and the parser: the function
;; `ebnf-iso-lex' returns only the token class; for `integer',
;; `special', `terminal' and `non-terminal' tokens it stores the
;; matched text here, and the parser functions read it from here.
(defvar ebnf-iso-lex nil
  "Value returned by `ebnf-iso-lex' function.")
;; `ebnf-iso-term' binds this to t while it parses the exception part
;; of "factor - exception"; while it is non-nil, `ebnf-iso-lex'
;; signals an error as soon as it reads a meta identifier.
(defvar ebnf-no-meta-identifier nil
  "Used by `ebnf-iso-term' and `ebnf-iso-lex' functions.")
133 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
134 ;; Syntactic analyzer
137 ;;; ISO EBNF = syntax rule, {syntax rule};
(defun ebnf-iso-parser (start)
  "Parse the ISO EBNF rules found between START and `ebnf-limit'.

Return the list of parsed rules, last rule first.  Rules for which
`ebnf-add-empty-rule-list' returns non-nil are left out of the list.
Signal an error when there is no token at all to parse.  On normal
exit point is moved back to where it was when the parser was called."
  (let ((span (1+ (- ebnf-limit start))) ; size of the region, for progress
        (offset (1- start))
        (saved-point (point))
        (rules nil)
        token parsed)
    (goto-char start)
    (setq token (ebnf-iso-lex))
    (when (eq token 'end-of-input)
      (error "Invalid ISO EBNF file format"))
    (while (not (eq token 'end-of-input))
      ;; progress report, as a percentage of the region already read
      (ebnf-message-float "Parsing...%s%%"
                          (/ (* (- (point) offset) 100.0) span))
      ;; each call yields (NEXT-TOKEN . RULE)
      (setq parsed (ebnf-iso-syntax-rule token)
            token (car parsed))
      (unless (ebnf-add-empty-rule-list (cdr parsed))
        (push (cdr parsed) rules)))
    (goto-char saved-point)
    rules))
162 ;;; syntax rule = meta identifier, '=', definition list, ';';
(defun ebnf-iso-syntax-rule (token)
  "Parse one syntax rule whose first token, TOKEN, was already read.

The rule name is taken from the variable `ebnf-iso-lex' and the
pending `ebnf-action' is captured and reset before the rest of the
rule is read.  Return (NEXT-TOKEN . PRODUCTION), where NEXT-TOKEN is
the token following the rule terminator.  Signal an error when the
rule does not start with a meta identifier, lacks `=', or is not
properly terminated."
  (let ((name ebnf-iso-lex)
        (pending-action ebnf-action)
        definition)
    (setq ebnf-action nil)
    (unless (eq token 'non-terminal)
      (error "Invalid meta identifier syntax rule"))
    (unless (eq (ebnf-iso-lex) 'equal)
      (error "Invalid syntax rule: missing `='"))
    ;; DEFINITION is (TERMINATING-TOKEN . BODY)
    (setq definition (ebnf-iso-definition-list))
    (unless (eq (car definition) 'period)
      (error "Invalid syntax rule: missing `;' or `.'"))
    (ebnf-eps-add-production name)
    ;; the look-ahead token is read before the production is built
    (cons (ebnf-iso-lex)
          (ebnf-make-production name (cdr definition) pending-action))))
182 ;;; definition list = single definition, {'|', single definition};
(defun ebnf-iso-definition-list ()
  "Parse a definition list: single definitions separated by `alternative'.

Every single definition that is followed by an `alternative' token
is collected (most recent first); the last one, still paired with
the token that ended it, is handed together with the collected ones
to `ebnf-token-alternative', whose value is returned."
  (let ((alternatives nil)
        (current (ebnf-iso-single-definition)))
    (while (eq (car current) 'alternative)
      (push (cdr current) alternatives)
      (setq current (ebnf-iso-single-definition)))
    (ebnf-token-alternative alternatives current)))
193 ;;; single definition = term, {',', term};
(defun ebnf-iso-single-definition ()
  "Parse a single definition: terms separated by `catenate' tokens.

Return (TOKEN . SEQUENCE), where TOKEN is the token that ended the
definition and SEQUENCE is what `ebnf-token-sequence' builds from
the terms read (passed most recent first)."
  (let ((terms nil)
        (more t)
        token term)
    (while more
      ;; `ebnf-iso-term' yields (NEXT-TOKEN . TERM)
      (let ((result (ebnf-iso-term (ebnf-iso-lex))))
        (setq token (car result)
              term (cdr result)))
      ;; keep going only after a non-empty term followed by a catenate
      (if (and term (eq token 'catenate))
          (push term terms)
        (setq more nil)))
    (when term
      (push term terms))
    (cons token (ebnf-token-sequence terms))))
208 ;;; term = factor, ['-', exception];
210 ;;; exception = factor (* without <meta identifier> *);
(defun ebnf-iso-term (token)
  "Parse a term starting at TOKEN: a factor, optionally minus an exception.

When the factor is not followed by an `except' token, return what
`ebnf-iso-factor' returned.  Otherwise parse the exception factor
with `ebnf-no-meta-identifier' bound to t and return the value of
`ebnf-token-except'."
  (let ((parsed (ebnf-iso-factor token)))
    (cond
     ;; factor - exception
     ((eq (car parsed) 'except)
      (let ((ebnf-no-meta-identifier t))
        (ebnf-token-except (cdr parsed)
                           (ebnf-iso-factor (ebnf-iso-lex)))))
     ;; plain factor
     (t
      parsed))))
222 ;;; factor = [integer, '*'], primary;
(defun ebnf-iso-factor (token)
  "Parse a factor starting at TOKEN: an optional `integer *' and a primary.

Without a leading integer, return what `ebnf-iso-primary' returns for
TOKEN.  With one, the integer text is taken from the variable
`ebnf-iso-lex', a `repeat' token must follow (else an error is
signaled), and the value of `ebnf-token-repeat' is returned."
  (cond
   ;; no repetition count
   ((not (eq token 'integer))
    (ebnf-iso-primary token))
   ;; integer, '*', primary
   (t
    (let ((count ebnf-iso-lex))
      (unless (eq (ebnf-iso-lex) 'repeat)
        (error "Missing `*'"))
      (ebnf-token-repeat count (ebnf-iso-primary (ebnf-iso-lex)))))))
233 ;;; primary = optional sequence | repeated sequence | special sequence
234 ;;; | grouped sequence | meta identifier | terminal string
235 ;;; | empty;
237 ;;; empty = ;
239 ;;; optional sequence = '[', definition list, ']';
241 ;;; repeated sequence = '{', definition list, '}';
243 ;;; grouped sequence = '(', definition list, ')';
245 ;;; terminal string = "'", character - "'", {character - "'"}, "'"
246 ;;; | '"', character - '"', {character - '"'}, '"';
248 ;;; special sequence = '?', {character - '?'}, '?';
250 ;;; meta identifier = letter, { letter | decimal digit | ' ' };
(defun ebnf-iso-primary (token)
  "Parse a primary whose first token is TOKEN.

Return (NEXT-TOKEN . PRIMARY).  When TOKEN starts a primary
\(terminal string, meta identifier, special sequence, or a grouped,
optional or repeated sequence), PRIMARY is the parsed node and
NEXT-TOKEN is freshly read with `ebnf-iso-lex'.  When TOKEN does not
start a primary, the primary is empty: PRIMARY is nil and TOKEN
itself is handed back unconsumed as NEXT-TOKEN.

Signal an error when a bracketed sequence is not properly closed."
  (let ((primary
         (cond
          ;; terminal string
          ((eq token 'terminal)
           (ebnf-make-terminal ebnf-iso-lex))
          ;; meta identifier
          ((eq token 'non-terminal)
           (ebnf-make-non-terminal ebnf-iso-lex))
          ;; special sequence
          ((eq token 'special)
           (ebnf-make-special ebnf-iso-lex))
          ;; grouped sequence
          ((eq token 'begin-group)
           (let ((body (ebnf-iso-definition-list)))
             (or (eq (car body) 'end-group)
                 (error "Missing `)'"))
             (cdr body)))
          ;; optional sequence
          ((eq token 'begin-optional)
           (let ((body (ebnf-iso-definition-list)))
             (or (eq (car body) 'end-optional)
                 (error "Missing `]' or `/)'"))
             (ebnf-token-optional (cdr body))))
          ;; repeated sequence
          ((eq token 'begin-zero-or-more)
           (let* ((body (ebnf-iso-definition-list))
                  (repeat (cdr body)))
             (or (eq (car body) 'end-zero-or-more)
                 (error "Missing `}' or `:)'"))
             (ebnf-make-zero-or-more repeat)))
          ;; empty
          (t
           nil))))
    ;; Only advance the lexer when something was actually consumed.
    (cons (if primary
              (ebnf-iso-lex)
            token)
          primary)))
293 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
294 ;; Lexical analyzer
(defconst ebnf-iso-token-table
  ;; Every entry starts as `error'; control characters and the
  ;; characters from \177 to \237 keep that value unless overridden.
  (let ((table (make-vector 256 'error)))
    ;; Character classes by contiguous range; both bounds inclusive.
    (dolist (range '((?\040 ?\057 character)
                     (?\060 ?\071 integer)      ; digits
                     (?\072 ?\100 character)
                     (?\101 ?\132 non-terminal) ; upper case letters
                     (?\133 ?\140 character)
                     (?\141 ?\172 non-terminal) ; lower case letters
                     (?\173 ?\176 character)
                     ;; European 8-bit accentuated characters
                     (?\240 ?\377 non-terminal)))
      (let ((char (car range))
            (last (nth 1 range))
            (class (nth 2 range)))
        (while (<= char last)
          (aset table char class)
          (setq char (1+ char)))))
    ;; Individual overrides: white space, form feed and the characters
    ;; that are lexical symbols in both the standard and the
    ;; alternative representation.
    (dolist (entry '((?\013 . space)    ; [VT] vertical tab
                     (?\n . space)      ; [NL] linefeed
                     (?\r . space)      ; [CR] carriage return
                     (?\t . space)      ; [HT] horizontal tab
                     (?\s . space)      ; [SP] space
                     (?\f . form-feed)  ; [FF] form feed
                     (?_ . non-terminal)
                     (?\" . double-terminal)
                     (?\' . single-terminal)
                     (?\? . special)
                     (?* . repeat)
                     (?, . catenate)
                     (?- . except)
                     (?= . equal)
                     (?\) . end-group)))
      (aset table (car entry) (cdr entry)))
    table)
  "Vector used to map characters to a lexical token.")
(defun ebnf-iso-initialize ()
  "Initialize ISO EBNF token table.

Store into `ebnf-iso-token-table' the token classes of the eleven
characters whose meaning depends on `ebnf-iso-alternative-p': when
it is non-nil the alternative representation is installed, otherwise
the standard one.  The vector is modified in place."
  (let ((overrides
         (if ebnf-iso-alternative-p
             ;; alternative lexical characters
             '((?\( . left-parenthesis)
               (?\[ . character)
               (?\] . character)
               (?\{ . character)
               (?\} . character)
               (?| . character)
               (?\; . character)
               (?/ . slash)
               (?! . alternative)
               (?: . colon)
               (?. . period))
           ;; standard lexical characters
           '((?\( . begin-parenthesis)
             (?\[ . begin-optional)
             (?\] . end-optional)
             (?\{ . begin-zero-or-more)
             (?\} . end-zero-or-more)
             (?| . alternative)
             (?\; . period)
             (?/ . character)
             (?! . character)
             (?: . character)
             (?. . character))))
        stored)
    ;; The value is the last token class stored, as with a plain
    ;; sequence of `aset' calls.
    (dolist (entry overrides stored)
      (setq stored (aset ebnf-iso-token-table (car entry) (cdr entry))))))
;; Characters accepted inside a meta identifier, in the form handed to
;; `ebnf-buffer-substring' by `ebnf-iso-lex': space, digits, letters
;; and underscore, plus the range "\240-\377", which is not written
;; literally but supplied through `ebnf-range-regexp' (see
;; `ebnf-range-regexp').
(defconst ebnf-iso-non-terminal-chars
  (ebnf-range-regexp " 0-9A-Za-z_" ?\240 ?\377))
(defun ebnf-iso-lex ()
  "Lexical analyzer for ISO EBNF.

Return a lexical token.

White space and comments are skipped first; a form feed met while
skipping sets `ebnf-action' to `form-feed'.  For `integer',
`special', `terminal' and `non-terminal' tokens the matched text is
stored in the variable `ebnf-iso-lex'.  Return `end-of-input' once
point reaches `ebnf-limit'.  Signal an error on an invalid character,
or on a meta identifier read while `ebnf-no-meta-identifier' is
non-nil.

See documentation for variable `ebnf-iso-lex'."
  (if (>= (point) ebnf-limit)
      'end-of-input
    (let (token)
      ;; Skip spaces and comments.  The loop goes on while the `if'
      ;; yields non-nil, and leaves in TOKEN the class of the first
      ;; character that is neither white space nor part of a comment.
      (while (if (> (following-char) 255)
                 ;; the token table only covers characters 0-255
                 (progn
                   (setq token 'error)
                   nil)
               (setq token (aref ebnf-iso-token-table (following-char)))
               (cond
                ((eq token 'space)
                 (skip-chars-forward " \013\n\r\t" ebnf-limit)
                 (< (point) ebnf-limit))
                ((or (eq token 'begin-parenthesis)
                     (eq token 'left-parenthesis))
                 ;; NOTE: the `(' is consumed here, comment or not.
                 (forward-char)
                 (if (/= (following-char) ?*)
                     ;; no comment
                     nil
                   ;; comment
                   (ebnf-iso-skip-comment)
                   t))
                ((eq token 'form-feed)
                 (forward-char)
                 (setq ebnf-action 'form-feed))
                (t nil))))
      (cond
       ;; end of input
       ((>= (point) ebnf-limit)
        'end-of-input)
       ;; error
       ((eq token 'error)
        (error "Invalid character"))
       ;; integer
       ((eq token 'integer)
        (setq ebnf-iso-lex (ebnf-buffer-substring "0-9"))
        'integer)
       ;; special: ?special?
       ((eq token 'special)
        (setq ebnf-iso-lex (concat (and ebnf-special-show-delimiter "?")
                                   (ebnf-string " ->@-~" ?\? "special")
                                   (and ebnf-special-show-delimiter "?")))
        'special)
       ;; terminal: "string"
       ((eq token 'double-terminal)
        (setq ebnf-iso-lex (ebnf-string " !#-~" ?\" "terminal"))
        'terminal)
       ;; terminal: 'string'
       ((eq token 'single-terminal)
        (setq ebnf-iso-lex (ebnf-string " -&(-~" ?\' "terminal"))
        'terminal)
       ;; non-terminal
       ((eq token 'non-terminal)
        (setq ebnf-iso-lex
              (ebnf-iso-normalize
               (ebnf-trim-right
                (ebnf-buffer-substring ebnf-iso-non-terminal-chars))))
        (and ebnf-no-meta-identifier
             (error "Exception sequence should not contain a meta identifier"))
        'non-terminal)
       ;; begin optional, begin list or begin group
       ((eq token 'left-parenthesis)
        ;; The skip loop above has already moved past the `(', so point
        ;; is on the character that tells `(/' and `(:' from a plain
        ;; `('.  Moving forward once more here would inspect the wrong
        ;; character and swallow the one right after the `('.
        (cond ((= (following-char) ?/)
               (forward-char)
               'begin-optional)
              ((= (following-char) ?:)
               (forward-char)
               'begin-zero-or-more)
              (t
               'begin-group)))
       ;; end optional or alternative
       ((eq token 'slash)
        (forward-char)
        (if (/= (following-char) ?\))
            'alternative
          (forward-char)
          'end-optional))
       ;; end list
       ((eq token 'colon)
        (forward-char)
        (if (/= (following-char) ?\))
            'character
          (forward-char)
          'end-zero-or-more))
       ;; begin group (the `(' was consumed by the skip loop)
       ((eq token 'begin-parenthesis)
        'begin-group)
       ;; miscellaneous
       (t
        (forward-char)
        token)))))
;; Character set handed to `skip-chars-forward' while a comment is
;; being skipped.  It is a negated set (leading `^'): skipping stops at
;; `*', at `(', and at the listed control characters.  The range
;; "\177-\237" is not written literally but supplied through
;; `ebnf-range-regexp' (see `ebnf-range-regexp').
(defconst ebnf-iso-comment-chars
  (ebnf-range-regexp "^*(\000-\010\016-\037" ?\177 ?\237))
(defun ebnf-iso-skip-comment ()
  "Skip a possibly nested `(* ... *)' comment.

Point must be on the `*' of the opening `(*'; on return it is just
after the matching `*)'.

The first character of the comment selects an action.  While
`ebnf-eps-executing' is non-nil, `[', `]', `H' and `F' open an EPS
context, close one, or set the EPS header or footer, using the text
read by `ebnf-iso-eps-filename'.  In every other case `ebnf-action'
is set to the entry of `ebnf-comment-table' for that character.

Signal an error if `ebnf-limit' is reached before the comment is
closed, or if the comment contains an invalid character."
  ;; step over the `*' of the opening `(*'
  (forward-char)
  (cond
   ;; open EPS file
   ((and ebnf-eps-executing (= (following-char) ?\[))
    (ebnf-eps-add-context (ebnf-iso-eps-filename)))
   ;; close EPS file
   ((and ebnf-eps-executing (= (following-char) ?\]))
    (ebnf-eps-remove-context (ebnf-iso-eps-filename)))
   ;; EPS header
   ((and ebnf-eps-executing (= (following-char) ?H))
    (ebnf-eps-header-comment (ebnf-iso-eps-filename)))
   ;; EPS footer
   ((and ebnf-eps-executing (= (following-char) ?F))
    (ebnf-eps-footer-comment (ebnf-iso-eps-filename)))
   ;; any other action in comment
   (t
    ;; Characters beyond the end of the table select no action instead
    ;; of signaling `args-out-of-range', mirroring the guard
    ;; `ebnf-iso-lex' puts in front of its own table lookup.
    ;; NOTE(review): assumes `ebnf-comment-table' is a vector indexed
    ;; by character code -- confirm against its definition.
    (let ((char (following-char)))
      (setq ebnf-action (and (< char (length ebnf-comment-table))
                             (aref ebnf-comment-table char))))))
  ;; PAIR counts the comments still open; nested comments are allowed.
  (let ((pair 1))
    (while (> pair 0)
      (skip-chars-forward ebnf-iso-comment-chars ebnf-limit)
      (cond ((>= (point) ebnf-limit)
             (error "Missing end of comment: `*)'"))
            ((= (following-char) ?*)
             (skip-chars-forward "*" ebnf-limit)
             (when (= (following-char) ?\))
               ;; end of comment
               (forward-char)
               (setq pair (1- pair))))
            ((= (following-char) ?\()
             (skip-chars-forward "(" ebnf-limit)
             (when (= (following-char) ?*)
               ;; beginning of comment
               (forward-char)
               (setq pair (1+ pair))))
            (t
             (error "Invalid character"))))))
(defun ebnf-iso-eps-filename ()
  "Return the text following the action character of a comment.

Point must be on the action character (see `ebnf-iso-skip-comment').
The text runs from the character after it up to the first of: a
newline, the `*)' that closes the comment, a nested `(*', a
character rejected by `ebnf-iso-comment-chars', or `ebnf-limit'.
Point is left at the end of the returned text, so the caller still
sees the comment delimiter that follows it."
  ;; step over the action character
  (forward-char)
  (buffer-substring-no-properties
   (point)
   (let ((chars (concat ebnf-iso-comment-chars "\n"))
         found)
     (while (not found)
       (skip-chars-forward chars ebnf-limit)
       (setq found
             (cond ((>= (point) ebnf-limit)
                    (point))
                   ((= (following-char) ?*)
                    (skip-chars-forward "*" ebnf-limit)
                    (if (/= (following-char) ?\))
                        ;; stars that do not close the comment belong
                        ;; to the text: keep scanning
                        nil
                      ;; leave the last `*' of `*)' unread
                      (backward-char)
                      (point)))
                   ((= (following-char) ?\()
                    (forward-char)
                    (if (/= (following-char) ?*)
                        ;; a lone `(' belongs to the text: keep scanning
                        nil
                      ;; leave the `(' of a nested `(*' unread
                      (backward-char)
                      (point)))
                   (t
                    (point)))))
     found)))
(defun ebnf-iso-normalize (str)
  "Return STR with every run of consecutive spaces reduced to one space.

STR itself is returned, unchanged, when `ebnf-iso-normalize-p' is
nil or when STR holds no run of two or more spaces.  Only the space
character is affected.  The match data is preserved."
  (if (and ebnf-iso-normalize-p
           (stringp str)
           (save-match-data (string-match "  " str)))
      ;; At least one exceeding space.  `replace-regexp-in-string'
      ;; builds the result from pieces of STR, so a multibyte STR
      ;; (identifiers with accentuated characters) stays multibyte.
      (save-match-data
        (replace-regexp-in-string "  +" " " str t t))
    ;; normalization disabled, or no exceeding space
    str))
608 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Announce the feature so that `(require 'ebnf-iso)' succeeds.
(provide 'ebnf-iso)
614 ;;; ebnf-iso.el ends here