Merge branch 'master' into comment-cache
[emacs.git] / lisp / cedet / semantic / wisent / comp.el
blobcb19b1b861fe684a03266fcf2f549ef4ec519e7c
1 ;;; semantic/wisent/comp.el --- GNU Bison for Emacs - Grammar compiler
3 ;; Copyright (C) 1984, 1986, 1989, 1992, 1995, 2000-2007, 2009-2017 Free
4 ;; Software Foundation, Inc.
6 ;; Author: David Ponce <david@dponce.com>
7 ;; Maintainer: David Ponce <david@dponce.com>
8 ;; Created: 30 January 2002
9 ;; Keywords: syntax
11 ;; This file is part of GNU Emacs.
13 ;; GNU Emacs is free software: you can redistribute it and/or modify
14 ;; it under the terms of the GNU General Public License as published by
15 ;; the Free Software Foundation, either version 3 of the License, or
16 ;; (at your option) any later version.
18 ;; GNU Emacs is distributed in the hope that it will be useful,
19 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
20 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 ;; GNU General Public License for more details.
23 ;; You should have received a copy of the GNU General Public License
24 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
26 ;;; Commentary:
28 ;; Grammar compiler that produces Wisent's LALR automatons.
30 ;; Wisent (the European Bison ;-) is an Elisp implementation of the
31 ;; GNU Compiler Compiler Bison. The Elisp code is a port of the C
32 ;; code of GNU Bison 1.28 & 1.31.
34 ;; For more details on the basic concepts for understanding Wisent,
35 ;; read the Bison manual ;)
37 ;; For more details on Wisent itself read the Wisent manual.
39 ;;; History:
42 ;;; Code:
43 (require 'semantic/wisent)
44 (eval-when-compile (require 'cl))
46 ;;;; -------------------
47 ;;;; Misc. useful things
48 ;;;; -------------------
50 ;; As much as possible I would like to keep the name of global
51 ;; variables used in Bison without polluting too much the Elisp global
52 ;; name space. Elisp dynamic binding allows that ;-)
54 ;; Here are simple macros to easily define and use set of variables
55 ;; bound locally, without all these "reference to free variable"
56 ;; compiler warnings!
58 (defmacro wisent-context-name (name)
59 "Return the context name from NAME."
60 `(if (and ,name (symbolp ,name))
61 (intern (format "wisent-context-%s" ,name))
62 (error "Invalid context name: %S" ,name)))
64 (defmacro wisent-context-bindings (name)
65 "Return the variables in context NAME."
66 `(symbol-value (wisent-context-name ,name)))
68 (defmacro wisent-defcontext (name &rest vars)
69 "Define a context NAME that will bind variables VARS."
70 (declare (indent 1))
71 (let* ((context (wisent-context-name name))
72 (declarations (mapcar #'(lambda (v) (list 'defvar v)) vars)))
73 `(progn
74 ,@declarations
75 (eval-when-compile
76 (defvar ,context ',vars)))))
78 (defmacro wisent-with-context (name &rest body)
79 "Bind variables in context NAME then eval BODY."
80 (declare (indent 1))
81 (let ((bindings (wisent-context-bindings name)))
82 `(progn
83 ,@(mapcar (lambda (binding) `(defvar ,(or (car-safe binding) binding)))
84 bindings)
85 (let* ,bindings
86 ,@body))))
88 ;; A naive implementation of data structures! But it suffice here ;-)
90 (defmacro wisent-struct (name &rest fields)
91 "Define a simple data structure called NAME.
92 Which contains data stored in FIELDS. FIELDS is a list of symbols
93 which are field names or pairs (FIELD INITIAL-VALUE) where
94 INITIAL-VALUE is a constant used as the initial value of FIELD when
95 the data structure is created. INITIAL-VALUE defaults to nil.
97 This defines a `make-NAME' constructor, get-able `NAME-FIELD' and
98 set-able `set-NAME-FIELD' accessors."
99 (let ((size (length fields))
100 (i 0)
101 accors field sufx fun ivals)
102 (while (< i size)
103 (setq field (car fields)
104 fields (cdr fields))
105 (if (consp field)
106 (setq ivals (cons (cadr field) ivals)
107 field (car field))
108 (setq ivals (cons nil ivals)))
109 (setq sufx (format "%s-%s" name field)
110 fun (intern (format "%s" sufx))
111 accors (cons `(defmacro ,fun (s)
112 (list 'aref s ,i))
113 accors)
114 fun (intern (format "set-%s" sufx))
115 accors (cons `(defmacro ,fun (s v)
116 (list 'aset s ,i v))
117 accors)
118 i (1+ i)))
119 `(progn
120 (defmacro ,(intern (format "make-%s" name)) ()
121 (cons 'vector ',(nreverse ivals)))
122 ,@accors)))
123 (put 'wisent-struct 'lisp-indent-function 1)
125 ;; Other utilities
127 (defsubst wisent-pad-string (s n &optional left)
128 "Fill string S with spaces.
129 Return a new string of at least N characters. Insert spaces on right.
130 If optional LEFT is non-nil insert spaces on left."
131 (let ((i (length s)))
132 (if (< i n)
133 (if left
134 (concat (make-string (- n i) ?\ ) s)
135 (concat s (make-string (- n i) ?\ )))
136 s)))
138 ;;;; ------------------------
139 ;;;; Environment dependencies
140 ;;;; ------------------------
142 (defconst wisent-BITS-PER-WORD
143 (let ((i 1)
144 (do-shift (if (boundp 'most-positive-fixnum)
145 (lambda (i) (lsh most-positive-fixnum (- i)))
146 (lambda (i) (lsh 1 i)))))
147 (while (not (zerop (funcall do-shift i)))
148 (setq i (1+ i)))
151 (defsubst wisent-WORDSIZE (n)
152 "(N + BITS-PER-WORD - 1) / BITS-PER-WORD."
153 (/ (1- (+ n wisent-BITS-PER-WORD)) wisent-BITS-PER-WORD))
155 (defsubst wisent-SETBIT (x i)
156 "X[I/BITS-PER-WORD] |= 1 << (I % BITS-PER-WORD)."
157 (let ((k (/ i wisent-BITS-PER-WORD)))
158 (aset x k (logior (aref x k)
159 (lsh 1 (% i wisent-BITS-PER-WORD))))))
161 (defsubst wisent-RESETBIT (x i)
162 "X[I/BITS-PER-WORD] &= ~(1 << (I % BITS-PER-WORD))."
163 (let ((k (/ i wisent-BITS-PER-WORD)))
164 (aset x k (logand (aref x k)
165 (lognot (lsh 1 (% i wisent-BITS-PER-WORD)))))))
167 (defsubst wisent-BITISSET (x i)
168 "(X[I/BITS-PER-WORD] & (1 << (I % BITS-PER-WORD))) != 0."
169 (not (zerop (logand (aref x (/ i wisent-BITS-PER-WORD))
170 (lsh 1 (% i wisent-BITS-PER-WORD))))))
172 (defsubst wisent-noninteractive ()
173 "Return non-nil if running without interactive terminal."
174 (if (featurep 'xemacs)
175 (noninteractive)
176 noninteractive))
178 (defvar wisent-debug-flag nil
179 "Non-nil means enable some debug stuff.")
181 ;;;; --------------
182 ;;;; Logging/Output
183 ;;;; --------------
184 (defconst wisent-log-buffer-name "*wisent-log*"
185 "Name of the log buffer.")
187 (defvar wisent-new-log-flag nil
188 "Non-nil means to start a new report.")
190 (defcustom wisent-verbose-flag nil
191 "Non-nil means to report verbose information on generated parser."
192 :group 'wisent
193 :type 'boolean)
195 (defun wisent-toggle-verbose-flag ()
196 "Toggle whether to report verbose information on generated parser."
197 (interactive)
198 (setq wisent-verbose-flag (not wisent-verbose-flag))
199 (when (called-interactively-p 'interactive)
200 (message "Verbose report %sabled"
201 (if wisent-verbose-flag "en" "dis"))))
203 (defmacro wisent-log-buffer ()
204 "Return the log buffer.
205 Its name is defined in constant `wisent-log-buffer-name'."
206 `(get-buffer-create wisent-log-buffer-name))
208 (defmacro wisent-clear-log ()
209 "Delete the entire contents of the log buffer."
210 `(with-current-buffer (wisent-log-buffer)
211 (erase-buffer)))
213 (defvar byte-compile-current-file)
215 (defun wisent-source ()
216 "Return the current source file name or nil."
217 (let ((source (or (and (boundp 'byte-compile-current-file)
218 byte-compile-current-file)
219 load-file-name (buffer-file-name))))
220 (if source
221 (file-relative-name source))))
223 (defun wisent-new-log ()
224 "Start a new entry into the log buffer."
225 (setq wisent-new-log-flag nil)
226 (let ((text (format "\n\n*** Wisent %s - %s\n\n"
227 (or (wisent-source) (buffer-name))
228 (format-time-string "%Y-%m-%d %R"))))
229 (with-current-buffer (wisent-log-buffer)
230 (goto-char (point-max))
231 (insert text))))
233 (defsubst wisent-log (&rest args)
234 "Insert text into the log buffer.
235 `format-message' is applied to ARGS and the result string is inserted into the
236 log buffer returned by the function `wisent-log-buffer'."
237 (and wisent-new-log-flag (wisent-new-log))
238 (with-current-buffer (wisent-log-buffer)
239 (insert (apply #'format-message args))))
241 (defconst wisent-log-file "wisent.output"
242 "The log file.
243 Used when running without interactive terminal.")
245 (defun wisent-append-to-log-file ()
246 "Append contents of logging buffer to `wisent-log-file'."
247 (if (get-buffer wisent-log-buffer-name)
248 (condition-case err
249 (with-current-buffer (wisent-log-buffer)
250 (widen)
251 (if (> (point-max) (point-min))
252 (write-region (point-min) (point-max)
253 wisent-log-file t)))
254 (error
255 (message "*** %s" (error-message-string err))))))
257 ;;;; -----------------------------------
258 ;;;; Representation of the grammar rules
259 ;;;; -----------------------------------
261 ;; ntokens is the number of tokens, and nvars is the number of
262 ;; variables (nonterminals). nsyms is the total number, ntokens +
263 ;; nvars.
265 ;; Each symbol (either token or variable) receives a symbol number.
266 ;; Numbers 0 to ntokens-1 are for tokens, and ntokens to nsyms-1 are
267 ;; for variables. Symbol number zero is the end-of-input token. This
268 ;; token is counted in ntokens.
270 ;; The rules receive rule numbers 1 to nrules in the order they are
271 ;; written. Actions and guards are accessed via the rule number.
273 ;; The rules themselves are described by three arrays: rrhs, rlhs and
274 ;; ritem. rlhs[R] is the symbol number of the left hand side of rule
275 ;; R. The right hand side is stored as symbol numbers in a portion of
276 ;; ritem. rrhs[R] contains the index in ritem of the beginning of the
277 ;; portion for rule R.
279 ;; The length of the portion is one greater than the number of symbols
280 ;; in the rule's right hand side. The last element in the portion
281 ;; contains minus R, which identifies it as the end of a portion and
282 ;; says which rule it is for.
284 ;; The portions of ritem come in order of increasing rule number and
285 ;; are followed by an element which is nil to mark the end. nitems is
286 ;; the total length of ritem, not counting the final nil. Each
287 ;; element of ritem is called an "item" and its index in ritem is an
288 ;; item number.
290 ;; Item numbers are used in the finite state machine to represent
291 ;; places that parsing can get to.
293 ;; The vector rprec contains for each rule, the item number of the
294 ;; symbol giving its precedence level to this rule. The precedence
295 ;; level and associativity of each symbol is recorded in respectively
296 ;; the properties 'wisent--prec and 'wisent--assoc.
298 ;; Precedence levels are assigned in increasing order starting with 1
299 ;; so that numerically higher precedence values mean tighter binding
300 ;; as they ought to. nil as a symbol or rule's precedence means none
301 ;; is assigned.
303 (defcustom wisent-state-table-size 1009
304 "The size of the state table."
305 :type 'integer
306 :group 'wisent)
308 ;; These variables only exist locally in the function
309 ;; `wisent-compile-grammar' and are shared by all other nested
310 ;; callees.
311 (wisent-defcontext compile-grammar
312 F LA LAruleno accessing-symbol conflicts consistent default-prec
313 derives err-table fderives final-state first-reduction first-shift
314 first-state firsts from-state goto-map includes itemset nitemset
315 kernel-base kernel-end kernel-items last-reduction last-shift
316 last-state lookaheads lookaheadset lookback maxrhs ngotos nitems
317 nrules nshifts nstates nsyms ntokens nullable nvars rassoc redset
318 reduction-table ritem rlhs rprec rrc-count rrc-total rrhs ruseful
319 rcode ruleset rulesetsize shift-symbol shift-table shiftset
320 src-count src-total start-table state-table tags this-state to-state
321 tokensetsize ;; nb of words req. to hold a bit for each rule
322 varsetsize ;; nb of words req. to hold a bit for each variable
323 error-token-number start-symbol token-list var-list
324 N P V V1 nuseless-nonterminals nuseless-productions
325 ptable ;; symbols & characters properties
328 (defmacro wisent-ISTOKEN (s)
329 "Return non-nil if item number S defines a token (terminal).
330 That is if S < `ntokens'."
331 `(< ,s ntokens))
333 (defmacro wisent-ISVAR(s)
334 "Return non-nil if item number S defines a nonterminal.
335 That is if S >= `ntokens'."
336 `(>= ,s ntokens))
338 (defsubst wisent-tag (s)
339 "Return printable form of item number S."
340 (wisent-item-to-string (aref tags s)))
342 ;; Symbol and character properties
344 (defsubst wisent-put (object propname value)
345 "Store OBJECT's PROPNAME property with value VALUE.
346 Use `eq' to locate OBJECT."
347 (let ((entry (assq object ptable)))
348 (or entry (setq entry (list object) ptable (cons entry ptable)))
349 (setcdr entry (plist-put (cdr entry) propname value))))
351 (defsubst wisent-get (object propname)
352 "Return the value of OBJECT's PROPNAME property.
353 Use `eq' to locate OBJECT."
354 (plist-get (cdr (assq object ptable)) propname))
356 (defsubst wisent-item-number (x)
357 "Return the item number of symbol X."
358 (wisent-get x 'wisent--item-no))
360 (defsubst wisent-set-item-number (x n)
361 "Set the item number of symbol X to N."
362 (wisent-put x 'wisent--item-no n))
364 (defsubst wisent-assoc (x)
365 "Return the associativity of symbol X."
366 (wisent-get x 'wisent--assoc))
368 (defsubst wisent-set-assoc (x a)
369 "Set the associativity of symbol X to A."
370 (wisent-put x 'wisent--assoc a))
372 (defsubst wisent-prec (x)
373 "Return the precedence level of symbol X."
374 (wisent-get x 'wisent--prec))
376 (defsubst wisent-set-prec (x p)
377 "Set the precedence level of symbol X to P."
378 (wisent-put x 'wisent--prec p))
380 ;;;; ----------------------------------------------------------
381 ;;;; Type definitions for nondeterministic finite state machine
382 ;;;; ----------------------------------------------------------
384 ;; These type definitions are used to represent a nondeterministic
385 ;; finite state machine that parses the specified grammar. This
386 ;; information is generated by the function `wisent-generate-states'.
388 ;; Each state of the machine is described by a set of items --
389 ;; particular positions in particular rules -- that are the possible
390 ;; places where parsing could continue when the machine is in this
391 ;; state. These symbols at these items are the allowable inputs that
392 ;; can follow now.
394 ;; A core represents one state. States are numbered in the number
395 ;; field. When `wisent-generate-states' is finished, the starting
396 ;; state is state 0 and `nstates' is the number of states. (A
397 ;; transition to a state whose state number is `nstates' indicates
398 ;; termination.) All the cores are chained together and `first-state'
399 ;; points to the first one (state 0).
401 ;; For each state there is a particular symbol which must have been
402 ;; the last thing accepted to reach that state. It is the
403 ;; accessing-symbol of the core.
405 ;; Each core contains a vector of `nitems' items which are the indices
406 ;; in the `ritems' vector of the items that are selected in this
407 ;; state.
409 ;; The link field is used for chaining buckets that hash states by
410 ;; their itemsets. This is for recognizing equivalent states and
411 ;; combining them when the states are generated.
413 ;; The two types of transitions are shifts (push the lookahead token
414 ;; and read another) and reductions (combine the last n things on the
415 ;; stack via a rule, replace them with the symbol that the rule
416 ;; derives, and leave the lookahead token alone). When the states are
417 ;; generated, these transitions are represented in two other lists.
419 ;; Each shifts structure describes the possible shift transitions out
420 ;; of one state, the state whose number is in the number field. The
421 ;; shifts structures are linked through next and first-shift points to
422 ;; them. Each contains a vector of numbers of the states that shift
423 ;; transitions can go to. The accessing-symbol fields of those
424 ;; states' cores say what kind of input leads to them.
426 ;; A shift to state zero should be ignored. Conflict resolution
427 ;; deletes shifts by changing them to zero.
429 ;; Each reductions structure describes the possible reductions at the
430 ;; state whose number is in the number field. The data is a list of
431 ;; nreds rules, represented by their rule numbers. `first-reduction'
432 ;; points to the list of these structures.
434 ;; Conflict resolution can decide that certain tokens in certain
435 ;; states should explicitly be errors (for implementing %nonassoc).
436 ;; For each state, the tokens that are errors for this reason are
437 ;; recorded in an errs structure, which has the state number in its
438 ;; number field. The rest of the errs structure is full of token
439 ;; numbers.
441 ;; There is at least one shift transition present in state zero. It
442 ;; leads to a next-to-final state whose accessing-symbol is the
443 ;; grammar's start symbol. The next-to-final state has one shift to
444 ;; the final state, whose accessing-symbol is zero (end of input).
445 ;; The final state has one shift, which goes to the termination state
446 ;; (whose number is `nstates'-1).
447 ;; The reason for the extra state at the end is to placate the
448 ;; parser's strategy of making all decisions one token ahead of its
449 ;; actions.
451 (wisent-struct core
452 next ; -> core
453 link ; -> core
454 (number 0)
455 (accessing-symbol 0)
456 (nitems 0)
457 (items [0]))
459 (wisent-struct shifts
460 next ; -> shifts
461 (number 0)
462 (nshifts 0)
463 (shifts [0]))
465 (wisent-struct reductions
466 next ; -> reductions
467 (number 0)
468 (nreds 0)
469 (rules [0]))
471 (wisent-struct errs
472 (nerrs 0)
473 (errs [0]))
475 ;;;; --------------------------------------------------------
476 ;;;; Find unreachable terminals, nonterminals and productions
477 ;;;; --------------------------------------------------------
479 (defun wisent-bits-equal (L R n)
480 "Visit L and R and return non-nil if their first N elements are `='.
481 L and R must be vectors of integers."
482 (let* ((i (1- n))
483 (iseq t))
484 (while (and iseq (natnump i))
485 (setq iseq (= (aref L i) (aref R i))
486 i (1- i)))
487 iseq))
489 (defun wisent-nbits (i)
490 "Return number of bits set in integer I."
491 (let ((count 0))
492 (while (not (zerop i))
493 ;; i ^= (i & ((unsigned) (-(int) i)))
494 (setq i (logxor i (logand i (- i)))
495 count (1+ count)))
496 count))
498 (defun wisent-bits-size (S n)
499 "In vector S count the total of bits set in first N elements.
500 S must be a vector of integers."
501 (let* ((i (1- n))
502 (count 0))
503 (while (natnump i)
504 (setq count (+ count (wisent-nbits (aref S i)))
505 i (1- i)))
506 count))
508 (defun wisent-useful-production (i N0)
509 "Return non-nil if production I is in useful set N0."
510 (let* ((useful t)
511 (r (aref rrhs i))
513 (while (and useful (> (setq n (aref ritem r)) 0))
514 (if (wisent-ISVAR n)
515 (setq useful (wisent-BITISSET N0 (- n ntokens))))
516 (setq r (1+ r)))
517 useful))
519 (defun wisent-useless-nonterminals ()
520 "Find out which nonterminals are used."
521 (let (Np Ns i n break)
522 ;; N is set as built. Np is set being built this iteration. P is
523 ;; set of all productions which have a RHS all in N.
524 (setq n (wisent-WORDSIZE nvars)
525 Np (make-vector n 0))
527 ;; The set being computed is a set of nonterminals which can
528 ;; derive the empty string or strings consisting of all
529 ;; terminals. At each iteration a nonterminal is added to the set
530 ;; if there is a production with that nonterminal as its LHS for
531 ;; which all the nonterminals in its RHS are already in the set.
532 ;; Iterate until the set being computed remains unchanged. Any
533 ;; nonterminals not in the set at that point are useless in that
534 ;; they will never be used in deriving a sentence of the language.
536 ;; This iteration doesn't use any special traversal over the
537 ;; productions. A set is kept of all productions for which all
538 ;; the nonterminals in the RHS are in useful. Only productions
539 ;; not in this set are scanned on each iteration. At the end,
540 ;; this set is saved to be used when finding useful productions:
541 ;; only productions in this set will appear in the final grammar.
543 (while (not break)
544 (setq i (1- n))
545 (while (natnump i)
546 ;; Np[i] = N[i]
547 (aset Np i (aref N i))
548 (setq i (1- i)))
550 (setq i 1)
551 (while (<= i nrules)
552 (if (not (wisent-BITISSET P i))
553 (when (wisent-useful-production i N)
554 (wisent-SETBIT Np (- (aref rlhs i) ntokens))
555 (wisent-SETBIT P i)))
556 (setq i (1+ i)))
557 (if (wisent-bits-equal N Np n)
558 (setq break t)
559 (setq Ns Np
560 Np N
561 N Ns)))
562 (setq N Np)))
564 (defun wisent-inaccessible-symbols ()
565 "Find out which productions are reachable and which symbols are used."
566 ;; Starting with an empty set of productions and a set of symbols
567 ;; which only has the start symbol in it, iterate over all
568 ;; productions until the set of productions remains unchanged for an
569 ;; iteration. For each production which has a LHS in the set of
570 ;; reachable symbols, add the production to the set of reachable
571 ;; productions, and add all of the nonterminals in the RHS of the
572 ;; production to the set of reachable symbols.
574 ;; Consider only the (partially) reduced grammar which has only
575 ;; nonterminals in N and productions in P.
577 ;; The result is the set P of productions in the reduced grammar,
578 ;; and the set V of symbols in the reduced grammar.
580 ;; Although this algorithm also computes the set of terminals which
581 ;; are reachable, no terminal will be deleted from the grammar. Some
582 ;; terminals might not be in the grammar but might be generated by
583 ;; semantic routines, and so the user might want them available with
584 ;; specified numbers. (Is this true?) However, the non reachable
585 ;; terminals are printed (if running in verbose mode) so that the
586 ;; user can know.
587 (let (Vp Vs Pp i tt r n m break)
588 (setq n (wisent-WORDSIZE nsyms)
589 m (wisent-WORDSIZE (1+ nrules))
590 Vp (make-vector n 0)
591 Pp (make-vector m 0))
593 ;; If the start symbol isn't useful, then nothing will be useful.
594 (when (wisent-BITISSET N (- start-symbol ntokens))
595 (wisent-SETBIT V start-symbol)
596 (while (not break)
597 (setq i (1- n))
598 (while (natnump i)
599 (aset Vp i (aref V i))
600 (setq i (1- i)))
601 (setq i 1)
602 (while (<= i nrules)
603 (when (and (not (wisent-BITISSET Pp i))
604 (wisent-BITISSET P i)
605 (wisent-BITISSET V (aref rlhs i)))
606 (setq r (aref rrhs i))
607 (while (natnump (setq tt (aref ritem r)))
608 (if (or (wisent-ISTOKEN tt)
609 (wisent-BITISSET N (- tt ntokens)))
610 (wisent-SETBIT Vp tt))
611 (setq r (1+ r)))
612 (wisent-SETBIT Pp i))
613 (setq i (1+ i)))
614 (if (wisent-bits-equal V Vp n)
615 (setq break t)
616 (setq Vs Vp
617 Vp V
618 V Vs))))
619 (setq V Vp)
621 ;; Tokens 0, 1 are internal to Wisent. Consider them useful.
622 (wisent-SETBIT V 0) ;; end-of-input token
623 (wisent-SETBIT V 1) ;; error token
624 (setq P Pp)
626 (setq nuseless-productions (- nrules (wisent-bits-size P m))
627 nuseless-nonterminals nvars
628 i ntokens)
629 (while (< i nsyms)
630 (if (wisent-BITISSET V i)
631 (setq nuseless-nonterminals (1- nuseless-nonterminals)))
632 (setq i (1+ i)))
634 ;; A token that was used in %prec should not be warned about.
635 (setq i 1)
636 (while (<= i nrules)
637 (if (aref rprec i)
638 (wisent-SETBIT V1 (aref rprec i)))
639 (setq i (1+ i)))
642 (defun wisent-reduce-grammar-tables ()
643 "Disable useless productions."
644 (if (> nuseless-productions 0)
645 (let ((pn 1))
646 (while (<= pn nrules)
647 (aset ruseful pn (wisent-BITISSET P pn))
648 (setq pn (1+ pn))))))
650 (defun wisent-nonterminals-reduce ()
651 "Remove useless nonterminals."
652 (let (i n r item nontermmap tags-sorted)
653 ;; Map the nonterminals to their new index: useful first, useless
654 ;; afterwards. Kept for later report.
655 (setq nontermmap (make-vector nvars 0)
656 n ntokens
657 i ntokens)
658 (while (< i nsyms)
659 (when (wisent-BITISSET V i)
660 (aset nontermmap (- i ntokens) n)
661 (setq n (1+ n)))
662 (setq i (1+ i)))
663 (setq i ntokens)
664 (while (< i nsyms)
665 (unless (wisent-BITISSET V i)
666 (aset nontermmap (- i ntokens) n)
667 (setq n (1+ n)))
668 (setq i (1+ i)))
669 ;; Shuffle elements of tables indexed by symbol number
670 (setq tags-sorted (make-vector nvars nil)
671 i ntokens)
672 (while (< i nsyms)
673 (setq n (aref nontermmap (- i ntokens)))
674 (aset tags-sorted (- n ntokens) (aref tags i))
675 (setq i (1+ i)))
676 (setq i ntokens)
677 (while (< i nsyms)
678 (aset tags i (aref tags-sorted (- i ntokens)))
679 (setq i (1+ i)))
680 ;; Replace all symbol numbers in valid data structures.
681 (setq i 1)
682 (while (<= i nrules)
683 (aset rlhs i (aref nontermmap (- (aref rlhs i) ntokens)))
684 (setq i (1+ i)))
685 (setq r 0)
686 (while (setq item (aref ritem r))
687 (if (wisent-ISVAR item)
688 (aset ritem r (aref nontermmap (- item ntokens))))
689 (setq r (1+ r)))
690 (setq start-symbol (aref nontermmap (- start-symbol ntokens))
691 nsyms (- nsyms nuseless-nonterminals)
692 nvars (- nvars nuseless-nonterminals))
695 (defun wisent-total-useless ()
696 "Report number of useless nonterminals and productions."
697 (let* ((src (wisent-source))
698 (src (if src (concat " in " src) ""))
699 (msg (format "Grammar%s contains" src)))
700 (if (> nuseless-nonterminals 0)
701 (setq msg (format "%s %d useless nonterminal%s"
702 msg nuseless-nonterminals
703 (if (> nuseless-nonterminals 0) "s" ""))))
704 (if (and (> nuseless-nonterminals 0) (> nuseless-productions 0))
705 (setq msg (format "%s and" msg)))
706 (if (> nuseless-productions 0)
707 (setq msg (format "%s %d useless rule%s"
708 msg nuseless-productions
709 (if (> nuseless-productions 0) "s" ""))))
710 (message msg)))
712 (defun wisent-reduce-grammar ()
713 "Find unreachable terminals, nonterminals and productions."
714 ;; Allocate the global sets used to compute the reduced grammar
715 (setq N (make-vector (wisent-WORDSIZE nvars) 0)
716 P (make-vector (wisent-WORDSIZE (1+ nrules)) 0)
717 V (make-vector (wisent-WORDSIZE nsyms) 0)
718 V1 (make-vector (wisent-WORDSIZE nsyms) 0)
719 nuseless-nonterminals 0
720 nuseless-productions 0)
722 (wisent-useless-nonterminals)
723 (wisent-inaccessible-symbols)
725 (when (> (+ nuseless-nonterminals nuseless-productions) 0)
726 (wisent-total-useless)
727 (or (wisent-BITISSET N (- start-symbol ntokens))
728 (error "Start symbol `%s' does not derive any sentence"
729 (wisent-tag start-symbol)))
730 (wisent-reduce-grammar-tables)
731 (if (> nuseless-nonterminals 0)
732 (wisent-nonterminals-reduce))))
734 (defun wisent-print-useless ()
735 "Output the detailed results of the reductions."
736 (let (i b r)
737 (when (> nuseless-nonterminals 0)
738 ;; Useless nonterminals have been moved after useful ones.
739 (wisent-log "\n\nUseless nonterminals:\n\n")
740 (setq i 0)
741 (while (< i nuseless-nonterminals)
742 (wisent-log " %s\n" (wisent-tag (+ nsyms i)))
743 (setq i (1+ i))))
744 (setq b nil
745 i 0)
746 (while (< i ntokens)
747 (unless (or (wisent-BITISSET V i) (wisent-BITISSET V1 i))
748 (or b
749 (wisent-log "\n\nTerminals which are not used:\n\n"))
750 (setq b t)
751 (wisent-log " %s\n" (wisent-tag i)))
752 (setq i (1+ i)))
753 (when (> nuseless-productions 0)
754 (wisent-log "\n\nUseless rules:\n\n")
755 (setq i 1)
756 (while (<= i nrules)
757 (unless (aref ruseful i)
758 (wisent-log "#%s " (wisent-pad-string (format "%d" i) 4))
759 (wisent-log "%s:" (wisent-tag (aref rlhs i)))
760 (setq r (aref rrhs i))
761 (while (natnump (aref ritem r))
762 (wisent-log " %s" (wisent-tag (aref ritem r)))
763 (setq r (1+ r)))
764 (wisent-log ";\n"))
765 (setq i (1+ i))))
766 (if (or b (> nuseless-nonterminals 0) (> nuseless-productions 0))
767 (wisent-log "\n\n"))
770 ;;;; -----------------------------
771 ;;;; Match rules with nonterminals
772 ;;;; -----------------------------
774 (defun wisent-set-derives ()
775 "Find, for each variable (nonterminal), which rules can derive it.
776 It sets up the value of DERIVES so that DERIVES[i - NTOKENS] points to
777 a list of rule numbers, terminated with -1."
778 (let (i lhs p q dset delts)
779 (setq dset (make-vector nvars nil)
780 delts (make-vector (1+ nrules) 0))
781 (setq p 0 ;; p = delts
782 i nrules)
783 (while (> i 0)
784 (when (aref ruseful i)
785 (setq lhs (aref rlhs i))
786 ;; p->next = dset[lhs];
787 ;; p->value = i;
788 (aset delts p (cons i (aref dset (- lhs ntokens)))) ;; (value . next)
789 (aset dset (- lhs ntokens) p) ;; dset[lhs] = p
790 (setq p (1+ p)) ;; p++
792 (setq i (1- i)))
794 (setq derives (make-vector nvars nil)
795 i ntokens)
797 (while (< i nsyms)
798 (setq q nil
799 p (aref dset (- i ntokens))) ;; p = dset[i]
801 (while p
802 (setq p (aref delts p)
803 q (cons (car p) q) ;;q++ = p->value
804 p (cdr p))) ;; p = p->next
805 (setq q (nreverse (cons -1 q))) ;; *q++ = -1
806 (aset derives (- i ntokens) q) ;; derives[i] = q
807 (setq i (1+ i)))
810 ;;;; --------------------------------------------------------
811 ;;;; Find which nonterminals can expand into the null string.
812 ;;;; --------------------------------------------------------
814 (defun wisent-print-nullable ()
815 "Print NULLABLE."
816 (let (i)
817 (wisent-log "NULLABLE\n")
818 (setq i ntokens)
819 (while (< i nsyms)
820 (wisent-log "\t%s: %s\n" (wisent-tag i)
821 (if (aref nullable (- i ntokens))
822 "yes" : "no"))
823 (setq i (1+ i)))
824 (wisent-log "\n\n")))
826 (defun wisent-set-nullable ()
827 "Set up NULLABLE.
828 A vector saying which nonterminals can expand into the null string.
829 NULLABLE[i - NTOKENS] is nil if symbol I can do so."
830 (let (ruleno s1 s2 p r squeue rcount rsets relts item any-tokens)
831 (setq squeue (make-vector nvars 0)
832 rcount (make-vector (1+ nrules) 0)
833 rsets (make-vector nvars nil) ;; - ntokens
834 relts (make-vector (+ nitems nvars 1) nil)
835 nullable (make-vector nvars nil)) ;; - ntokens
836 (setq s1 0 s2 0 ;; s1 = s2 = squeue
837 p 0 ;; p = relts
838 ruleno 1)
839 (while (<= ruleno nrules)
840 (when (aref ruseful ruleno)
841 (if (> (aref ritem (aref rrhs ruleno)) 0)
842 (progn
843 ;; This rule has a non empty RHS.
844 (setq any-tokens nil
845 r (aref rrhs ruleno))
846 (while (> (aref ritem r) 0)
847 (if (wisent-ISTOKEN (aref ritem r))
848 (setq any-tokens t))
849 (setq r (1+ r)))
851 ;; This rule has only nonterminals: schedule it for the
852 ;; second pass.
853 (unless any-tokens
854 (setq r (aref rrhs ruleno))
855 (while (> (setq item (aref ritem r)) 0)
856 (aset rcount ruleno (1+ (aref rcount ruleno)))
857 ;; p->next = rsets[item];
858 ;; p->value = ruleno;
859 (aset relts p (cons ruleno (aref rsets (- item ntokens))))
860 ;; rsets[item] = p;
861 (aset rsets (- item ntokens) p)
862 (setq p (1+ p)
863 r (1+ r)))))
864 ;; This rule has an empty RHS.
865 ;; assert (ritem[rrhs[ruleno]] == -ruleno)
866 (when (and (aref ruseful ruleno)
867 (setq item (aref rlhs ruleno))
868 (not (aref nullable (- item ntokens))))
869 (aset nullable (- item ntokens) t)
870 (aset squeue s2 item)
871 (setq s2 (1+ s2)))
874 (setq ruleno (1+ ruleno)))
876 (while (< s1 s2)
877 ;; p = rsets[*s1++]
878 (setq p (aref rsets (- (aref squeue s1) ntokens))
879 s1 (1+ s1))
880 (while p
881 (setq p (aref relts p)
882 ruleno (car p)
883 p (cdr p)) ;; p = p->next
884 ;; if (--rcount[ruleno] == 0)
885 (when (zerop (aset rcount ruleno (1- (aref rcount ruleno))))
886 (setq item (aref rlhs ruleno))
887 (aset nullable (- item ntokens) t)
888 (aset squeue s2 item)
889 (setq s2 (1+ s2)))))
891 (if wisent-debug-flag
892 (wisent-print-nullable))
895 ;;;; -----------
896 ;;;; Subroutines
897 ;;;; -----------
899 (defun wisent-print-fderives ()
900 "Print FDERIVES."
901 (let (i j rp)
902 (wisent-log "\n\n\nFDERIVES\n")
903 (setq i ntokens)
904 (while (< i nsyms)
905 (wisent-log "\n\n%s derives\n\n" (wisent-tag i))
906 (setq rp (aref fderives (- i ntokens))
907 j 0)
908 (while (<= j nrules)
909 (if (wisent-BITISSET rp j)
910 (wisent-log " %d\n" j))
911 (setq j (1+ j)))
912 (setq i (1+ i)))))
914 (defun wisent-set-fderives ()
915 "Set up FDERIVES.
916 An NVARS by NRULES matrix of bits indicating which rules can help
917 derive the beginning of the data for each nonterminal. For example,
918 if symbol 5 can be derived as the sequence of symbols 8 3 20, and one
919 of the rules for deriving symbol 8 is rule 4, then the
920 [5 - NTOKENS, 4] bit in FDERIVES is set."
921 (let (i j k)
922 (setq fderives (make-vector nvars nil))
923 (setq i 0)
924 (while (< i nvars)
925 (aset fderives i (make-vector rulesetsize 0))
926 (setq i (1+ i)))
928 (wisent-set-firsts)
930 (setq i ntokens)
931 (while (< i nsyms)
932 (setq j ntokens)
933 (while (< j nsyms)
934 ;; if (BITISSET (FIRSTS (i), j - ntokens))
935 (when (wisent-BITISSET (aref firsts (- i ntokens)) (- j ntokens))
936 (setq k (aref derives (- j ntokens)))
937 (while (> (car k) 0) ;; derives[j][k] > 0
938 ;; SETBIT (FDERIVES (i), derives[j][k]);
939 (wisent-SETBIT (aref fderives (- i ntokens)) (car k))
940 (setq k (cdr k))))
941 (setq j (1+ j)))
942 (setq i (1+ i)))
944 (if wisent-debug-flag
945 (wisent-print-fderives))
948 (defun wisent-print-firsts ()
949 "Print FIRSTS."
950 (let (i j v)
951 (wisent-log "\n\n\nFIRSTS\n\n")
952 (setq i ntokens)
953 (while (< i nsyms)
954 (wisent-log "\n\n%s firsts\n\n" (wisent-tag i))
955 (setq v (aref firsts (- i ntokens))
956 j 0)
957 (while (< j nvars)
958 (if (wisent-BITISSET v j)
959 (wisent-log "\t\t%d (%s)\n"
960 (+ j ntokens) (wisent-tag (+ j ntokens))))
961 (setq j (1+ j)))
962 (setq i (1+ i)))))
964 (defun wisent-TC (R n)
965 "Transitive closure.
966 Given R an N by N matrix of bits, modify its contents to be the
967 transitive closure of what was given."
968 (let (i j k)
969 ;; R (J, I) && R (I, K) => R (J, K).
970 ;; I *must* be the outer loop.
971 (setq i 0)
972 (while (< i n)
973 (setq j 0)
974 (while (< j n)
975 (when (wisent-BITISSET (aref R j) i)
976 (setq k 0)
977 (while (< k n)
978 (if (wisent-BITISSET (aref R i) k)
979 (wisent-SETBIT (aref R j) k))
980 (setq k (1+ k))))
981 (setq j (1+ j)))
982 (setq i (1+ i)))))
984 (defun wisent-RTC (R n)
985 "Reflexive Transitive Closure.
986 Same as `wisent-TC' and then set all the bits on the diagonal of R, an
987 N by N matrix of bits."
988 (let (i)
989 (wisent-TC R n)
990 (setq i 0)
991 (while (< i n)
992 (wisent-SETBIT (aref R i) i)
993 (setq i (1+ i)))))
995 (defun wisent-set-firsts ()
996 "Set up FIRSTS.
997 An NVARS by NVARS bit matrix indicating which items can represent the
998 beginning of the input corresponding to which other items. For
999 example, if some rule expands symbol 5 into the sequence of symbols 8
1000 3 20, the symbol 8 can be the beginning of the data for symbol 5, so
1001 the bit [8 - NTOKENS, 5 - NTOKENS] in FIRSTS is set."
1002 (let (row symbol sp rowsize i)
1003 (setq rowsize (wisent-WORDSIZE nvars)
1004 varsetsize rowsize
1005 firsts (make-vector nvars nil)
1006 i 0)
1007 (while (< i nvars)
1008 (aset firsts i (make-vector rowsize 0))
1009 (setq i (1+ i)))
1011 (setq row 0 ;; row = firsts
1012 i ntokens)
1013 (while (< i nsyms)
1014 (setq sp (aref derives (- i ntokens)))
1015 (while (>= (car sp) 0)
1016 (setq symbol (aref ritem (aref rrhs (car sp)))
1017 sp (cdr sp))
1018 (when (wisent-ISVAR symbol)
1019 (setq symbol (- symbol ntokens))
1020 (wisent-SETBIT (aref firsts row) symbol)
1022 (setq row (1+ row)
1023 i (1+ i)))
1025 (wisent-RTC firsts nvars)
1027 (if wisent-debug-flag
1028 (wisent-print-firsts))
1031 (defun wisent-initialize-closure (n)
1032 "Allocate the ITEMSET and RULESET vectors.
1033 And precompute useful data so that `wisent-closure' can be called.
1034 N is the number of elements to allocate for ITEMSET."
1035 (setq itemset (make-vector n 0)
1036 rulesetsize (wisent-WORDSIZE (1+ nrules))
1037 ruleset (make-vector rulesetsize 0))
1039 (wisent-set-fderives))
1041 (defun wisent-print-closure ()
1042 "Print ITEMSET."
1043 (let (i)
1044 (wisent-log "\n\nclosure n = %d\n\n" nitemset)
1045 (setq i 0) ;; isp = itemset
1046 (while (< i nitemset)
1047 (wisent-log " %d\n" (aref itemset i))
1048 (setq i (1+ i)))))
1050 (defun wisent-closure (core n)
1051 "Set up RULESET and ITEMSET for the transitions out of CORE state.
1052 Given a vector of item numbers items, of length N, set up RULESET and
1053 ITEMSET to indicate what rules could be run and which items could be
1054 accepted when those items are the active ones.
1056 RULESET contains a bit for each rule. `wisent-closure' sets the bits
1057 for all rules which could potentially describe the next input to be
1058 read.
1060 ITEMSET is a vector of item numbers; NITEMSET is the number of items
1061 in ITEMSET. `wisent-closure' places there the indices of all items
1062 which represent units of input that could arrive next."
1063 (let (c r v symbol ruleno itemno)
1064 (if (zerop n)
1065 (progn
1066 (setq r 0
1067 v (aref fderives (- start-symbol ntokens)))
1068 (while (< r rulesetsize)
1069 ;; ruleset[r] = FDERIVES (start-symbol)[r];
1070 (aset ruleset r (aref v r))
1071 (setq r (1+ r)))
1073 (fillarray ruleset 0)
1074 (setq c 0)
1075 (while (< c n)
1076 (setq symbol (aref ritem (aref core c)))
1077 (when (wisent-ISVAR symbol)
1078 (setq r 0
1079 v (aref fderives (- symbol ntokens)))
1080 (while (< r rulesetsize)
1081 ;; ruleset[r] |= FDERIVES (ritem[core[c]])[r];
1082 (aset ruleset r (logior (aref ruleset r) (aref v r)))
1083 (setq r (1+ r))))
1084 (setq c (1+ c)))
1086 (setq nitemset 0
1088 ruleno 0
1089 r (* rulesetsize wisent-BITS-PER-WORD))
1090 (while (< ruleno r)
1091 (when (wisent-BITISSET ruleset ruleno)
1092 (setq itemno (aref rrhs ruleno))
1093 (while (and (< c n) (< (aref core c) itemno))
1094 (aset itemset nitemset (aref core c))
1095 (setq nitemset (1+ nitemset)
1096 c (1+ c)))
1097 (aset itemset nitemset itemno)
1098 (setq nitemset (1+ nitemset)))
1099 (setq ruleno (1+ ruleno)))
1101 (while (< c n)
1102 (aset itemset nitemset (aref core c))
1103 (setq nitemset (1+ nitemset)
1104 c (1+ c)))
1106 (if wisent-debug-flag
1107 (wisent-print-closure))
1110 ;;;; --------------------------------------------------
1111 ;;;; Generate the nondeterministic finite state machine
1112 ;;;; --------------------------------------------------
1114 (defun wisent-allocate-itemsets ()
1115 "Allocate storage for itemsets."
1116 (let (symbol i count symbol-count)
1117 ;; Count the number of occurrences of all the symbols in RITEMS.
1118 ;; Note that useless productions (hence useless nonterminals) are
1119 ;; browsed too, hence we need to allocate room for _all_ the
1120 ;; symbols.
1121 (setq count 0
1122 symbol-count (make-vector (+ nsyms nuseless-nonterminals) 0)
1123 i 0)
1124 (while (setq symbol (aref ritem i))
1125 (when (> symbol 0)
1126 (setq count (1+ count))
1127 (aset symbol-count symbol (1+ (aref symbol-count symbol))))
1128 (setq i (1+ i)))
1129 ;; See comments before `wisent-new-itemsets'. All the vectors of
1130 ;; items live inside kernel-items. The number of active items
1131 ;; after some symbol cannot be more than the number of times that
1132 ;; symbol appears as an item, which is symbol-count[symbol]. We
1133 ;; allocate that much space for each symbol.
1134 (setq kernel-base (make-vector nsyms nil)
1135 kernel-items (make-vector count 0)
1136 count 0
1137 i 0)
1138 (while (< i nsyms)
1139 (aset kernel-base i count)
1140 (setq count (+ count (aref symbol-count i))
1141 i (1+ i)))
1142 (setq shift-symbol symbol-count
1143 kernel-end (make-vector nsyms nil))
1146 (defun wisent-allocate-storage ()
1147 "Allocate storage for the state machine."
1148 (wisent-allocate-itemsets)
1149 (setq shiftset (make-vector nsyms 0)
1150 redset (make-vector (1+ nrules) 0)
1151 state-table (make-vector wisent-state-table-size nil)))
1153 (defun wisent-new-itemsets ()
1154 "Find which symbols can be shifted in the current state.
1155 And for each one record which items would be active after that shift.
1156 Uses the contents of ITEMSET. SHIFT-SYMBOL is set to a vector of the
1157 symbols that can be shifted. For each symbol in the grammar,
1158 KERNEL-BASE[symbol] points to a vector of item numbers activated if
1159 that symbol is shifted, and KERNEL-END[symbol] points after the end of
1160 that vector."
1161 (let (i shiftcount isp ksp symbol)
1162 (fillarray kernel-end nil)
1163 (setq shiftcount 0
1164 isp 0)
1165 (while (< isp nitemset)
1166 (setq i (aref itemset isp)
1167 isp (1+ isp)
1168 symbol (aref ritem i))
1169 (when (> symbol 0)
1170 (setq ksp (aref kernel-end symbol))
1171 (when (not ksp)
1172 ;; shift-symbol[shiftcount++] = symbol;
1173 (aset shift-symbol shiftcount symbol)
1174 (setq shiftcount (1+ shiftcount)
1175 ksp (aref kernel-base symbol)))
1176 ;; *ksp++ = i + 1;
1177 (aset kernel-items ksp (1+ i))
1178 (setq ksp (1+ ksp))
1179 (aset kernel-end symbol ksp)))
1180 (setq nshifts shiftcount)))
1182 (defun wisent-new-state (symbol)
1183 "Create a new state for those items, if necessary.
1184 SYMBOL is the core accessing-symbol.
1185 Subroutine of `wisent-get-state'."
1186 (let (n p isp1 isp2 iend items)
1187 (setq isp1 (aref kernel-base symbol)
1188 iend (aref kernel-end symbol)
1189 n (- iend isp1)
1190 p (make-core)
1191 items (make-vector n 0))
1192 (set-core-accessing-symbol p symbol)
1193 (set-core-number p nstates)
1194 (set-core-nitems p n)
1195 (set-core-items p items)
1196 (setq isp2 0) ;; isp2 = p->items
1197 (while (< isp1 iend)
1198 ;; *isp2++ = *isp1++;
1199 (aset items isp2 (aref kernel-items isp1))
1200 (setq isp1 (1+ isp1)
1201 isp2 (1+ isp2)))
1202 (set-core-next last-state p)
1203 (setq last-state p
1204 nstates (1+ nstates))
1207 (defun wisent-get-state (symbol)
1208 "Find the state we would get to by shifting SYMBOL.
1209 Return the state number for the state we would get to (from the
1210 current state) by shifting SYMBOL. Create a new state if no
1211 equivalent one exists already. Used by `wisent-append-states'."
1212 (let (key isp1 isp2 iend sp sp2 found n)
1213 (setq isp1 (aref kernel-base symbol)
1214 iend (aref kernel-end symbol)
1215 n (- iend isp1)
1216 key 0)
1217 ;; Add up the target state's active item numbers to get a hash key
1218 (while (< isp1 iend)
1219 (setq key (+ key (aref kernel-items isp1))
1220 isp1 (1+ isp1)))
1221 (setq key (% key wisent-state-table-size)
1222 sp (aref state-table key))
1223 (if sp
1224 (progn
1225 (setq found nil)
1226 (while (not found)
1227 (when (= (core-nitems sp) n)
1228 (setq found t
1229 isp1 (aref kernel-base symbol)
1230 ;; isp2 = sp->items;
1231 sp2 (core-items sp)
1232 isp2 0)
1234 (while (and found (< isp1 iend))
1235 ;; if (*isp1++ != *isp2++)
1236 (if (not (= (aref kernel-items isp1)
1237 (aref sp2 isp2)))
1238 (setq found nil))
1239 (setq isp1 (1+ isp1)
1240 isp2 (1+ isp2))))
1241 (if (not found)
1242 (if (core-link sp)
1243 (setq sp (core-link sp))
1244 ;; sp = sp->link = new-state(symbol)
1245 (setq sp (set-core-link sp (wisent-new-state symbol))
1246 found t)))))
1247 ;; bucket is empty
1248 ;; state-table[key] = sp = new-state(symbol)
1249 (setq sp (wisent-new-state symbol))
1250 (aset state-table key sp))
1251 ;; return (sp->number);
1252 (core-number sp)))
1254 (defun wisent-append-states ()
1255 "Find or create the core structures for states.
1256 Use the information computed by `wisent-new-itemsets' to find the
1257 state numbers reached by each shift transition from the current state.
1258 SHIFTSET is set up as a vector of state numbers of those states."
1259 (let (i j symbol)
1260 ;; First sort shift-symbol into increasing order
1261 (setq i 1)
1262 (while (< i nshifts)
1263 (setq symbol (aref shift-symbol i)
1264 j i)
1265 (while (and (> j 0) (> (aref shift-symbol (1- j)) symbol))
1266 (aset shift-symbol j (aref shift-symbol (1- j)))
1267 (setq j (1- j)))
1268 (aset shift-symbol j symbol)
1269 (setq i (1+ i)))
1270 (setq i 0)
1271 (while (< i nshifts)
1272 (setq symbol (aref shift-symbol i))
1273 (aset shiftset i (wisent-get-state symbol))
1274 (setq i (1+ i)))
1277 (defun wisent-initialize-states ()
1278 "Initialize states."
1279 (let ((p (make-core)))
1280 (setq first-state p
1281 last-state p
1282 this-state p
1283 nstates 1)))
1285 (defun wisent-save-shifts ()
1286 "Save the NSHIFTS of SHIFTSET into the current linked list."
1287 (let (p i shifts)
1288 (setq p (make-shifts)
1289 shifts (make-vector nshifts 0)
1290 i 0)
1291 (set-shifts-number p (core-number this-state))
1292 (set-shifts-nshifts p nshifts)
1293 (set-shifts-shifts p shifts)
1294 (while (< i nshifts)
1295 ;; (p->shifts)[i] = shiftset[i];
1296 (aset shifts i (aref shiftset i))
1297 (setq i (1+ i)))
1299 (if last-shift
1300 (set-shifts-next last-shift p)
1301 (setq first-shift p))
1302 (setq last-shift p)))
1304 (defun wisent-insert-start-shift ()
1305 "Create the next-to-final state.
1306 That is the state to which a shift has already been made in the
1307 initial state. Subroutine of `wisent-augment-automaton'."
1308 (let (statep sp)
1309 (setq statep (make-core))
1310 (set-core-number statep nstates)
1311 (set-core-accessing-symbol statep start-symbol)
1312 (set-core-next last-state statep)
1313 (setq last-state statep)
1314 ;; Make a shift from this state to (what will be) the final state.
1315 (setq sp (make-shifts))
1316 (set-shifts-number sp nstates)
1317 (setq nstates (1+ nstates))
1318 (set-shifts-nshifts sp 1)
1319 (set-shifts-shifts sp (vector nstates))
1320 (set-shifts-next last-shift sp)
1321 (setq last-shift sp)))
1323 (defun wisent-augment-automaton ()
1324 "Set up initial and final states as parser wants them.
1325 Make sure that the initial state has a shift that accepts the
1326 grammar's start symbol and goes to the next-to-final state, which has
1327 a shift going to the final state, which has a shift to the termination
1328 state. Create such states and shifts if they don't happen to exist
1329 already."
1330 (let (i k statep sp sp2 sp1 shifts)
1331 (setq sp first-shift)
1332 (if sp
1333 (progn
1334 (if (zerop (shifts-number sp))
1335 (progn
1336 (setq k (shifts-nshifts sp)
1337 statep (core-next first-state))
1338 ;; The states reached by shifts from first-state are
1339 ;; numbered 1...K. Look for one reached by
1340 ;; START-SYMBOL.
1341 (while (and (< (core-accessing-symbol statep) start-symbol)
1342 (< (core-number statep) k))
1343 (setq statep (core-next statep)))
1344 (if (= (core-accessing-symbol statep) start-symbol)
1345 (progn
1346 ;; We already have a next-to-final state. Make
1347 ;; sure it has a shift to what will be the final
1348 ;; state.
1349 (setq k (core-number statep))
1350 (while (and sp (< (shifts-number sp) k))
1351 (setq sp1 sp
1352 sp (shifts-next sp)))
1353 (if (and sp (= (shifts-number sp) k))
1354 (progn
1355 (setq i (shifts-nshifts sp)
1356 sp2 (make-shifts)
1357 shifts (make-vector (1+ i) 0))
1358 (set-shifts-number sp2 k)
1359 (set-shifts-nshifts sp2 (1+ i))
1360 (set-shifts-shifts sp2 shifts)
1361 (aset shifts 0 nstates)
1362 (while (> i 0)
1363 ;; sp2->shifts[i] = sp->shifts[i - 1];
1364 (aset shifts i (aref (shifts-shifts sp) (1- i)))
1365 (setq i (1- i)))
1366 ;; Patch sp2 into the chain of shifts in
1367 ;; place of sp, following sp1.
1368 (set-shifts-next sp2 (shifts-next sp))
1369 (set-shifts-next sp1 sp2)
1370 (if (eq sp last-shift)
1371 (setq last-shift sp2))
1373 (setq sp2 (make-shifts))
1374 (set-shifts-number sp2 k)
1375 (set-shifts-nshifts sp2 1)
1376 (set-shifts-shifts sp2 (vector nstates))
1377 ;; Patch sp2 into the chain of shifts between
1378 ;; sp1 and sp.
1379 (set-shifts-next sp2 sp)
1380 (set-shifts-next sp1 sp2)
1381 (if (not sp)
1382 (setq last-shift sp2))
1385 ;; There is no next-to-final state as yet.
1386 ;; Add one more shift in FIRST-SHIFT, going to the
1387 ;; next-to-final state (yet to be made).
1388 (setq sp first-shift
1389 sp2 (make-shifts)
1390 i (shifts-nshifts sp)
1391 shifts (make-vector (1+ i) 0))
1392 (set-shifts-nshifts sp2 (1+ i))
1393 (set-shifts-shifts sp2 shifts)
1394 ;; Stick this shift into the vector at the proper place.
1395 (setq statep (core-next first-state)
1397 i 0)
1398 (while (< i (shifts-nshifts sp))
1399 (when (and (> (core-accessing-symbol statep) start-symbol)
1400 (= i k))
1401 (aset shifts k nstates)
1402 (setq k (1+ k)))
1403 (aset shifts k (aref (shifts-shifts sp) i))
1404 (setq statep (core-next statep))
1405 (setq i (1+ i)
1406 k (1+ k)))
1407 (when (= i k)
1408 (aset shifts k nstates)
1409 (setq k (1+ k)))
1410 ;; Patch sp2 into the chain of shifts in place of
1411 ;; sp, at the beginning.
1412 (set-shifts-next sp2 (shifts-next sp))
1413 (setq first-shift sp2)
1414 (if (eq last-shift sp)
1415 (setq last-shift sp2))
1416 ;; Create the next-to-final state, with shift to
1417 ;; what will be the final state.
1418 (wisent-insert-start-shift)))
1419 ;; The initial state didn't even have any shifts. Give it
1420 ;; one shift, to the next-to-final state.
1421 (setq sp (make-shifts))
1422 (set-shifts-nshifts sp 1)
1423 (set-shifts-shifts sp (vector nstates))
1424 ;; Patch sp into the chain of shifts at the beginning.
1425 (set-shifts-next sp first-shift)
1426 (setq first-shift sp)
1427 ;; Create the next-to-final state, with shift to what will
1428 ;; be the final state.
1429 (wisent-insert-start-shift)))
1430 ;; There are no shifts for any state. Make one shift, from the
1431 ;; initial state to the next-to-final state.
1432 (setq sp (make-shifts))
1433 (set-shifts-nshifts sp 1)
1434 (set-shifts-shifts sp (vector nstates))
1435 ;; Initialize the chain of shifts with sp.
1436 (setq first-shift sp
1437 last-shift sp)
1438 ;; Create the next-to-final state, with shift to what will be
1439 ;; the final state.
1440 (wisent-insert-start-shift))
1441 ;; Make the final state--the one that follows a shift from the
1442 ;; next-to-final state. The symbol for that shift is 0
1443 ;; (end-of-file).
1444 (setq statep (make-core))
1445 (set-core-number statep nstates)
1446 (set-core-next last-state statep)
1447 (setq last-state statep)
1448 ;; Make the shift from the final state to the termination state.
1449 (setq sp (make-shifts))
1450 (set-shifts-number sp nstates)
1451 (setq nstates (1+ nstates))
1452 (set-shifts-nshifts sp 1)
1453 (set-shifts-shifts sp (vector nstates))
1454 (set-shifts-next last-shift sp)
1455 (setq last-shift sp)
1456 ;; Note that the variable FINAL-STATE refers to what we sometimes
1457 ;; call the termination state.
1458 (setq final-state nstates)
1459 ;; Make the termination state.
1460 (setq statep (make-core))
1461 (set-core-number statep nstates)
1462 (setq nstates (1+ nstates))
1463 (set-core-next last-state statep)
1464 (setq last-state statep)))
1466 (defun wisent-save-reductions ()
1467 "Make a reductions structure.
1468 Find which rules can be used for reduction transitions from the
1469 current state and make a reductions structure for the state to record
1470 their rule numbers."
1471 (let (i item count p rules)
1472 ;; Find and count the active items that represent ends of rules.
1473 (setq count 0
1474 i 0)
1475 (while (< i nitemset)
1476 (setq item (aref ritem (aref itemset i)))
1477 (when (< item 0)
1478 (aset redset count (- item))
1479 (setq count (1+ count)))
1480 (setq i (1+ i)))
1481 ;; Make a reductions structure and copy the data into it.
1482 (when (> count 0)
1483 (setq p (make-reductions)
1484 rules (make-vector count 0))
1485 (set-reductions-number p (core-number this-state))
1486 (set-reductions-nreds p count)
1487 (set-reductions-rules p rules)
1488 (setq i 0)
1489 (while (< i count)
1490 ;; (p->rules)[i] = redset[i]
1491 (aset rules i (aref redset i))
1492 (setq i (1+ i)))
1493 (if last-reduction
1494 (set-reductions-next last-reduction p)
1495 (setq first-reduction p))
1496 (setq last-reduction p))))
1498 (defun wisent-generate-states ()
1499 "Compute the nondeterministic finite state machine from the grammar."
1500 (wisent-allocate-storage)
1501 (wisent-initialize-closure nitems)
1502 (wisent-initialize-states)
1503 (while this-state
1504 ;; Set up RULESET and ITEMSET for the transitions out of this
1505 ;; state. RULESET gets a 1 bit for each rule that could reduce
1506 ;; now. ITEMSET gets a vector of all the items that could be
1507 ;; accepted next.
1508 (wisent-closure (core-items this-state) (core-nitems this-state))
1509 ;; Record the reductions allowed out of this state.
1510 (wisent-save-reductions)
1511 ;; Find the itemsets of the states that shifts can reach.
1512 (wisent-new-itemsets)
1513 ;; Find or create the core structures for those states.
1514 (wisent-append-states)
1515 ;; Create the shifts structures for the shifts to those states,
1516 ;; now that the state numbers transitioning to are known.
1517 (if (> nshifts 0)
1518 (wisent-save-shifts))
1519 ;; States are queued when they are created; process them all.
1520 (setq this-state (core-next this-state)))
1521 ;; Set up initial and final states as parser wants them.
1522 (wisent-augment-automaton))
1524 ;;;; ---------------------------
1525 ;;;; Compute look-ahead criteria
1526 ;;;; ---------------------------
1528 ;; Compute how to make the finite state machine deterministic; find
1529 ;; which rules need lookahead in each state, and which lookahead
1530 ;; tokens they accept.
1532 ;; `wisent-lalr', the entry point, builds these data structures:
1534 ;; GOTO-MAP, FROM-STATE and TO-STATE record each shift transition
1535 ;; which accepts a variable (a nonterminal). NGOTOS is the number of
1536 ;; such transitions.
1537 ;; FROM-STATE[t] is the state number which a transition leads from and
1538 ;; TO-STATE[t] is the state number it leads to.
1539 ;; All the transitions that accept a particular variable are grouped
1540 ;; together and GOTO-MAP[i - NTOKENS] is the index in FROM-STATE and
1541 ;; TO-STATE of the first of them.
1543 ;; CONSISTENT[s] is non-nil if no lookahead is needed to decide what
1544 ;; to do in state s.
1546 ;; LARULENO is a vector which records the rules that need lookahead in
1547 ;; various states. The elements of LARULENO that apply to state s are
1548 ;; those from LOOKAHEADS[s] through LOOKAHEADS[s+1]-1. Each element
1549 ;; of LARULENO is a rule number.
1551 ;; If LR is the length of LARULENO, then a number from 0 to LR-1 can
1552 ;; specify both a rule and a state where the rule might be applied.
1553 ;; LA is a LR by NTOKENS matrix of bits.
1554 ;; LA[l, i] is 1 if the rule LARULENO[l] is applicable in the
1555 ;; appropriate state when the next token is symbol i.
1556 ;; If LA[l, i] and LA[l, j] are both 1 for i != j, it is a conflict.
1558 (wisent-defcontext digraph
1559 INDEX R VERTICES
1560 infinity top)
1562 (defun wisent-traverse (i)
1563 "Traverse I."
1564 (let (j k height Ri Fi break)
1565 (setq top (1+ top)
1566 height top)
1567 (aset VERTICES top i) ;; VERTICES[++top] = i
1568 (aset INDEX i top) ;; INDEX[i] = height = top
1570 (setq Ri (aref R i))
1571 (when Ri
1572 (setq j 0)
1573 (while (>= (aref Ri j) 0)
1574 (if (zerop (aref INDEX (aref Ri j)))
1575 (wisent-traverse (aref Ri j)))
1576 ;; if (INDEX[i] > INDEX[R[i][j]])
1577 (if (> (aref INDEX i) (aref INDEX (aref Ri j)))
1578 ;; INDEX[i] = INDEX[R[i][j]];
1579 (aset INDEX i (aref INDEX (aref Ri j))))
1580 (setq Fi (aref F i)
1581 k 0)
1582 (while (< k tokensetsize)
1583 ;; F (i)[k] |= F (R[i][j])[k];
1584 (aset Fi k (logior (aref Fi k)
1585 (aref (aref F (aref Ri j)) k)))
1586 (setq k (1+ k)))
1587 (setq j (1+ j))))
1589 (when (= (aref INDEX i) height)
1590 (setq break nil)
1591 (while (not break)
1592 (setq j (aref VERTICES top) ;; j = VERTICES[top--]
1593 top (1- top))
1594 (aset INDEX j infinity)
1595 (if (= i j)
1596 (setq break t)
1597 (setq k 0)
1598 (while (< k tokensetsize)
1599 ;; F (j)[k] = F (i)[k];
1600 (aset (aref F j) k (aref (aref F i) k))
1601 (setq k (1+ k))))))
1604 (defun wisent-digraph (relation)
1605 "Digraph RELATION."
1606 (wisent-with-context digraph
1607 (setq infinity (+ ngotos 2)
1608 INDEX (make-vector (1+ ngotos) 0)
1609 VERTICES (make-vector (1+ ngotos) 0)
1610 top 0
1611 R relation)
1612 (let ((i 0))
1613 (while (< i ngotos)
1614 (if (and (= (aref INDEX i) 0) (aref R i))
1615 (wisent-traverse i))
1616 (setq i (1+ i))))))
1618 (defun wisent-set-state-table ()
1619 "Build state table."
1620 (let (sp)
1621 (setq state-table (make-vector nstates nil)
1622 sp first-state)
1623 (while sp
1624 (aset state-table (core-number sp) sp)
1625 (setq sp (core-next sp)))))
1627 (defun wisent-set-accessing-symbol ()
1628 "Build accessing symbol table."
1629 (let (sp)
1630 (setq accessing-symbol (make-vector nstates 0)
1631 sp first-state)
1632 (while sp
1633 (aset accessing-symbol (core-number sp) (core-accessing-symbol sp))
1634 (setq sp (core-next sp)))))
1636 (defun wisent-set-shift-table ()
1637 "Build shift table."
1638 (let (sp)
1639 (setq shift-table (make-vector nstates nil)
1640 sp first-shift)
1641 (while sp
1642 (aset shift-table (shifts-number sp) sp)
1643 (setq sp (shifts-next sp)))))
1645 (defun wisent-set-reduction-table ()
1646 "Build reduction table."
1647 (let (rp)
1648 (setq reduction-table (make-vector nstates nil)
1649 rp first-reduction)
1650 (while rp
1651 (aset reduction-table (reductions-number rp) rp)
1652 (setq rp (reductions-next rp)))))
1654 (defun wisent-set-maxrhs ()
1655 "Setup MAXRHS length."
1656 (let (i len max)
1657 (setq len 0
1658 max 0
1659 i 0)
1660 (while (aref ritem i)
1661 (if (> (aref ritem i) 0)
1662 (setq len (1+ len))
1663 (if (> len max)
1664 (setq max len))
1665 (setq len 0))
1666 (setq i (1+ i)))
1667 (setq maxrhs max)))
1669 (defun wisent-initialize-LA ()
1670 "Set up LA."
1671 (let (i j k count rp sp np v)
1672 (setq consistent (make-vector nstates nil)
1673 lookaheads (make-vector (1+ nstates) 0)
1674 count 0
1675 i 0)
1676 (while (< i nstates)
1677 (aset lookaheads i count)
1678 (setq rp (aref reduction-table i)
1679 sp (aref shift-table i))
1680 ;; if (rp &&
1681 ;; (rp->nreds > 1
1682 ;; || (sp && ! ISVAR(accessing-symbol[sp->shifts[0]]))))
1683 (if (and rp
1684 (or (> (reductions-nreds rp) 1)
1685 (and sp
1686 (not (wisent-ISVAR
1687 (aref accessing-symbol
1688 (aref (shifts-shifts sp) 0)))))))
1689 (setq count (+ count (reductions-nreds rp)))
1690 (aset consistent i t))
1692 (when sp
1693 (setq k 0
1694 j (shifts-nshifts sp)
1695 v (shifts-shifts sp))
1696 (while (< k j)
1697 (when (= (aref accessing-symbol (aref v k))
1698 error-token-number)
1699 (aset consistent i nil)
1700 (setq k j)) ;; break
1701 (setq k (1+ k))))
1702 (setq i (1+ i)))
1704 (aset lookaheads nstates count)
1706 (if (zerop count)
1707 (progn
1708 (setq LA (make-vector 1 nil)
1709 LAruleno (make-vector 1 0)
1710 lookback (make-vector 1 nil)))
1711 (setq LA (make-vector count nil)
1712 LAruleno (make-vector count 0)
1713 lookback (make-vector count nil)))
1714 (setq i 0 j (length LA))
1715 (while (< i j)
1716 (aset LA i (make-vector tokensetsize 0))
1717 (setq i (1+ i)))
1719 (setq np 0
1720 i 0)
1721 (while (< i nstates)
1722 (when (not (aref consistent i))
1723 (setq rp (aref reduction-table i))
1724 (when rp
1725 (setq j 0
1726 k (reductions-nreds rp)
1727 v (reductions-rules rp))
1728 (while (< j k)
1729 (aset LAruleno np (aref v j))
1730 (setq np (1+ np)
1731 j (1+ j)))))
1732 (setq i (1+ i)))))
1734 (defun wisent-set-goto-map ()
1735 "Set up GOTO-MAP."
1736 (let (sp i j symbol k temp-map state1 state2 v)
1737 (setq goto-map (make-vector (1+ nvars) 0)
1738 temp-map (make-vector (1+ nvars) 0))
1740 (setq ngotos 0
1741 sp first-shift)
1742 (while sp
1743 (setq i (1- (shifts-nshifts sp))
1744 v (shifts-shifts sp))
1745 (while (>= i 0)
1746 (setq symbol (aref accessing-symbol (aref v i)))
1747 (if (wisent-ISTOKEN symbol)
1748 (setq i 0) ;; break
1749 (setq ngotos (1+ ngotos))
1750 ;; goto-map[symbol]++;
1751 (aset goto-map (- symbol ntokens)
1752 (1+ (aref goto-map (- symbol ntokens)))))
1753 (setq i (1- i)))
1754 (setq sp (shifts-next sp)))
1756 (setq k 0
1757 i ntokens
1758 j 0)
1759 (while (< i nsyms)
1760 (aset temp-map j k)
1761 (setq k (+ k (aref goto-map j))
1762 i (1+ i)
1763 j (1+ j)))
1764 (setq i ntokens
1765 j 0)
1766 (while (< i nsyms)
1767 (aset goto-map j (aref temp-map j))
1768 (setq i (1+ i)
1769 j (1+ j)))
1770 ;; goto-map[nsyms] = ngotos;
1771 ;; temp-map[nsyms] = ngotos;
1772 (aset goto-map j ngotos)
1773 (aset temp-map j ngotos)
1775 (setq from-state (make-vector ngotos 0)
1776 to-state (make-vector ngotos 0)
1777 sp first-shift)
1778 (while sp
1779 (setq state1 (shifts-number sp)
1780 v (shifts-shifts sp)
1781 i (1- (shifts-nshifts sp)))
1782 (while (>= i 0)
1783 (setq state2 (aref v i)
1784 symbol (aref accessing-symbol state2))
1785 (if (wisent-ISTOKEN symbol)
1786 (setq i 0) ;; break
1787 ;; k = temp-map[symbol]++;
1788 (setq k (aref temp-map (- symbol ntokens)))
1789 (aset temp-map (- symbol ntokens) (1+ k))
1790 (aset from-state k state1)
1791 (aset to-state k state2))
1792 (setq i (1- i)))
1793 (setq sp (shifts-next sp)))
1796 (defun wisent-map-goto (state symbol)
1797 "Map a STATE/SYMBOL pair into its numeric representation."
1798 (let (high low middle s result)
1799 ;; low = goto-map[symbol];
1800 ;; high = goto-map[symbol + 1] - 1;
1801 (setq low (aref goto-map (- symbol ntokens))
1802 high (1- (aref goto-map (- (1+ symbol) ntokens))))
1803 (while (and (not result) (<= low high))
1804 (setq middle (/ (+ low high) 2)
1805 s (aref from-state middle))
1806 (cond
1807 ((= s state)
1808 (setq result middle))
1809 ((< s state)
1810 (setq low (1+ middle)))
1812 (setq high (1- middle)))))
1813 (or result
1814 (error "Internal error in `wisent-map-goto'"))
1817 (defun wisent-initialize-F ()
1818 "Set up F."
1819 (let (i j k sp edge rowp rp reads nedges stateno symbol v break)
1820 (setq F (make-vector ngotos nil)
1821 i 0)
1822 (while (< i ngotos)
1823 (aset F i (make-vector tokensetsize 0))
1824 (setq i (1+ i)))
1826 (setq reads (make-vector ngotos nil)
1827 edge (make-vector (1+ ngotos) 0)
1828 nedges 0
1829 rowp 0 ;; rowp = F
1830 i 0)
1831 (while (< i ngotos)
1832 (setq stateno (aref to-state i)
1833 sp (aref shift-table stateno))
1834 (when sp
1835 (setq k (shifts-nshifts sp)
1836 v (shifts-shifts sp)
1838 break nil)
1839 (while (and (not break) (< j k))
1840 ;; symbol = accessing-symbol[sp->shifts[j]];
1841 (setq symbol (aref accessing-symbol (aref v j)))
1842 (if (wisent-ISVAR symbol)
1843 (setq break t) ;; break
1844 (wisent-SETBIT (aref F rowp) symbol)
1845 (setq j (1+ j))))
1847 (while (< j k)
1848 ;; symbol = accessing-symbol[sp->shifts[j]];
1849 (setq symbol (aref accessing-symbol (aref v j)))
1850 (when (aref nullable (- symbol ntokens))
1851 (aset edge nedges (wisent-map-goto stateno symbol))
1852 (setq nedges (1+ nedges)))
1853 (setq j (1+ j)))
1855 (when (> nedges 0)
1856 ;; reads[i] = rp = NEW2(nedges + 1, short);
1857 (setq rp (make-vector (1+ nedges) 0)
1858 j 0)
1859 (aset reads i rp)
1860 (while (< j nedges)
1861 ;; rp[j] = edge[j];
1862 (aset rp j (aref edge j))
1863 (setq j (1+ j)))
1864 (aset rp nedges -1)
1865 (setq nedges 0)))
1866 (setq rowp (1+ rowp))
1867 (setq i (1+ i)))
1868 (wisent-digraph reads)
1871 (defun wisent-add-lookback-edge (stateno ruleno gotono)
1872 "Add a lookback edge.
1873 STATENO, RULENO, GOTONO are self-explanatory."
1874 (let (i k found)
1875 (setq i (aref lookaheads stateno)
1876 k (aref lookaheads (1+ stateno))
1877 found nil)
1878 (while (and (not found) (< i k))
1879 (if (= (aref LAruleno i) ruleno)
1880 (setq found t)
1881 (setq i (1+ i))))
1883 (or found
1884 (error "Internal error in `wisent-add-lookback-edge'"))
1886 ;; value . next
1887 ;; lookback[i] = (gotono . lookback[i])
1888 (aset lookback i (cons gotono (aref lookback i)))))
1890 (defun wisent-transpose (R-arg n)
1891 "Return the transpose of R-ARG, of size N.
1892 Destroy R-ARG, as it is replaced with the result. R-ARG[I] is nil or
1893 a -1 terminated list of numbers. RESULT[NUM] is nil or the -1
1894 terminated list of the I such as NUM is in R-ARG[I]."
1895 (let (i j new-R end-R nedges v sp)
1896 (setq new-R (make-vector n nil)
1897 end-R (make-vector n nil)
1898 nedges (make-vector n 0))
1900 ;; Count.
1901 (setq i 0)
1902 (while (< i n)
1903 (setq v (aref R-arg i))
1904 (when v
1905 (setq j 0)
1906 (while (>= (aref v j) 0)
1907 (aset nedges (aref v j) (1+ (aref nedges (aref v j))))
1908 (setq j (1+ j))))
1909 (setq i (1+ i)))
1911 ;; Allocate.
1912 (setq i 0)
1913 (while (< i n)
1914 (when (> (aref nedges i) 0)
1915 (setq sp (make-vector (1+ (aref nedges i)) 0))
1916 (aset sp (aref nedges i) -1)
1917 (aset new-R i sp)
1918 (aset end-R i 0))
1919 (setq i (1+ i)))
1921 ;; Store.
1922 (setq i 0)
1923 (while (< i n)
1924 (setq v (aref R-arg i))
1925 (when v
1926 (setq j 0)
1927 (while (>= (aref v j) 0)
1928 (aset (aref new-R (aref v j)) (aref end-R (aref v j)) i)
1929 (aset end-R (aref v j) (1+ (aref end-R (aref v j))))
1930 (setq j (1+ j))))
1931 (setq i (1+ i)))
1933 new-R))
1935 (defun wisent-build-relations ()
1936 "Build relations."
1937 (let (i j k rulep rp sp length nedges done state1 stateno
1938 symbol1 symbol2 edge states v)
1939 (setq includes (make-vector ngotos nil)
1940 edge (make-vector (1+ ngotos) 0)
1941 states (make-vector (1+ maxrhs) 0)
1942 i 0)
1944 (while (< i ngotos)
1945 (setq nedges 0
1946 state1 (aref from-state i)
1947 symbol1 (aref accessing-symbol (aref to-state i))
1948 rulep (aref derives (- symbol1 ntokens)))
1950 (while (> (car rulep) 0)
1951 (aset states 0 state1)
1952 (setq length 1
1953 stateno state1
1954 rp (aref rrhs (car rulep))) ;; rp = ritem + rrhs[*rulep]
1955 (while (> (aref ritem rp) 0) ;; *rp > 0
1956 (setq symbol2 (aref ritem rp)
1957 sp (aref shift-table stateno)
1958 k (shifts-nshifts sp)
1959 v (shifts-shifts sp)
1960 j 0)
1961 (while (< j k)
1962 (setq stateno (aref v j))
1963 (if (= (aref accessing-symbol stateno) symbol2)
1964 (setq j k) ;; break
1965 (setq j (1+ j))))
1966 ;; states[length++] = stateno;
1967 (aset states length stateno)
1968 (setq length (1+ length))
1969 (setq rp (1+ rp)))
1971 (if (not (aref consistent stateno))
1972 (wisent-add-lookback-edge stateno (car rulep) i))
1974 (setq length (1- length)
1975 done nil)
1976 (while (not done)
1977 (setq done t
1978 rp (1- rp))
1979 (when (and (>= rp 0) (wisent-ISVAR (aref ritem rp)))
1980 ;; stateno = states[--length];
1981 (setq length (1- length)
1982 stateno (aref states length))
1983 (aset edge nedges (wisent-map-goto stateno (aref ritem rp)))
1984 (setq nedges (1+ nedges))
1985 (if (aref nullable (- (aref ritem rp) ntokens))
1986 (setq done nil))))
1987 (setq rulep (cdr rulep)))
1989 (when (> nedges 0)
1990 (setq v (make-vector (1+ nedges) 0)
1991 j 0)
1992 (aset includes i v)
1993 (while (< j nedges)
1994 (aset v j (aref edge j))
1995 (setq j (1+ j)))
1996 (aset v nedges -1))
1997 (setq i (1+ i)))
1999 (setq includes (wisent-transpose includes ngotos))
2002 (defun wisent-compute-FOLLOWS ()
2003 "Compute follows."
2004 (wisent-digraph includes))
2006 (defun wisent-compute-lookaheads ()
2007 "Compute lookaheads."
2008 (let (i j n v1 v2 sp)
2009 (setq n (aref lookaheads nstates)
2010 i 0)
2011 (while (< i n)
2012 (setq sp (aref lookback i))
2013 (while sp
2014 (setq v1 (aref LA i)
2015 v2 (aref F (car sp))
2016 j 0)
2017 (while (< j tokensetsize)
2018 ;; LA (i)[j] |= F (sp->value)[j]
2019 (aset v1 j (logior (aref v1 j) (aref v2 j)))
2020 (setq j (1+ j)))
2021 (setq sp (cdr sp)))
2022 (setq i (1+ i)))))
2024 (defun wisent-lalr ()
2025 "Make the nondeterministic finite state machine deterministic."
2026 (setq tokensetsize (wisent-WORDSIZE ntokens))
2027 (wisent-set-state-table)
2028 (wisent-set-accessing-symbol)
2029 (wisent-set-shift-table)
2030 (wisent-set-reduction-table)
2031 (wisent-set-maxrhs)
2032 (wisent-initialize-LA)
2033 (wisent-set-goto-map)
2034 (wisent-initialize-F)
2035 (wisent-build-relations)
2036 (wisent-compute-FOLLOWS)
2037 (wisent-compute-lookaheads))
2039 ;;;; -----------------------------------------------
2040 ;;;; Find and resolve or report look-ahead conflicts
2041 ;;;; -----------------------------------------------
2043 (defsubst wisent-log-resolution (state LAno token resolution)
2044 "Log a shift-reduce conflict resolution.
2045 In specified STATE between rule pointed by lookahead number LANO and
2046 TOKEN, resolved as RESOLUTION."
2047 (if (or wisent-verbose-flag wisent-debug-flag)
2048 (wisent-log
2049 "Conflict in state %d between rule %d and token %s resolved as %s.\n"
2050 state (aref LAruleno LAno) (wisent-tag token) resolution)))
2052 (defun wisent-flush-shift (state token)
2053 "Turn off the shift recorded in the specified STATE for TOKEN.
2054 Used when we resolve a shift-reduce conflict in favor of the reduction."
2055 (let (shiftp i k v)
2056 (when (setq shiftp (aref shift-table state))
2057 (setq k (shifts-nshifts shiftp)
2058 v (shifts-shifts shiftp)
2059 i 0)
2060 (while (< i k)
2061 (if (and (not (zerop (aref v i)))
2062 (= token (aref accessing-symbol (aref v i))))
2063 (aset v i 0))
2064 (setq i (1+ i))))))
2066 (defun wisent-resolve-sr-conflict (state lookaheadnum)
2067 "Attempt to resolve shift-reduce conflict for one rule.
2068 Resolve by means of precedence declarations. The conflict occurred in
2069 specified STATE for the rule pointed by the lookahead symbol
2070 LOOKAHEADNUM. It has already been checked that the rule has a
2071 precedence. A conflict is resolved by modifying the shift or reduce
2072 tables so that there is no longer a conflict."
2073 (let (i redprec errp errs nerrs token sprec sassoc)
2074 ;; Find the rule to reduce by to get precedence of reduction
2075 (setq token (aref tags (aref rprec (aref LAruleno lookaheadnum)))
2076 redprec (wisent-prec token)
2077 errp (make-errs)
2078 errs (make-vector ntokens 0)
2079 nerrs 0
2080 i 0)
2081 (set-errs-errs errp errs)
2082 (while (< i ntokens)
2083 (setq token (aref tags i))
2084 (when (and (wisent-BITISSET (aref LA lookaheadnum) i)
2085 (wisent-BITISSET lookaheadset i)
2086 (setq sprec (wisent-prec token)))
2087 ;; Shift-reduce conflict occurs for token number I and it has
2088 ;; a precedence. The precedence of shifting is that of token
2089 ;; I.
2090 (cond
2091 ((< sprec redprec)
2092 (wisent-log-resolution state lookaheadnum i "reduce")
2093 ;; Flush the shift for this token
2094 (wisent-RESETBIT lookaheadset i)
2095 (wisent-flush-shift state i)
2097 ((> sprec redprec)
2098 (wisent-log-resolution state lookaheadnum i "shift")
2099 ;; Flush the reduce for this token
2100 (wisent-RESETBIT (aref LA lookaheadnum) i)
2103 ;; Matching precedence levels.
2104 ;; For left association, keep only the reduction.
2105 ;; For right association, keep only the shift.
2106 ;; For nonassociation, keep neither.
2107 (setq sassoc (wisent-assoc token))
2108 (cond
2109 ((eq sassoc 'right)
2110 (wisent-log-resolution state lookaheadnum i "shift"))
2111 ((eq sassoc 'left)
2112 (wisent-log-resolution state lookaheadnum i "reduce"))
2113 ((eq sassoc 'nonassoc)
2114 (wisent-log-resolution state lookaheadnum i "an error"))
2116 (when (not (eq sassoc 'right))
2117 ;; Flush the shift for this token
2118 (wisent-RESETBIT lookaheadset i)
2119 (wisent-flush-shift state i))
2120 (when (not (eq sassoc 'left))
2121 ;; Flush the reduce for this token
2122 (wisent-RESETBIT (aref LA lookaheadnum) i))
2123 (when (eq sassoc 'nonassoc)
2124 ;; Record an explicit error for this token
2125 (aset errs nerrs i)
2126 (setq nerrs (1+ nerrs)))
2128 (setq i (1+ i)))
2129 (when (> nerrs 0)
2130 (set-errs-nerrs errp nerrs)
2131 (aset err-table state errp))
2134 (defun wisent-set-conflicts (state)
2135 "Find and attempt to resolve conflicts in specified STATE."
2136 (let (i j k v shiftp symbol)
2137 (unless (aref consistent state)
2138 (fillarray lookaheadset 0)
2140 (when (setq shiftp (aref shift-table state))
2141 (setq k (shifts-nshifts shiftp)
2142 v (shifts-shifts shiftp)
2143 i 0)
2144 (while (and (< i k)
2145 (wisent-ISTOKEN
2146 (setq symbol (aref accessing-symbol (aref v i)))))
2147 (or (zerop (aref v i))
2148 (wisent-SETBIT lookaheadset symbol))
2149 (setq i (1+ i))))
2151 ;; Loop over all rules which require lookahead in this state
2152 ;; first check for shift-reduce conflict, and try to resolve
2153 ;; using precedence
2154 (setq i (aref lookaheads state)
2155 k (aref lookaheads (1+ state)))
2156 (while (< i k)
2157 (when (aref rprec (aref LAruleno i))
2158 (setq v (aref LA i)
2159 j 0)
2160 (while (< j tokensetsize)
2161 (if (zerop (logand (aref v j) (aref lookaheadset j)))
2162 (setq j (1+ j))
2163 ;; if (LA (i)[j] & lookaheadset[j])
2164 (wisent-resolve-sr-conflict state i)
2165 (setq j tokensetsize)))) ;; break
2166 (setq i (1+ i)))
2168 ;; Loop over all rules which require lookahead in this state
2169 ;; Check for conflicts not resolved above.
2170 (setq i (aref lookaheads state))
2171 (while (< i k)
2172 (setq v (aref LA i)
2173 j 0)
2174 (while (< j tokensetsize)
2175 ;; if (LA (i)[j] & lookaheadset[j])
2176 (if (not (zerop (logand (aref v j) (aref lookaheadset j))))
2177 (aset conflicts state t))
2178 (setq j (1+ j)))
2179 (setq j 0)
2180 (while (< j tokensetsize)
2181 ;; lookaheadset[j] |= LA (i)[j];
2182 (aset lookaheadset j (logior (aref lookaheadset j)
2183 (aref v j)))
2184 (setq j (1+ j)))
2185 (setq i (1+ i)))
2188 (defun wisent-resolve-conflicts ()
2189 "Find and resolve conflicts."
2190 (let (i)
2191 (setq conflicts (make-vector nstates nil)
2192 shiftset (make-vector tokensetsize 0)
2193 lookaheadset (make-vector tokensetsize 0)
2194 err-table (make-vector nstates nil)
2195 i 0)
2196 (while (< i nstates)
2197 (wisent-set-conflicts i)
2198 (setq i (1+ i)))))
2200 (defun wisent-count-sr-conflicts (state)
2201 "Count the number of shift/reduce conflicts in specified STATE."
2202 (let (i j k shiftp symbol v)
2203 (setq src-count 0
2204 shiftp (aref shift-table state))
2205 (when shiftp
2206 (fillarray shiftset 0)
2207 (fillarray lookaheadset 0)
2208 (setq k (shifts-nshifts shiftp)
2209 v (shifts-shifts shiftp)
2210 i 0)
2211 (while (< i k)
2212 (when (not (zerop (aref v i)))
2213 (setq symbol (aref accessing-symbol (aref v i)))
2214 (if (wisent-ISVAR symbol)
2215 (setq i k) ;; break
2216 (wisent-SETBIT shiftset symbol)))
2217 (setq i (1+ i)))
2219 (setq k (aref lookaheads (1+ state))
2220 i (aref lookaheads state))
2221 (while (< i k)
2222 (setq v (aref LA i)
2223 j 0)
2224 (while (< j tokensetsize)
2225 ;; lookaheadset[j] |= LA (i)[j]
2226 (aset lookaheadset j (logior (aref lookaheadset j)
2227 (aref v j)))
2228 (setq j (1+ j)))
2229 (setq i (1+ i)))
2231 (setq k 0)
2232 (while (< k tokensetsize)
2233 ;; lookaheadset[k] &= shiftset[k];
2234 (aset lookaheadset k (logand (aref lookaheadset k)
2235 (aref shiftset k)))
2236 (setq k (1+ k)))
2238 (setq i 0)
2239 (while (< i ntokens)
2240 (if (wisent-BITISSET lookaheadset i)
2241 (setq src-count (1+ src-count)))
2242 (setq i (1+ i))))
2243 src-count))
2245 (defun wisent-count-rr-conflicts (state)
2246 "Count the number of reduce/reduce conflicts in specified STATE."
2247 (let (i j count n m)
2248 (setq rrc-count 0
2249 m (aref lookaheads state)
2250 n (aref lookaheads (1+ state)))
2251 (when (>= (- n m) 2)
2252 (setq i 0)
2253 (while (< i ntokens)
2254 (setq count 0
2255 j m)
2256 (while (< j n)
2257 (if (wisent-BITISSET (aref LA j) i)
2258 (setq count (1+ count)))
2259 (setq j (1+ j)))
2261 (if (>= count 2)
2262 (setq rrc-count (1+ rrc-count)))
2263 (setq i (1+ i))))
2264 rrc-count))
2266 (defcustom wisent-expected-conflicts nil
2267 "If non-nil suppress the warning about shift/reduce conflicts.
2268 It is a decimal integer N that says there should be no warning if
2269 there are N shift/reduce conflicts and no reduce/reduce conflicts. A
2270 warning is given if there are either more or fewer conflicts, or if
2271 there are any reduce/reduce conflicts."
2272 :group 'wisent
2273 :type '(choice (const nil) integer))
2275 (defun wisent-total-conflicts ()
2276 "Report the total number of conflicts."
2277 (unless (and (zerop rrc-total)
2278 (or (zerop src-total)
2279 (= src-total (or wisent-expected-conflicts 0))))
2280 (let* ((src (wisent-source))
2281 (src (if src (concat " in " src) ""))
2282 (msg (format "Grammar%s contains" src)))
2283 (if (> src-total 0)
2284 (setq msg (format "%s %d shift/reduce conflict%s"
2285 msg src-total (if (> src-total 1)
2286 "s" ""))))
2287 (if (and (> src-total 0) (> rrc-total 0))
2288 (setq msg (format "%s and" msg)))
2289 (if (> rrc-total 0)
2290 (setq msg (format "%s %d reduce/reduce conflict%s"
2291 msg rrc-total (if (> rrc-total 1)
2292 "s" ""))))
2293 (message msg))))
2295 (defun wisent-print-conflicts ()
2296 "Report conflicts."
2297 (let (i)
2298 (setq src-total 0
2299 rrc-total 0
2300 i 0)
2301 (while (< i nstates)
2302 (when (aref conflicts i)
2303 (wisent-count-sr-conflicts i)
2304 (wisent-count-rr-conflicts i)
2305 (setq src-total (+ src-total src-count)
2306 rrc-total (+ rrc-total rrc-count))
2307 (when (or wisent-verbose-flag wisent-debug-flag)
2308 (wisent-log "State %d contains" i)
2309 (if (> src-count 0)
2310 (wisent-log " %d shift/reduce conflict%s"
2311 src-count (if (> src-count 1) "s" "")))
2313 (if (and (> src-count 0) (> rrc-count 0))
2314 (wisent-log " and"))
2316 (if (> rrc-count 0)
2317 (wisent-log " %d reduce/reduce conflict%s"
2318 rrc-count (if (> rrc-count 1) "s" "")))
2320 (wisent-log ".\n")))
2321 (setq i (1+ i)))
2322 (wisent-total-conflicts)))
2324 ;;;; --------------------------------------
2325 ;;;; Report information on generated parser
2326 ;;;; --------------------------------------
2327 (defun wisent-print-grammar ()
2328 "Print grammar."
2329 (let (i j r break left-count right-count)
2331 (wisent-log "\n\nGrammar\n\n Number, Rule\n")
2332 (setq i 1)
2333 (while (<= i nrules)
2334 ;; Don't print rules disabled in `wisent-reduce-grammar-tables'.
2335 (when (aref ruseful i)
2336 (wisent-log " %s %s ->"
2337 (wisent-pad-string (number-to-string i) 6)
2338 (wisent-tag (aref rlhs i)))
2339 (setq r (aref rrhs i))
2340 (if (> (aref ritem r) 0)
2341 (while (> (aref ritem r) 0)
2342 (wisent-log " %s" (wisent-tag (aref ritem r)))
2343 (setq r (1+ r)))
2344 (wisent-log " /* empty */"))
2345 (wisent-log "\n"))
2346 (setq i (1+ i)))
2348 (wisent-log "\n\nTerminals, with rules where they appear\n\n")
2349 (wisent-log "%s (-1)\n" (wisent-tag 0))
2350 (setq i 1)
2351 (while (< i ntokens)
2352 (wisent-log "%s (%d)" (wisent-tag i) i)
2353 (setq j 1)
2354 (while (<= j nrules)
2355 (setq r (aref rrhs j)
2356 break nil)
2357 (while (and (not break) (> (aref ritem r) 0))
2358 (if (setq break (= (aref ritem r) i))
2359 (wisent-log " %d" j)
2360 (setq r (1+ r))))
2361 (setq j (1+ j)))
2362 (wisent-log "\n")
2363 (setq i (1+ i)))
2365 (wisent-log "\n\nNonterminals, with rules where they appear\n\n")
2366 (setq i ntokens)
2367 (while (< i nsyms)
2368 (setq left-count 0
2369 right-count 0
2370 j 1)
2371 (while (<= j nrules)
2372 (if (= (aref rlhs j) i)
2373 (setq left-count (1+ left-count)))
2374 (setq r (aref rrhs j)
2375 break nil)
2376 (while (and (not break) (> (aref ritem r) 0))
2377 (if (= (aref ritem r) i)
2378 (setq right-count (1+ right-count)
2379 break t)
2380 (setq r (1+ r))))
2381 (setq j (1+ j)))
2382 (wisent-log "%s (%d)\n " (wisent-tag i) i)
2383 (when (> left-count 0)
2384 (wisent-log " on left:")
2385 (setq j 1)
2386 (while (<= j nrules)
2387 (if (= (aref rlhs j) i)
2388 (wisent-log " %d" j))
2389 (setq j (1+ j))))
2390 (when (> right-count 0)
2391 (if (> left-count 0)
2392 (wisent-log ","))
2393 (wisent-log " on right:")
2394 (setq j 1)
2395 (while (<= j nrules)
2396 (setq r (aref rrhs j)
2397 break nil)
2398 (while (and (not break) (> (aref ritem r) 0))
2399 (if (setq break (= (aref ritem r) i))
2400 (wisent-log " %d" j)
2401 (setq r (1+ r))))
2402 (setq j (1+ j))))
2403 (wisent-log "\n")
2404 (setq i (1+ i)))
2407 (defun wisent-print-reductions (state)
2408 "Print reductions on STATE."
2409 (let (i j k v symbol m n defaulted
2410 default-LA default-rule cmax count shiftp errp nodefault)
2411 (setq nodefault nil
2412 i 0)
2413 (fillarray shiftset 0)
2415 (setq shiftp (aref shift-table state))
2416 (when shiftp
2417 (setq k (shifts-nshifts shiftp)
2418 v (shifts-shifts shiftp)
2419 i 0)
2420 (while (< i k)
2421 (when (not (zerop (aref v i)))
2422 (setq symbol (aref accessing-symbol (aref v i)))
2423 (if (wisent-ISVAR symbol)
2424 (setq i k) ;; break
2425 ;; If this state has a shift for the error token, don't
2426 ;; use a default rule.
2427 (if (= symbol error-token-number)
2428 (setq nodefault t))
2429 (wisent-SETBIT shiftset symbol)))
2430 (setq i (1+ i))))
2432 (setq errp (aref err-table state))
2433 (when errp
2434 (setq k (errs-nerrs errp)
2435 v (errs-errs errp)
2436 i 0)
2437 (while (< i k)
2438 (if (not (zerop (setq symbol (aref v i))))
2439 (wisent-SETBIT shiftset symbol))
2440 (setq i (1+ i))))
2442 (setq m (aref lookaheads state)
2443 n (aref lookaheads (1+ state)))
2445 (cond
2446 ((and (= (- n m) 1) (not nodefault))
2447 (setq default-rule (aref LAruleno m)
2448 v (aref LA m)
2449 k 0)
2450 (while (< k tokensetsize)
2451 (aset lookaheadset k (logand (aref v k)
2452 (aref shiftset k)))
2453 (setq k (1+ k)))
2455 (setq i 0)
2456 (while (< i ntokens)
2457 (if (wisent-BITISSET lookaheadset i)
2458 (wisent-log " %s\t[reduce using rule %d (%s)]\n"
2459 (wisent-tag i) default-rule
2460 (wisent-tag (aref rlhs default-rule))))
2461 (setq i (1+ i)))
2462 (wisent-log " $default\treduce using rule %d (%s)\n\n"
2463 default-rule
2464 (wisent-tag (aref rlhs default-rule)))
2466 ((>= (- n m) 1)
2467 (setq cmax 0
2468 default-LA -1
2469 default-rule 0)
2470 (when (not nodefault)
2471 (setq i m)
2472 (while (< i n)
2473 (setq v (aref LA i)
2474 count 0
2475 k 0)
2476 (while (< k tokensetsize)
2477 ;; lookaheadset[k] = LA (i)[k] & ~shiftset[k]
2478 (aset lookaheadset k
2479 (logand (aref v k)
2480 (lognot (aref shiftset k))))
2481 (setq k (1+ k)))
2482 (setq j 0)
2483 (while (< j ntokens)
2484 (if (wisent-BITISSET lookaheadset j)
2485 (setq count (1+ count)))
2486 (setq j (1+ j)))
2487 (if (> count cmax)
2488 (setq cmax count
2489 default-LA i
2490 default-rule (aref LAruleno i)))
2491 (setq k 0)
2492 (while (< k tokensetsize)
2493 (aset shiftset k (logior (aref shiftset k)
2494 (aref lookaheadset k)))
2495 (setq k (1+ k)))
2496 (setq i (1+ i))))
2498 (fillarray shiftset 0)
2500 (when shiftp
2501 (setq k (shifts-nshifts shiftp)
2502 v (shifts-shifts shiftp)
2503 i 0)
2504 (while (< i k)
2505 (when (not (zerop (aref v i)))
2506 (setq symbol (aref accessing-symbol (aref v i)))
2507 (if (wisent-ISVAR symbol)
2508 (setq i k) ;; break
2509 (wisent-SETBIT shiftset symbol)))
2510 (setq i (1+ i))))
2512 (setq i 0)
2513 (while (< i ntokens)
2514 (setq defaulted nil
2515 count (if (wisent-BITISSET shiftset i) 1 0)
2516 j m)
2517 (while (< j n)
2518 (when (wisent-BITISSET (aref LA j) i)
2519 (if (zerop count)
2520 (progn
2521 (if (not (= j default-LA))
2522 (wisent-log
2523 " %s\treduce using rule %d (%s)\n"
2524 (wisent-tag i) (aref LAruleno j)
2525 (wisent-tag (aref rlhs (aref LAruleno j))))
2526 (setq defaulted t))
2527 (setq count (1+ count)))
2528 (if defaulted
2529 (wisent-log
2530 " %s\treduce using rule %d (%s)\n"
2531 (wisent-tag i) (aref LAruleno default-LA)
2532 (wisent-tag (aref rlhs (aref LAruleno default-LA)))))
2533 (setq defaulted nil)
2534 (wisent-log
2535 " %s\t[reduce using rule %d (%s)]\n"
2536 (wisent-tag i) (aref LAruleno j)
2537 (wisent-tag (aref rlhs (aref LAruleno j))))))
2538 (setq j (1+ j)))
2539 (setq i (1+ i)))
2541 (if (>= default-LA 0)
2542 (wisent-log
2543 " $default\treduce using rule %d (%s)\n"
2544 default-rule
2545 (wisent-tag (aref rlhs default-rule))))
2546 ))))
2548 (defun wisent-print-actions (state)
2549 "Print actions on STATE."
2550 (let (i j k v state1 symbol shiftp errp redp rule nerrs break)
2551 (setq shiftp (aref shift-table state)
2552 redp (aref reduction-table state)
2553 errp (aref err-table state))
2554 (if (and (not shiftp) (not redp))
2555 (if (= final-state state)
2556 (wisent-log " $default\taccept\n")
2557 (wisent-log " NO ACTIONS\n"))
2558 (if (not shiftp)
2559 (setq i 0
2560 k 0)
2561 (setq k (shifts-nshifts shiftp)
2562 v (shifts-shifts shiftp)
2564 break nil)
2565 (while (and (not break) (< i k))
2566 (if (zerop (setq state1 (aref v i)))
2567 (setq i (1+ i))
2568 (setq symbol (aref accessing-symbol state1))
2569 ;; The following line used to be turned off.
2570 (if (wisent-ISVAR symbol)
2571 (setq break t) ;; break
2572 (wisent-log " %s\tshift, and go to state %d\n"
2573 (wisent-tag symbol) state1)
2574 (setq i (1+ i)))))
2575 (if (> i 0)
2576 (wisent-log "\n")))
2578 (when errp
2579 (setq nerrs (errs-nerrs errp)
2580 v (errs-errs errp)
2581 j 0)
2582 (while (< j nerrs)
2583 (if (aref v j)
2584 (wisent-log " %s\terror (nonassociative)\n"
2585 (wisent-tag (aref v j))))
2586 (setq j (1+ j)))
2587 (if (> j 0)
2588 (wisent-log "\n")))
2590 (cond
2591 ((and (aref consistent state) redp)
2592 (setq rule (aref (reductions-rules redp) 0)
2593 symbol (aref rlhs rule))
2594 (wisent-log " $default\treduce using rule %d (%s)\n\n"
2595 rule (wisent-tag symbol))
2597 (redp
2598 (wisent-print-reductions state)
2601 (when (< i k)
2602 (setq v (shifts-shifts shiftp))
2603 (while (< i k)
2604 (when (setq state1 (aref v i))
2605 (setq symbol (aref accessing-symbol state1))
2606 (wisent-log " %s\tgo to state %d\n"
2607 (wisent-tag symbol) state1))
2608 (setq i (1+ i)))
2609 (wisent-log "\n"))
2612 (defun wisent-print-core (state)
2613 "Print STATE core."
2614 (let (i k rule statep sp sp1)
2615 (setq statep (aref state-table state)
2616 k (core-nitems statep))
2617 (when (> k 0)
2618 (setq i 0)
2619 (while (< i k)
2620 ;; sp1 = sp = ritem + statep->items[i];
2621 (setq sp1 (aref (core-items statep) i)
2622 sp sp1)
2623 (while (> (aref ritem sp) 0)
2624 (setq sp (1+ sp)))
2626 (setq rule (- (aref ritem sp)))
2627 (wisent-log " %s -> " (wisent-tag (aref rlhs rule)))
2629 (setq sp (aref rrhs rule))
2630 (while (< sp sp1)
2631 (wisent-log "%s " (wisent-tag (aref ritem sp)))
2632 (setq sp (1+ sp)))
2633 (wisent-log ".")
2634 (while (> (aref ritem sp) 0)
2635 (wisent-log " %s" (wisent-tag (aref ritem sp)))
2636 (setq sp (1+ sp)))
2637 (wisent-log " (rule %d)\n" rule)
2638 (setq i (1+ i)))
2639 (wisent-log "\n"))))
2641 (defun wisent-print-state (state)
2642 "Print information on STATE."
2643 (wisent-log "\n\nstate %d\n\n" state)
2644 (wisent-print-core state)
2645 (wisent-print-actions state))
2647 (defun wisent-print-states ()
2648 "Print information on states."
2649 (let ((i 0))
2650 (while (< i nstates)
2651 (wisent-print-state i)
2652 (setq i (1+ i)))))
2654 (defun wisent-print-results ()
2655 "Print information on generated parser.
2656 Report detailed information if `wisent-verbose-flag' or
2657 `wisent-debug-flag' are non-nil."
2658 (when (or wisent-verbose-flag wisent-debug-flag)
2659 (wisent-print-useless))
2660 (wisent-print-conflicts)
2661 (when (or wisent-verbose-flag wisent-debug-flag)
2662 (wisent-print-grammar)
2663 (wisent-print-states))
2664 ;; Append output to log file when running in batch mode
2665 (when (wisent-noninteractive)
2666 (wisent-append-to-log-file)
2667 (wisent-clear-log)))
2669 ;;;; ---------------------------------
2670 ;;;; Build the generated parser tables
2671 ;;;; ---------------------------------
2673 (defun wisent-action-row (state actrow)
2674 "Figure out the actions for the specified STATE.
2675 Decide what to do for each type of token if seen as the lookahead
2676 token in specified state. The value returned is used as the default
2677 action for the state. In addition, ACTROW is filled with what to do
2678 for each kind of token, index by symbol number, with nil meaning do
2679 the default action. The value 'error, means this situation is an
2680 error. The parser recognizes this value specially.
2682 This is where conflicts are resolved. The loop over lookahead rules
2683 considered lower-numbered rules last, and the last rule considered
2684 that likes a token gets to handle it."
2685 (let (i j k m n v default-rule nreds rule max count
2686 shift-state symbol redp shiftp errp nodefault)
2688 (fillarray actrow nil)
2690 (setq default-rule 0
2691 nodefault nil ;; nil inhibit having any default reduction
2692 nreds 0
2695 redp (aref reduction-table state))
2697 (when redp
2698 (setq nreds (reductions-nreds redp))
2699 (when (>= nreds 1)
2700 ;; loop over all the rules available here which require
2701 ;; lookahead
2702 (setq m (aref lookaheads state)
2703 n (aref lookaheads (1+ state))
2704 i (1- n))
2705 (while (>= i m)
2706 ;; and find each token which the rule finds acceptable to
2707 ;; come next
2708 (setq j 0)
2709 (while (< j ntokens)
2710 ;; and record this rule as the rule to use if that token
2711 ;; follows.
2712 (if (wisent-BITISSET (aref LA i) j)
2713 (aset actrow j (- (aref LAruleno i)))
2715 (setq j (1+ j)))
2716 (setq i (1- i)))))
2718 ;; Now see which tokens are allowed for shifts in this state. For
2719 ;; them, record the shift as the thing to do. So shift is
2720 ;; preferred to reduce.
2721 (setq shiftp (aref shift-table state))
2722 (when shiftp
2723 (setq k (shifts-nshifts shiftp)
2724 v (shifts-shifts shiftp)
2725 i 0)
2726 (while (< i k)
2727 (setq shift-state (aref v i))
2728 (if (zerop shift-state)
2729 nil ;; continue
2730 (setq symbol (aref accessing-symbol shift-state))
2731 (if (wisent-ISVAR symbol)
2732 (setq i k) ;; break
2733 (aset actrow symbol shift-state)
2734 ;; Do not use any default reduction if there is a shift
2735 ;; for error
2736 (if (= symbol error-token-number)
2737 (setq nodefault t))))
2738 (setq i (1+ i))))
2740 ;; See which tokens are an explicit error in this state (due to
2741 ;; %nonassoc). For them, record error as the action.
2742 (setq errp (aref err-table state))
2743 (when errp
2744 (setq k (errs-nerrs errp)
2745 v (errs-errs errp)
2746 i 0)
2747 (while (< i k)
2748 (aset actrow (aref v i) wisent-error-tag)
2749 (setq i (1+ i))))
2751 ;; Now find the most common reduction and make it the default
2752 ;; action for this state.
2753 (when (and (>= nreds 1) (not nodefault))
2754 (if (aref consistent state)
2755 (setq default-rule (- (aref (reductions-rules redp) 0)))
2756 (setq max 0
2757 i m)
2758 (while (< i n)
2759 (setq count 0
2760 rule (- (aref LAruleno i))
2761 j 0)
2762 (while (< j ntokens)
2763 (if (and (numberp (aref actrow j))
2764 (= (aref actrow j) rule))
2765 (setq count (1+ count)))
2766 (setq j (1+ j)))
2767 (if (> count max)
2768 (setq max count
2769 default-rule rule))
2770 (setq i (1+ i)))
2771 ;; actions which match the default are replaced with zero,
2772 ;; which means "use the default"
2773 (when (> max 0)
2774 (setq j 0)
2775 (while (< j ntokens)
2776 (if (and (numberp (aref actrow j))
2777 (= (aref actrow j) default-rule))
2778 (aset actrow j nil))
2779 (setq j (1+ j)))
2782 ;; If have no default rule, if this is the final state the default
2783 ;; is accept else it is an error. So replace any action which
2784 ;; says "error" with "use default".
2785 (when (zerop default-rule)
2786 (if (= final-state state)
2787 (setq default-rule wisent-accept-tag)
2788 (setq j 0)
2789 (while (< j ntokens)
2790 (if (eq (aref actrow j) wisent-error-tag)
2791 (aset actrow j nil))
2792 (setq j (1+ j)))
2793 (setq default-rule wisent-error-tag)))
2794 default-rule))
2796 (defconst wisent-default-tag 'default
2797 "Tag used in an action table to indicate a default action.")
2799 ;; These variables only exist locally in the function
2800 ;; `wisent-state-actions' and are shared by all other nested callees.
2801 (wisent-defcontext semantic-actions
2802 ;; Uninterned symbols used in code generation.
2803 stack sp gotos state
2804 ;; Name of the current semantic action
2805 NAME)
2807 (defun wisent-state-actions ()
2808 "Figure out the actions for every state.
2809 Return the action table."
2810 ;; Store the semantic action obarray in (unused) RCODE[0].
2811 (aset rcode 0 (make-vector 13 0))
2812 (let (i j action-table actrow action)
2813 (setq action-table (make-vector nstates nil)
2814 actrow (make-vector ntokens nil)
2815 i 0)
2816 (wisent-with-context semantic-actions
2817 (setq stack (make-symbol "stack")
2818 sp (make-symbol "sp")
2819 gotos (make-symbol "gotos")
2820 state (make-symbol "state"))
2821 (while (< i nstates)
2822 (setq action (wisent-action-row i actrow))
2823 ;; Translate a reduction into semantic action
2824 (and (integerp action) (< action 0)
2825 (setq action (wisent-semantic-action (- action))))
2826 (aset action-table i (list (cons wisent-default-tag action)))
2827 (setq j 0)
2828 (while (< j ntokens)
2829 (when (setq action (aref actrow j))
2830 ;; Translate a reduction into semantic action
2831 (and (integerp action) (< action 0)
2832 (setq action (wisent-semantic-action (- action))))
2833 (aset action-table i (cons (cons (aref tags j) action)
2834 (aref action-table i)))
2836 (setq j (1+ j)))
2837 (aset action-table i (nreverse (aref action-table i)))
2838 (setq i (1+ i)))
2839 action-table)))
2841 (defun wisent-goto-actions ()
2842 "Figure out what to do after reducing with each rule.
2843 Depending on the saved state from before the beginning of parsing the
2844 data that matched this rule. Return the goto table."
2845 (let (i j m n symbol state goto-table)
2846 (setq goto-table (make-vector nstates nil)
2847 i ntokens)
2848 (while (< i nsyms)
2849 (setq symbol (- i ntokens)
2850 m (aref goto-map symbol)
2851 n (aref goto-map (1+ symbol))
2852 j m)
2853 (while (< j n)
2854 (setq state (aref from-state j))
2855 (aset goto-table state
2856 (cons (cons (aref tags i) (aref to-state j))
2857 (aref goto-table state)))
2858 (setq j (1+ j)))
2859 (setq i (1+ i)))
2860 goto-table))
2862 (defsubst wisent-quote-p (sym)
2863 "Return non-nil if SYM is bound to the `quote' function."
2864 (condition-case nil
2865 (eq (indirect-function sym)
2866 (indirect-function 'quote))
2867 (error nil)))
2869 (defsubst wisent-backquote-p (sym)
2870 "Return non-nil if SYM is bound to the `backquote' function."
2871 (condition-case nil
2872 (eq (indirect-function sym)
2873 (indirect-function 'backquote))
2874 (error nil)))
2876 (defun wisent-check-$N (x m)
2877 "Return non-nil if X is a valid $N or $regionN symbol.
2878 That is if X is a $N or $regionN symbol with N >= 1 and N <= M.
2879 Also warn if X is a $N or $regionN symbol with N < 1 or N > M."
2880 (when (symbolp x)
2881 (let* ((n (symbol-name x))
2882 (i (and (string-match "\\`\\$\\(region\\)?\\([0-9]+\\)\\'" n)
2883 (string-to-number (match-string 2 n)))))
2884 (when i
2885 (if (and (>= i 1) (<= i m))
2887 (message
2888 "*** In %s, %s might be a free variable (rule has %s)"
2889 NAME x (format (cond ((< m 1) "no component")
2890 ((= m 1) "%d component")
2891 ("%d components"))
2893 nil)))))
2895 (defun wisent-semantic-action-expand-body (body n &optional found)
2896 "Parse BODY of semantic action.
2897 N is the maximum number of $N variables that can be referenced in
2898 BODY. Warn on references out of permitted range.
2899 Optional argument FOUND is the accumulated list of $N references
2900 encountered so far.
2901 Return a cons (FOUND . XBODY), where FOUND is the list of $N
2902 references found in BODY, and XBODY is BODY expression with
2903 `backquote' forms expanded."
2904 (if (not (listp body))
2905 ;; BODY is an atom, no expansion needed
2906 (progn
2907 (if (wisent-check-$N body n)
2908 ;; Accumulate $i symbol
2909 (pushnew body found :test #'equal))
2910 (cons found body))
2911 ;; BODY is a list, expand inside it
2912 (let (xbody sexpr)
2913 ;; If backquote expand it first
2914 (if (wisent-backquote-p (car body))
2915 (setq body (macroexpand body)))
2916 (while body
2917 (setq sexpr (car body)
2918 body (cdr body))
2919 (cond
2920 ;; Function call excepted quote expression
2921 ((and (consp sexpr)
2922 (not (wisent-quote-p (car sexpr))))
2923 (setq sexpr (wisent-semantic-action-expand-body sexpr n found)
2924 found (car sexpr)
2925 sexpr (cdr sexpr)))
2926 ;; $i symbol
2927 ((wisent-check-$N sexpr n)
2928 ;; Accumulate $i symbol
2929 (pushnew sexpr found :test #'equal))
2931 ;; Accumulate expanded forms
2932 (setq xbody (nconc xbody (list sexpr))))
2933 (cons found xbody))))
2935 (defun wisent-semantic-action (r)
2936 "Set up the Elisp function for semantic action at rule R.
2937 On entry RCODE[R] contains a vector [BODY N (NTERM I)] where BODY is the
2938 body of the semantic action, N is the maximum number of values
2939 available in the parser's stack, NTERM is the nonterminal the semantic
2940 action belongs to, and I is the index of the semantic action inside
2941 NTERM definition. Return the semantic action symbol.
2942 The semantic action function accepts three arguments:
2944 - the state/value stack
2945 - the top-of-stack index
2946 - the goto table
2948 And returns the updated top-of-stack index."
2949 (if (not (aref ruseful r))
2950 (aset rcode r nil)
2951 (let* ((actn (aref rcode r))
2952 (n (aref actn 1)) ; nb of val avail. in stack
2953 (NAME (apply 'format "%s:%d" (aref actn 2)))
2954 (form (wisent-semantic-action-expand-body (aref actn 0) n))
2955 ($l (car form)) ; list of $vars used in body
2956 (form (cdr form)) ; expanded form of body
2957 (nt (aref rlhs r)) ; nonterminal item no.
2958 (bl nil) ; `let*' binding list
2959 $v i j)
2961 ;; Compute $N and $regionN bindings
2962 (setq i n)
2963 (while (> i 0)
2964 (setq j (1+ (* 2 (- n i))))
2965 ;; Only bind $regionI if used in action
2966 (setq $v (intern (format "$region%d" i)))
2967 (if (memq $v $l)
2968 (setq bl (cons `(,$v (cdr (aref ,stack (- ,sp ,j)))) bl)))
2969 ;; Only bind $I if used in action
2970 (setq $v (intern (format "$%d" i)))
2971 (if (memq $v $l)
2972 (setq bl (cons `(,$v (car (aref ,stack (- ,sp ,j)))) bl)))
2973 (setq i (1- i)))
2975 ;; Compute J, the length of rule's RHS. It will give the
2976 ;; current parser state at STACK[SP - 2*J], and where to push
2977 ;; the new semantic value and the next state, respectively at:
2978 ;; STACK[SP - 2*J + 1] and STACK[SP - 2*J + 2]. Generally N,
2979 ;; the maximum number of values available in the stack, is equal
2980 ;; to J. But, for mid-rule actions, N is the number of rule
2981 ;; elements before the action and J is always 0 (empty rule).
2982 (setq i (aref rrhs r)
2983 j 0)
2984 (while (> (aref ritem i) 0)
2985 (setq j (1+ j)
2986 i (1+ i)))
2988 ;; Create the semantic action symbol.
2989 (setq actn (intern NAME (aref rcode 0)))
2991 ;; Store source code in function cell of the semantic action
2992 ;; symbol. It will be byte-compiled at automaton's compilation
2993 ;; time. Using a byte-compiled automaton can significantly
2994 ;; speed up parsing!
2995 (fset actn
2996 `(lambda (,stack ,sp ,gotos)
2997 (let* (,@bl
2998 ($region
2999 ,(cond
3000 ((= n 1)
3001 (if (assq '$region1 bl)
3002 '$region1
3003 `(cdr (aref ,stack (1- ,sp)))))
3004 ((> n 1)
3005 `(wisent-production-bounds
3006 ,stack (- ,sp ,(1- (* 2 n))) (1- ,sp)))))
3007 ($action ,NAME)
3008 ($nterm ',(aref tags nt))
3009 ,@(and (> j 0) `((,sp (- ,sp ,(* j 2)))))
3010 (,state (cdr (assq $nterm
3011 (aref ,gotos
3012 (aref ,stack ,sp))))))
3013 (setq ,sp (+ ,sp 2))
3014 ;; push semantic value
3015 (aset ,stack (1- ,sp) (cons ,form $region))
3016 ;; push next state
3017 (aset ,stack ,sp ,state)
3018 ;; return new top of stack
3019 ,sp)))
3021 ;; Return the semantic action symbol
3022 actn)))
3024 ;;;; ----------------------------
3025 ;;;; Build parser LALR automaton.
3026 ;;;; ----------------------------
3028 (defun wisent-parser-automaton ()
3029 "Compute and return LALR(1) automaton from GRAMMAR.
3030 GRAMMAR is in internal format. GRAM/ACTS are grammar rules
3031 in internal format. STARTS defines the start symbols."
3032 ;; Check for useless stuff
3033 (wisent-reduce-grammar)
3035 (wisent-set-derives)
3036 (wisent-set-nullable)
3037 ;; convert to nondeterministic finite state machine.
3038 (wisent-generate-states)
3039 ;; make it deterministic.
3040 (wisent-lalr)
3041 ;; Find and record any conflicts: places where one token of
3042 ;; lookahead is not enough to disambiguate the parsing. Also
3043 ;; resolve s/r conflicts based on precedence declarations.
3044 (wisent-resolve-conflicts)
3045 (wisent-print-results)
3047 (vector (wisent-state-actions) ; action table
3048 (wisent-goto-actions) ; goto table
3049 start-table ; start symbols
3050 (aref rcode 0) ; sem. action symbol obarray
3054 ;;;; -------------------
3055 ;;;; Parse input grammar
3056 ;;;; -------------------
3058 (defconst wisent-reserved-symbols (list wisent-error-term)
3059 "The list of reserved symbols.
3060 Also all symbols starting with a character defined in
3061 `wisent-reserved-capitals' are reserved for internal use.")
3063 (defconst wisent-reserved-capitals '(?\$ ?\@)
3064 "The list of reserved capital letters.
3065 All symbol starting with one of these letters are reserved for
3066 internal use.")
3068 (defconst wisent-starts-nonterm '$STARTS
3069 "Main start symbol.
3070 It gives the rules for start symbols.")
3072 (defvar wisent-single-start-flag nil
3073 "Non-nil means allows only one start symbol like in Bison.
3074 That is don't add extra start rules to the grammar. This is
3075 useful to compare the Wisent's generated automaton with the Bison's
3076 one.")
3078 (defsubst wisent-ISVALID-VAR (x)
3079 "Return non-nil if X is a character or an allowed symbol."
3080 (and x (symbolp x)
3081 (not (memq (aref (symbol-name x) 0) wisent-reserved-capitals))
3082 (not (memq x wisent-reserved-symbols))))
3084 (defsubst wisent-ISVALID-TOKEN (x)
3085 "Return non-nil if X is a character or an allowed symbol."
3086 (or (wisent-char-p x)
3087 (wisent-ISVALID-VAR x)))
3089 (defun wisent-push-token (symbol &optional nocheck)
3090 "Push a new SYMBOL in the list of tokens.
3091 Bypass checking if NOCHECK is non-nil."
3092 ;; Check
3093 (or nocheck (wisent-ISVALID-TOKEN symbol)
3094 (error "Invalid terminal symbol: %S" symbol))
3095 (if (memq symbol token-list)
3096 (message "*** duplicate terminal `%s' ignored" symbol)
3097 ;; Set up properties
3098 (wisent-set-prec symbol nil)
3099 (wisent-set-assoc symbol nil)
3100 (wisent-set-item-number symbol ntokens)
3101 ;; Add
3102 (setq ntokens (1+ ntokens)
3103 token-list (cons symbol token-list))))
3105 (defun wisent-push-var (symbol &optional nocheck)
3106 "Push a new SYMBOL in the list of nonterminals.
3107 Bypass checking if NOCHECK is non-nil."
3108 ;; Check
3109 (unless nocheck
3110 (or (wisent-ISVALID-VAR symbol)
3111 (error "Invalid nonterminal symbol: %S" symbol))
3112 (if (memq symbol var-list)
3113 (error "Nonterminal `%s' already defined" symbol)))
3114 ;; Set up properties
3115 (wisent-set-item-number symbol nvars)
3116 ;; Add
3117 (setq nvars (1+ nvars)
3118 var-list (cons symbol var-list)))
3120 (defun wisent-parse-nonterminals (defs)
3121 "Parse nonterminal definitions in DEFS.
3122 Fill in each element of the global arrays RPREC, RCODE, RUSEFUL with
3123 respectively rule precedence level, semantic action code and
3124 usefulness flag. Return a list of rules of the form (LHS . RHS) where
3125 LHS and RHS are respectively the Left Hand Side and Right Hand Side of
3126 the rule."
3127 (setq rprec nil
3128 rcode nil
3129 nitems 0
3130 nrules 0)
3131 (let (def nonterm rlist rule rules rhs rest item items
3132 rhl plevel semact @n @count iactn)
3133 (setq @count 0)
3134 (while defs
3135 (setq def (car defs)
3136 defs (cdr defs)
3137 nonterm (car def)
3138 rlist (cdr def)
3139 iactn 0)
3140 (or (consp rlist)
3141 (error "Invalid nonterminal definition syntax: %S" def))
3142 (while rlist
3143 (setq rule (car rlist)
3144 rlist (cdr rlist)
3145 items (car rule)
3146 rest (cdr rule)
3147 rhl 0
3148 rhs nil)
3150 ;; Check & count items
3151 (setq nitems (1+ nitems)) ;; LHS item
3152 (while items
3153 (setq item (car items)
3154 items (cdr items)
3155 nitems (1+ nitems)) ;; RHS items
3156 (if (listp item)
3157 ;; Mid-rule action
3158 (progn
3159 (setq @count (1+ @count)
3160 @n (intern (format "@%d" @count)))
3161 (wisent-push-var @n t)
3162 ;; Push a new empty rule with the mid-rule action
3163 (setq semact (vector item rhl (list nonterm iactn))
3164 iactn (1+ iactn)
3165 plevel nil
3166 rcode (cons semact rcode)
3167 rprec (cons plevel rprec)
3168 item @n ;; Replace action by @N nonterminal
3169 rules (cons (list item) rules)
3170 nitems (1+ nitems)
3171 nrules (1+ nrules)))
3172 ;; Check terminal or nonterminal symbol
3173 (cond
3174 ((or (memq item token-list) (memq item var-list)))
3175 ;; Create new literal character token
3176 ((wisent-char-p item) (wisent-push-token item t))
3177 ((error "Symbol `%s' is used, but is not defined as a token and has no rules"
3178 item))))
3179 (setq rhl (1+ rhl)
3180 rhs (cons item rhs)))
3182 ;; Check & collect rule precedence level
3183 (setq plevel (when (vectorp (car rest))
3184 (setq item (car rest)
3185 rest (cdr rest))
3186 (if (and (= (length item) 1)
3187 (memq (aref item 0) token-list)
3188 (wisent-prec (aref item 0)))
3189 (wisent-item-number (aref item 0))
3190 (error "Invalid rule precedence level syntax: %S" item)))
3191 rprec (cons plevel rprec))
3193 ;; Check & collect semantic action body
3194 (setq semact (vector
3195 (if rest
3196 (if (cdr rest)
3197 (error "Invalid semantic action syntax: %S" rest)
3198 (car rest))
3199 ;; Give a default semantic action body: nil
3200 ;; for an empty rule or $1, the value of the
3201 ;; first symbol in the rule, otherwise.
3202 (if (> rhl 0) '$1 '()))
3204 (list nonterm iactn))
3205 iactn (1+ iactn)
3206 rcode (cons semact rcode))
3207 (setq rules (cons (cons nonterm (nreverse rhs)) rules)
3208 nrules (1+ nrules))))
3210 (setq ruseful (make-vector (1+ nrules) t)
3211 rprec (vconcat (cons nil (nreverse rprec)))
3212 rcode (vconcat (cons nil (nreverse rcode))))
3213 (nreverse rules)
3216 (defun wisent-parse-grammar (grammar &optional start-list)
3217 "Parse GRAMMAR and build a suitable internal representation.
3218 Optional argument START-LIST defines the start symbols.
3219 GRAMMAR is a list of form: (TOKENS ASSOCS . NONTERMS)
3221 TOKENS is a list of terminal symbols (tokens).
3223 ASSOCS is nil or an alist of (ASSOC-TYPE . ASSOC-VALUE) elements
3224 describing the associativity of TOKENS. ASSOC-TYPE must be one of the
3225 `default-prec' `nonassoc', `left' or `right' symbols. When ASSOC-TYPE
3226 is `default-prec', ASSOC-VALUE must be nil or t (the default).
3227 Otherwise it is a list of tokens which must have been previously
3228 declared in TOKENS.
3230 NONTERMS is the list of non terminal definitions (see function
3231 `wisent-parse-nonterminals')."
3232 (or (and (consp grammar) (> (length grammar) 2))
3233 (error "Bad input grammar"))
3235 (let (i r rhs pre dpre lst start-var assoc rules item
3236 token var def tokens defs ep-token ep-var ep-def)
3238 ;; Built-in tokens
3239 (setq ntokens 0 nvars 0)
3240 (wisent-push-token wisent-eoi-term t)
3241 (wisent-push-token wisent-error-term t)
3243 ;; Check/collect terminals
3244 (setq lst (car grammar))
3245 (while lst
3246 (wisent-push-token (car lst))
3247 (setq lst (cdr lst)))
3249 ;; Check/Set up tokens precedence & associativity
3250 (setq lst (nth 1 grammar)
3251 pre 0
3252 defs nil
3253 dpre nil
3254 default-prec t)
3255 (while lst
3256 (setq def (car lst)
3257 assoc (car def)
3258 tokens (cdr def)
3259 lst (cdr lst))
3260 (if (eq assoc 'default-prec)
3261 (progn
3262 (or (null (cdr tokens))
3263 (memq (car tokens) '(t nil))
3264 (error "Invalid default-prec value: %S" tokens))
3265 (setq default-prec (car tokens))
3266 (if dpre
3267 (message "*** redefining default-prec to %s"
3268 default-prec))
3269 (setq dpre t))
3270 (or (memq assoc '(left right nonassoc))
3271 (error "Invalid associativity syntax: %S" assoc))
3272 (setq pre (1+ pre))
3273 (while tokens
3274 (setq token (car tokens)
3275 tokens (cdr tokens))
3276 (if (memq token defs)
3277 (message "*** redefining precedence of `%s'" token))
3278 (or (memq token token-list)
3279 ;; Define token not previously declared.
3280 (wisent-push-token token))
3281 (setq defs (cons token defs))
3282 ;; Record the precedence and associativity of the terminal.
3283 (wisent-set-prec token pre)
3284 (wisent-set-assoc token assoc))))
3286 ;; Check/Collect nonterminals
3287 (setq lst (nthcdr 2 grammar)
3288 defs nil)
3289 (while lst
3290 (setq def (car lst)
3291 lst (cdr lst))
3292 (or (consp def)
3293 (error "Invalid nonterminal definition: %S" def))
3294 (if (memq (car def) token-list)
3295 (error "Nonterminal `%s' already defined as token" (car def)))
3296 (wisent-push-var (car def))
3297 (setq defs (cons def defs)))
3298 (or defs
3299 (error "No input grammar"))
3300 (setq defs (nreverse defs))
3302 ;; Set up the start symbol.
3303 (setq start-table nil)
3304 (cond
3306 ;; 1. START-LIST is nil, the start symbol is the first
3307 ;; nonterminal defined in the grammar (Bison like).
3308 ((null start-list)
3309 (setq start-var (caar defs)))
3311 ;; 2. START-LIST contains only one element, it is the start
3312 ;; symbol (Bison like).
3313 ((or wisent-single-start-flag (null (cdr start-list)))
3314 (setq start-var (car start-list))
3315 (or (assq start-var defs)
3316 (error "Start symbol `%s' has no rule" start-var)))
3318 ;; 3. START-LIST contains more than one element. All defines
3319 ;; potential start symbols. One of them (the first one by
3320 ;; default) will be given at parse time to be the parser goal.
3321 ;; If `wisent-single-start-flag' is non-nil that feature is
3322 ;; disabled and the first nonterminal in START-LIST defines
3323 ;; the start symbol, like in case 2 above.
3324 ((not wisent-single-start-flag)
3326 ;; START-LIST is a list of nonterminals '(nt0 ... ntN).
3327 ;; Build and push ad hoc start rules in the grammar:
3329 ;; ($STARTS ((nt0) $1) ((nt1) $1) ... ((ntN) $1))
3330 ;; ($nt1 (($$nt1 nt1) $2))
3331 ;; ...
3332 ;; ($ntN (($$ntN ntN) $2))
3334 ;; Where internal symbols $ntI and $$ntI are respectively
3335 ;; nonterminals and terminals.
3337 ;; The internal start symbol $STARTS is used to build the
3338 ;; LALR(1) automaton. The true default start symbol used by the
3339 ;; parser is the first nonterminal in START-LIST (nt0).
3340 (setq start-var wisent-starts-nonterm
3341 lst (nreverse start-list))
3342 (while lst
3343 (setq var (car lst)
3344 lst (cdr lst))
3345 (or (memq var var-list)
3346 (error "Start symbol `%s' has no rule" var))
3347 (unless (assq var start-table) ;; Ignore duplicates
3348 ;; For each nt start symbol
3349 (setq ep-var (intern (format "$%s" var))
3350 ep-token (intern (format "$$%s" var)))
3351 (wisent-push-token ep-token t)
3352 (wisent-push-var ep-var t)
3353 (setq
3354 ;; Add entry (nt . $$nt) to start-table
3355 start-table (cons (cons var ep-token) start-table)
3356 ;; Add rule ($nt (($$nt nt) $2))
3357 defs (cons (list ep-var (list (list ep-token var) '$2)) defs)
3358 ;; Add start rule (($nt) $1)
3359 ep-def (cons (list (list ep-var) '$1) ep-def))
3361 (wisent-push-var start-var t)
3362 (setq defs (cons (cons start-var ep-def) defs))))
3364 ;; Set up rules main data structure & RPREC, RCODE, RUSEFUL
3365 (setq rules (wisent-parse-nonterminals defs))
3367 ;; Set up the terminal & nonterminal lists.
3368 (setq nsyms (+ ntokens nvars)
3369 token-list (nreverse token-list)
3370 lst var-list
3371 var-list nil)
3372 (while lst
3373 (setq var (car lst)
3374 lst (cdr lst)
3375 var-list (cons var var-list))
3376 (wisent-set-item-number ;; adjust nonterminal item number to
3377 var (+ ntokens (wisent-item-number var)))) ;; I += NTOKENS
3379 ;; Store special item numbers
3380 (setq error-token-number (wisent-item-number wisent-error-term)
3381 start-symbol (wisent-item-number start-var))
3383 ;; Keep symbols in the TAGS vector so that TAGS[I] is the symbol
3384 ;; associated to item number I.
3385 (setq tags (vconcat token-list var-list))
3386 ;; Set up RLHS RRHS & RITEM data structures from list of rules
3387 ;; (LHS . RHS) received from `wisent-parse-nonterminals'.
3388 (setq rlhs (make-vector (1+ nrules) nil)
3389 rrhs (make-vector (1+ nrules) nil)
3390 ritem (make-vector (1+ nitems) nil)
3392 r 1)
3393 (while rules
3394 (aset rlhs r (wisent-item-number (caar rules)))
3395 (aset rrhs r i)
3396 (setq rhs (cdar rules)
3397 pre nil)
3398 (while rhs
3399 (setq item (wisent-item-number (car rhs)))
3400 ;; Get default precedence level of rule, that is the
3401 ;; precedence of the last terminal in it.
3402 (and (wisent-ISTOKEN item)
3403 default-prec
3404 (setq pre item))
3406 (aset ritem i item)
3407 (setq i (1+ i)
3408 rhs (cdr rhs)))
3409 ;; Setup the precedence level of the rule, that is the one
3410 ;; specified by %prec or the default one.
3411 (and (not (aref rprec r)) ;; Already set by %prec
3413 (wisent-prec (aref tags pre))
3414 (aset rprec r pre))
3415 (aset ritem i (- r))
3416 (setq i (1+ i)
3417 r (1+ r))
3418 (setq rules (cdr rules)))
3421 ;;;; ---------------------
3422 ;;;; Compile input grammar
3423 ;;;; ---------------------
3425 (defun wisent-compile-grammar (grammar &optional start-list)
3426 "Compile the LALR(1) GRAMMAR.
3428 GRAMMAR is a list (TOKENS ASSOCS . NONTERMS) where:
3430 - TOKENS is a list of terminal symbols (tokens).
3432 - ASSOCS is nil, or an alist of (ASSOC-TYPE . ASSOC-VALUE) elements
3433 describing the associativity of TOKENS. ASSOC-TYPE must be one of
3434 the `default-prec' `nonassoc', `left' or `right' symbols. When
3435 ASSOC-TYPE is `default-prec', ASSOC-VALUE must be nil or t (the
3436 default). Otherwise it is a list of tokens which must have been
3437 previously declared in TOKENS.
3439 - NONTERMS is a list of nonterminal definitions.
3441 Optional argument START-LIST specify the possible grammar start
3442 symbols. This is a list of nonterminals which must have been
3443 previously declared in GRAMMAR's NONTERMS form. By default, the start
3444 symbol is the first nonterminal defined. When START-LIST contains
3445 only one element, it is the start symbol. Otherwise, all elements are
3446 possible start symbols, unless `wisent-single-start-flag' is non-nil.
3447 In that case, the first element is the start symbol, and others are
3448 ignored.
3450 Return an automaton as a vector: [ACTIONS GOTOS STARTS FUNCTIONS]
3451 where:
3453 - ACTIONS is a state/token matrix telling the parser what to do at
3454 every state based on the current lookahead token. That is shift,
3455 reduce, accept or error.
3457 - GOTOS is a state/nonterminal matrix telling the parser the next
3458 state to go to after reducing with each rule.
3460 - STARTS is an alist which maps the allowed start nonterminal symbols
3461 to tokens that will be first shifted into the parser stack.
3463 - FUNCTIONS is an obarray of semantic action symbols. Each symbol's
3464 function definition is the semantic action lambda expression."
3465 (if (wisent-automaton-p grammar)
3466 grammar ;; Grammar already compiled just return it
3467 (wisent-with-context compile-grammar
3468 (let* ((gc-cons-threshold 1000000))
3469 (garbage-collect)
3470 (setq wisent-new-log-flag t)
3471 ;; Parse input grammar
3472 (wisent-parse-grammar grammar start-list)
3473 ;; Generate the LALR(1) automaton
3474 (wisent-parser-automaton)))))
3476 ;;;; --------------------------
3477 ;;;; Byte compile input grammar
3478 ;;;; --------------------------
3480 (require 'bytecomp)
3482 (defun wisent-byte-compile-grammar (form)
3483 "Byte compile the `wisent-compile-grammar' FORM.
3484 Automatically called by the Emacs Lisp byte compiler as a
3485 `byte-compile' handler."
3486 ;; Eval the `wisent-compile-grammar' form to obtain an LALR
3487 ;; automaton internal data structure. Then, because the internal
3488 ;; data structure contains an obarray, convert it to a lisp form so
3489 ;; it can be byte-compiled.
3490 (byte-compile-form
3491 ;; FIXME: we macroexpand here since `byte-compile-form' expects
3492 ;; macroexpanded code, but that's just a workaround: for lexical-binding
3493 ;; the lisp form should have to pass through closure-conversion and
3494 ;; `wisent-byte-compile-grammar' is called much too late for that.
3495 ;; Why isn't this `wisent-automaton-lisp-form' performed at
3496 ;; macroexpansion time? --Stef
3497 (macroexpand-all
3498 (wisent-automaton-lisp-form (eval form)))))
3500 ;; FIXME: We shouldn't use a `byte-compile' handler. Maybe using a hash-table
3501 ;; instead of an obarray would work around the problem that obarrays
3502 ;; aren't printable. Then (put 'wisent-compile-grammar 'side-effect-free t).
3503 (put 'wisent-compile-grammar 'byte-compile 'wisent-byte-compile-grammar)
3505 (defun wisent-automaton-lisp-form (automaton)
3506 "Return a Lisp form that produces AUTOMATON.
3507 See also `wisent-compile-grammar' for more details on AUTOMATON."
3508 (or (wisent-automaton-p automaton)
3509 (signal 'wrong-type-argument
3510 (list 'wisent-automaton-p automaton)))
3511 (let ((obn (make-symbol "ob")) ; Generated obarray name
3512 (obv (aref automaton 3)) ; Semantic actions obarray
3514 `(let ((,obn (make-vector 13 0)))
3515 ;; Generate code to initialize the semantic actions obarray,
3516 ;; in local variable OBN.
3517 ,@(let (obcode)
3518 (mapatoms
3519 #'(lambda (s)
3520 (setq obcode
3521 (cons `(fset (intern ,(symbol-name s) ,obn)
3522 #',(symbol-function s))
3523 obcode)))
3524 obv)
3525 obcode)
3526 ;; Generate code to create the automaton.
3527 (vector
3528 ;; In code generated to initialize the action table, take
3529 ;; care of symbols that are interned in the semantic actions
3530 ;; obarray.
3531 (vector
3532 ,@(mapcar
3533 #'(lambda (state) ;; for each state
3534 `(list
3535 ,@(mapcar
3536 #'(lambda (tr) ;; for each transition
3537 (let ((k (car tr)) ; token
3538 (a (cdr tr))) ; action
3539 (if (and (symbolp a)
3540 (intern-soft (symbol-name a) obv))
3541 `(cons ,(if (symbolp k) `(quote ,k) k)
3542 (intern-soft ,(symbol-name a) ,obn))
3543 `(quote ,tr))))
3544 state)))
3545 (aref automaton 0)))
3546 ;; The code of the goto table is unchanged.
3547 ,(aref automaton 1)
3548 ;; The code of the alist of start symbols is unchanged.
3549 ',(aref automaton 2)
3550 ;; The semantic actions obarray is in the local variable OBN.
3551 ,obn))))
3553 (provide 'semantic/wisent/comp)
3555 ;; Disable messages with regards to lexical scoping, since this will
3556 ;; produce a bunch of 'lacks a prefix' warnings with the
3557 ;; `wisent-defcontext' trickery above.
3559 ;; Local variables:
3560 ;; byte-compile-warnings: (not lexical)
3561 ;; generated-autoload-load-name: "semantic/wisent/comp"
3562 ;; End:
3564 ;;; semantic/wisent/comp.el ends here