Update copyright year to 2014 by running admin/update-copyright.
[emacs.git] / lisp / cedet / semantic / wisent.el
blobbb6f5fca1017e4167523f149173e6f537cfb677b
1 ;;; semantic/wisent.el --- Wisent - Semantic gateway
3 ;; Copyright (C) 2001-2007, 2009-2014 Free Software Foundation, Inc.
5 ;; Author: David Ponce <david@dponce.com>
6 ;; Maintainer: David Ponce <david@dponce.com>
7 ;; Created: 30 Aug 2001
8 ;; Keywords: syntax
10 ;; This file is part of GNU Emacs.
12 ;; GNU Emacs is free software: you can redistribute it and/or modify
13 ;; it under the terms of the GNU General Public License as published by
14 ;; the Free Software Foundation, either version 3 of the License, or
15 ;; (at your option) any later version.
17 ;; GNU Emacs is distributed in the hope that it will be useful,
18 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
19 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 ;; GNU General Public License for more details.
22 ;; You should have received a copy of the GNU General Public License
23 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
25 ;;; Commentary:
27 ;; Here are functions necessary to use the Wisent LALR parser from
28 ;; Semantic environment.
30 ;;; History:
33 ;;; Code:
35 (require 'semantic)
36 (require 'semantic/wisent/wisent)
38 ;;; Lexical analysis
40 (defvar wisent-lex-istream nil
41 "Input stream of `semantic-lex' syntactic tokens.")
43 (defvar wisent-lex-lookahead nil
44 "Extra lookahead token.
45 When non-nil it is directly returned by `wisent-lex-function'.")
47 ;; Maintain this alias for compatibility until all WY grammars have
48 ;; been translated again to Elisp code.
49 (semantic-alias-obsolete 'wisent-lex-make-token-table
50 'semantic-lex-make-type-table "23.2")
52 (defmacro wisent-lex-eoi ()
53 "Return an End-Of-Input lexical token.
54 The EOI token is like this: ($EOI \"\" POINT-MAX . POINT-MAX)."
55 `(cons ',wisent-eoi-term
56 (cons ""
57 (cons (point-max) (point-max)))))
59 (defmacro define-wisent-lexer (name doc &rest body)
60 "Create a new lexical analyzer with NAME.
61 DOC is a documentation string describing this analyzer.
62 When a token is available in `wisent-lex-istream', eval BODY forms
63 sequentially. BODY must return a lexical token for the LALR parser.
65 Each token in input was produced by `semantic-lex', it is a list:
67 (TOKSYM START . END)
69 TOKSYM is a terminal symbol used in the grammar.
70 START and END mark boundary in the current buffer of that token's
71 value.
73 Returned tokens must have the form:
75 (TOKSYM VALUE START . END)
77 where VALUE is the buffer substring between START and END positions."
78 `(defun
79 ,name () ,doc
80 (cond
81 (wisent-lex-lookahead
82 (prog1 wisent-lex-lookahead
83 (setq wisent-lex-lookahead nil)))
84 (wisent-lex-istream
85 ,@body)
86 ((wisent-lex-eoi)))))
88 (define-wisent-lexer wisent-lex
89 "Return the next available lexical token in Wisent's form.
90 The variable `wisent-lex-istream' contains the list of lexical tokens
91 produced by `semantic-lex'. Pop the next token available and convert
92 it to a form suitable for the Wisent's parser."
93 (let* ((tk (car wisent-lex-istream)))
94 ;; Eat input stream
95 (setq wisent-lex-istream (cdr wisent-lex-istream))
96 (cons (semantic-lex-token-class tk)
97 (cons (semantic-lex-token-text tk)
98 (semantic-lex-token-bounds tk)))))
100 ;;; Syntax analysis
102 (defvar wisent-error-function nil
103 "Function used to report parse error.
104 By default use the function `wisent-message'.")
105 (make-variable-buffer-local 'wisent-error-function)
107 (defvar wisent-lexer-function 'wisent-lex
108 "Function used to obtain the next lexical token in input.
109 Should be a lexical analyzer created with `define-wisent-lexer'.")
110 (make-variable-buffer-local 'wisent-lexer-function)
112 ;; Tag production
114 (defsubst wisent-raw-tag (semantic-tag)
115 "Return raw form of given Semantic tag SEMANTIC-TAG.
116 Should be used in semantic actions, in grammars, to build a Semantic
117 parse tree."
118 (nconc semantic-tag
119 (if (or $region
120 (setq $region (nthcdr 2 wisent-input)))
121 (list (car $region) (cdr $region))
122 (list (point-max) (point-max)))))
124 (defsubst wisent-cook-tag (raw-tag)
125 "From raw form of Semantic tag RAW-TAG, return a list of cooked tags.
126 Should be used in semantic actions, in grammars, to build a Semantic
127 parse tree."
128 (let* ((cooked (semantic--tag-expand raw-tag))
129 (l cooked))
130 (while l
131 (semantic--tag-put-property (car l) 'reparse-symbol $nterm)
132 (setq l (cdr l)))
133 cooked))
135 ;; Unmatched syntax collector
137 (defun wisent-collect-unmatched-syntax (nomatch)
138 "Add lexical token NOMATCH to the cache of unmatched tokens.
139 See also the variable `semantic-unmatched-syntax-cache'.
141 NOMATCH is in Wisent's form: (SYMBOL VALUE START . END)
142 and will be collected in `semantic-lex' form: (SYMBOL START . END)."
143 (let ((region (cddr nomatch)))
144 (and (number-or-marker-p (car region))
145 (number-or-marker-p (cdr region))
146 (setq semantic-unmatched-syntax-cache
147 (cons (cons (car nomatch) region)
148 semantic-unmatched-syntax-cache)))))
150 ;; Parser plug-ins
152 ;; The following functions permit to plug the Wisent LALR parser in
153 ;; Semantic toolkit. They use the standard API provided by Semantic
154 ;; to plug parsers in.
156 ;; Two plug-ins are available, BUT ONLY ONE MUST BE USED AT A TIME:
158 ;; - `wisent-parse-stream' designed to override the standard function
159 ;; `semantic-parse-stream'.
161 ;; - `wisent-parse-region' designed to override the standard function
162 ;; `semantic-parse-region'.
164 ;; Maybe the latter is faster because it eliminates a lot of function
165 ;; call.
167 (defun wisent-parse-stream (stream goal)
168 "Parse STREAM using the Wisent LALR parser.
169 GOAL is a nonterminal symbol to start parsing at.
170 Return the list (STREAM SEMANTIC-STREAM) where STREAM are those
171 elements of STREAM that have not been used. SEMANTIC-STREAM is the
172 list of semantic tags found.
173 The LALR parser automaton must be available in buffer local variable
174 `semantic--parse-table'.
176 Must be installed by `semantic-install-function-overrides' to override
177 the standard function `semantic-parse-stream'."
178 (let (wisent-lex-istream wisent-lex-lookahead la-elt cache)
180 ;; IMPLEMENTATION NOTES:
181 ;; `wisent-parse' returns a lookahead token when it stopped
182 ;; parsing before encountering the end of input. To re-enter the
183 ;; parser it is necessary to push back in the lexical input stream
184 ;; the last lookahead token issued. Because the format of
185 ;; lookahead tokens and tokens in STREAM can be different the
186 ;; lookahead token is put in the variable `wisent-lex-lookahead'
187 ;; before calling `wisent-parse'. Wisent's lexers always pop the
188 ;; next lexical token from that variable when non nil, then from
189 ;; the lexical input stream.
191 ;; The first element of STREAM is used to keep lookahead tokens
192 ;; across successive calls to `wisent-parse-stream'. In fact
193 ;; what is kept is a stack of lookaheads encountered so far. It
194 ;; is cleared when `wisent-parse' returns a valid semantic tag,
195 ;; or twice the same lookahead token! The latter indicates that
196 ;; there is a syntax error on that token. If so, tokens currently
197 ;; in the lookahead stack have not been used, and are moved into
198 ;; `semantic-unmatched-syntax-cache'. When the parser will be
199 ;; re-entered, a new lexical token will be read from STREAM.
201 ;; The first element of STREAM that contains the lookahead stack
202 ;; has this format (compatible with the format of `semantic-lex'
203 ;; tokens):
205 ;; (LOOKAHEAD-STACK START . END)
207 ;; where LOOKAHEAD-STACK is a list of lookahead tokens. And
208 ;; START/END are the bounds of the lookahead at top of stack.
210 ;; Retrieve lookahead token from stack
211 (setq la-elt (car stream))
212 (if (consp (car la-elt))
213 ;; The first elt of STREAM contains a lookahead stack
214 (setq wisent-lex-lookahead (caar la-elt)
215 stream (cdr stream))
216 (setq la-elt nil))
217 ;; Parse
218 (setq wisent-lex-istream stream
219 cache (semantic-safe "wisent-parse-stream: %s"
220 (condition-case error-to-filter
221 (wisent-parse semantic--parse-table
222 wisent-lexer-function
223 wisent-error-function
224 goal)
225 (args-out-of-range
226 (if (and (not debug-on-error)
227 (= wisent-parse-max-stack-size
228 (nth 2 error-to-filter)))
229 (progn
230 (message "wisent-parse-stream: %s"
231 (error-message-string error-to-filter))
232 (message "wisent-parse-max-stack-size \
233 might need to be increased"))
234 (apply 'signal error-to-filter))))))
235 ;; Manage returned lookahead token
236 (if wisent-lookahead
237 (if (eq (caar la-elt) wisent-lookahead)
238 ;; It is already at top of lookahead stack
239 (progn
240 (setq cache nil
241 la-elt (car la-elt))
242 (while la-elt
243 ;; Collect unmatched tokens from the stack
244 (run-hook-with-args
245 'wisent-discarding-token-functions (car la-elt))
246 (setq la-elt (cdr la-elt))))
247 ;; New lookahead token
248 (if (or (consp cache) ;; Clear the stack if parse succeeded
249 (null la-elt))
250 (setq la-elt (cons nil nil)))
251 ;; Push it into the stack
252 (setcar la-elt (cons wisent-lookahead (car la-elt)))
253 ;; Update START/END
254 (setcdr la-elt (cddr wisent-lookahead))
255 ;; Push (LOOKAHEAD-STACK START . END) in STREAM
256 (setq wisent-lex-istream (cons la-elt wisent-lex-istream))))
257 ;; Return (STREAM SEMANTIC-STREAM)
258 (list wisent-lex-istream
259 (if (consp cache) cache '(nil))
262 (defun wisent-parse-region (start end &optional goal depth returnonerror)
263 "Parse the area between START and END using the Wisent LALR parser.
264 Return the list of semantic tags found.
265 Optional arguments GOAL is a nonterminal symbol to start parsing at,
266 DEPTH is the lexical depth to scan, and RETURNONERROR is a flag to
267 stop parsing on syntax error, when non-nil.
268 The LALR parser automaton must be available in buffer local variable
269 `semantic--parse-table'.
271 Must be installed by `semantic-install-function-overrides' to override
272 the standard function `semantic-parse-region'."
273 (if (or (< start (point-min)) (> end (point-max)) (< end start))
274 (error "Invalid bounds [%s %s] passed to `wisent-parse-region'"
275 start end))
276 (let* ((case-fold-search semantic-case-fold)
277 (wisent-lex-istream (semantic-lex start end depth))
278 ptree tag cooked lstack wisent-lex-lookahead)
279 ;; Loop while there are lexical tokens available
280 (while wisent-lex-istream
281 ;; Parse
282 (setq wisent-lex-lookahead (car lstack)
283 tag (semantic-safe "wisent-parse-region: %s"
284 (wisent-parse semantic--parse-table
285 wisent-lexer-function
286 wisent-error-function
287 goal)))
288 ;; Manage returned lookahead token
289 (if wisent-lookahead
290 (if (eq (car lstack) wisent-lookahead)
291 ;; It is already at top of lookahead stack
292 (progn
293 (setq tag nil)
294 (while lstack
295 ;; Collect unmatched tokens from lookahead stack
296 (run-hook-with-args
297 'wisent-discarding-token-functions (car lstack))
298 (setq lstack (cdr lstack))))
299 ;; Push new lookahead token into the stack
300 (setq lstack (cons wisent-lookahead lstack))))
301 ;; Manage the parser result
302 (cond
303 ;; Parse succeeded, cook result
304 ((consp tag)
305 (setq lstack nil ;; Clear the lookahead stack
306 cooked (semantic--tag-expand tag)
307 ptree (append cooked ptree))
308 (while cooked
309 (setq tag (car cooked)
310 cooked (cdr cooked))
311 (or (semantic--tag-get-property tag 'reparse-symbol)
312 (semantic--tag-put-property tag 'reparse-symbol goal)))
314 ;; Return on error if requested
315 (returnonerror
316 (setq wisent-lex-istream nil)
318 ;; Work in progress...
319 (if wisent-lex-istream
320 (and (eq semantic-working-type 'percent)
321 (boundp 'semantic--progress-reporter)
322 semantic--progress-reporter
323 (progress-reporter-update
324 semantic--progress-reporter
325 (/ (* 100 (semantic-lex-token-start
326 (car wisent-lex-istream)))
327 (point-max))))))
328 ;; Return parse tree
329 (nreverse ptree)))
331 ;;; Interfacing with edebug
333 (add-hook
334 'edebug-setup-hook
335 #'(lambda ()
337 (def-edebug-spec define-wisent-lexer
338 (&define name stringp def-body)
343 (provide 'semantic/wisent)
345 ;;; semantic/wisent.el ends here