Nuke arch-tags.
[emacs.git] / lisp / cedet / semantic / wisent.el
blob5135ca06680d188db7165c6ee02579caad4cfe6b
1 ;;; semantic/wisent.el --- Wisent - Semantic gateway
3 ;; Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2009, 2010, 2011
4 ;; Free Software Foundation, Inc.
6 ;; Author: David Ponce <david@dponce.com>
7 ;; Maintainer: David Ponce <david@dponce.com>
8 ;; Created: 30 Aug 2001
9 ;; Keywords: syntax
11 ;; This file is part of GNU Emacs.
13 ;; GNU Emacs is free software: you can redistribute it and/or modify
14 ;; it under the terms of the GNU General Public License as published by
15 ;; the Free Software Foundation, either version 3 of the License, or
16 ;; (at your option) any later version.
18 ;; GNU Emacs is distributed in the hope that it will be useful,
19 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
20 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 ;; GNU General Public License for more details.
23 ;; You should have received a copy of the GNU General Public License
24 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
26 ;;; Commentary:
28 ;; Here are functions necessary to use the Wisent LALR parser from
29 ;; Semantic environment.
31 ;;; History:
34 ;;; Code:
36 (require 'semantic)
37 (require 'semantic/wisent/wisent)
39 ;;; Lexical analysis
41 (defvar wisent-lex-istream nil
42 "Input stream of `semantic-lex' syntactic tokens.")
44 (defvar wisent-lex-lookahead nil
45 "Extra lookahead token.
46 When non-nil it is directly returned by `wisent-lex-function'.")
48 ;; Maintain this alias for compatibility until all WY grammars have
49 ;; been translated again to Elisp code.
50 (semantic-alias-obsolete 'wisent-lex-make-token-table
51 'semantic-lex-make-type-table "23.2")
53 (defmacro wisent-lex-eoi ()
54 "Return an End-Of-Input lexical token.
55 The EOI token is like this: ($EOI \"\" POINT-MAX . POINT-MAX)."
56 `(cons ',wisent-eoi-term
57 (cons ""
58 (cons (point-max) (point-max)))))
60 (defmacro define-wisent-lexer (name doc &rest body)
61 "Create a new lexical analyzer with NAME.
62 DOC is a documentation string describing this analyzer.
63 When a token is available in `wisent-lex-istream', eval BODY forms
64 sequentially. BODY must return a lexical token for the LALR parser.
66 Each token in input was produced by `semantic-lex', it is a list:
68 (TOKSYM START . END)
70 TOKSYM is a terminal symbol used in the grammar.
71 START and END mark boundary in the current buffer of that token's
72 value.
74 Returned tokens must have the form:
76 (TOKSYM VALUE START . END)
78 where VALUE is the buffer substring between START and END positions."
79 `(defun
80 ,name () ,doc
81 (cond
82 (wisent-lex-lookahead
83 (prog1 wisent-lex-lookahead
84 (setq wisent-lex-lookahead nil)))
85 (wisent-lex-istream
86 ,@body)
87 ((wisent-lex-eoi)))))
89 (define-wisent-lexer wisent-lex
90 "Return the next available lexical token in Wisent's form.
91 The variable `wisent-lex-istream' contains the list of lexical tokens
92 produced by `semantic-lex'. Pop the next token available and convert
93 it to a form suitable for the Wisent's parser."
94 (let* ((tk (car wisent-lex-istream)))
95 ;; Eat input stream
96 (setq wisent-lex-istream (cdr wisent-lex-istream))
97 (cons (semantic-lex-token-class tk)
98 (cons (semantic-lex-token-text tk)
99 (semantic-lex-token-bounds tk)))))
101 ;;; Syntax analysis
103 (defvar wisent-error-function nil
104 "Function used to report parse error.
105 By default use the function `wisent-message'.")
106 (make-variable-buffer-local 'wisent-error-function)
108 (defvar wisent-lexer-function 'wisent-lex
109 "Function used to obtain the next lexical token in input.
110 Should be a lexical analyzer created with `define-wisent-lexer'.")
111 (make-variable-buffer-local 'wisent-lexer-function)
113 ;; Tag production
115 (defsubst wisent-raw-tag (semantic-tag)
116 "Return raw form of given Semantic tag SEMANTIC-TAG.
117 Should be used in semantic actions, in grammars, to build a Semantic
118 parse tree."
119 (nconc semantic-tag
120 (if (or $region
121 (setq $region (nthcdr 2 wisent-input)))
122 (list (car $region) (cdr $region))
123 (list (point-max) (point-max)))))
125 (defsubst wisent-cook-tag (raw-tag)
126 "From raw form of Semantic tag RAW-TAG, return a list of cooked tags.
127 Should be used in semantic actions, in grammars, to build a Semantic
128 parse tree."
129 (let* ((cooked (semantic--tag-expand raw-tag))
130 (l cooked))
131 (while l
132 (semantic--tag-put-property (car l) 'reparse-symbol $nterm)
133 (setq l (cdr l)))
134 cooked))
136 ;; Unmatched syntax collector
138 (defun wisent-collect-unmatched-syntax (nomatch)
139 "Add lexical token NOMATCH to the cache of unmatched tokens.
140 See also the variable `semantic-unmatched-syntax-cache'.
142 NOMATCH is in Wisent's form: (SYMBOL VALUE START . END)
143 and will be collected in `semantic-lex' form: (SYMBOL START . END)."
144 (let ((region (cddr nomatch)))
145 (and (number-or-marker-p (car region))
146 (number-or-marker-p (cdr region))
147 (setq semantic-unmatched-syntax-cache
148 (cons (cons (car nomatch) region)
149 semantic-unmatched-syntax-cache)))))
151 ;; Parser plug-ins
153 ;; The following functions permit to plug the Wisent LALR parser in
154 ;; Semantic toolkit. They use the standard API provided by Semantic
155 ;; to plug parsers in.
157 ;; Two plug-ins are available, BUT ONLY ONE MUST BE USED AT A TIME:
159 ;; - `wisent-parse-stream' designed to override the standard function
160 ;; `semantic-parse-stream'.
162 ;; - `wisent-parse-region' designed to override the standard function
163 ;; `semantic-parse-region'.
165 ;; Maybe the latter is faster because it eliminates a lot of function
166 ;; call.
168 (defun wisent-parse-stream (stream goal)
169 "Parse STREAM using the Wisent LALR parser.
170 GOAL is a nonterminal symbol to start parsing at.
171 Return the list (STREAM SEMANTIC-STREAM) where STREAM are those
172 elements of STREAM that have not been used. SEMANTIC-STREAM is the
173 list of semantic tags found.
174 The LALR parser automaton must be available in buffer local variable
175 `semantic--parse-table'.
177 Must be installed by `semantic-install-function-overrides' to override
178 the standard function `semantic-parse-stream'."
179 (let (wisent-lex-istream wisent-lex-lookahead la-elt cache)
181 ;; IMPLEMENTATION NOTES:
182 ;; `wisent-parse' returns a lookahead token when it stopped
183 ;; parsing before encountering the end of input. To re-enter the
184 ;; parser it is necessary to push back in the lexical input stream
185 ;; the last lookahead token issued. Because the format of
186 ;; lookahead tokens and tokens in STREAM can be different the
187 ;; lookahead token is put in the variable `wisent-lex-lookahead'
188 ;; before calling `wisent-parse'. Wisent's lexers always pop the
189 ;; next lexical token from that variable when non nil, then from
190 ;; the lexical input stream.
192 ;; The first element of STREAM is used to keep lookahead tokens
193 ;; across successive calls to `wisent-parse-stream'. In fact
194 ;; what is kept is a stack of lookaheads encountered so far. It
195 ;; is cleared when `wisent-parse' returns a valid semantic tag,
196 ;; or twice the same lookahead token! The latter indicates that
197 ;; there is a syntax error on that token. If so, tokens currently
198 ;; in the lookahead stack have not been used, and are moved into
199 ;; `semantic-unmatched-syntax-cache'. When the parser will be
200 ;; re-entered, a new lexical token will be read from STREAM.
202 ;; The first element of STREAM that contains the lookahead stack
203 ;; has this format (compatible with the format of `semantic-lex'
204 ;; tokens):
206 ;; (LOOKAHEAD-STACK START . END)
208 ;; where LOOKAHEAD-STACK is a list of lookahead tokens. And
209 ;; START/END are the bounds of the lookahead at top of stack.
211 ;; Retrieve lookahead token from stack
212 (setq la-elt (car stream))
213 (if (consp (car la-elt))
214 ;; The first elt of STREAM contains a lookahead stack
215 (setq wisent-lex-lookahead (caar la-elt)
216 stream (cdr stream))
217 (setq la-elt nil))
218 ;; Parse
219 (setq wisent-lex-istream stream
220 cache (semantic-safe "wisent-parse-stream: %s"
221 (condition-case error-to-filter
222 (wisent-parse semantic--parse-table
223 wisent-lexer-function
224 wisent-error-function
225 goal)
226 (args-out-of-range
227 (if (and (not debug-on-error)
228 (= wisent-parse-max-stack-size
229 (nth 2 error-to-filter)))
230 (progn
231 (message "wisent-parse-stream: %s"
232 (error-message-string error-to-filter))
233 (message "wisent-parse-max-stack-size \
234 might need to be increased"))
235 (apply 'signal error-to-filter))))))
236 ;; Manage returned lookahead token
237 (if wisent-lookahead
238 (if (eq (caar la-elt) wisent-lookahead)
239 ;; It is already at top of lookahead stack
240 (progn
241 (setq cache nil
242 la-elt (car la-elt))
243 (while la-elt
244 ;; Collect unmatched tokens from the stack
245 (run-hook-with-args
246 'wisent-discarding-token-functions (car la-elt))
247 (setq la-elt (cdr la-elt))))
248 ;; New lookahead token
249 (if (or (consp cache) ;; Clear the stack if parse succeeded
250 (null la-elt))
251 (setq la-elt (cons nil nil)))
252 ;; Push it into the stack
253 (setcar la-elt (cons wisent-lookahead (car la-elt)))
254 ;; Update START/END
255 (setcdr la-elt (cddr wisent-lookahead))
256 ;; Push (LOOKAHEAD-STACK START . END) in STREAM
257 (setq wisent-lex-istream (cons la-elt wisent-lex-istream))))
258 ;; Return (STREAM SEMANTIC-STREAM)
259 (list wisent-lex-istream
260 (if (consp cache) cache '(nil))
263 (defun wisent-parse-region (start end &optional goal depth returnonerror)
264 "Parse the area between START and END using the Wisent LALR parser.
265 Return the list of semantic tags found.
266 Optional arguments GOAL is a nonterminal symbol to start parsing at,
267 DEPTH is the lexical depth to scan, and RETURNONERROR is a flag to
268 stop parsing on syntax error, when non-nil.
269 The LALR parser automaton must be available in buffer local variable
270 `semantic--parse-table'.
272 Must be installed by `semantic-install-function-overrides' to override
273 the standard function `semantic-parse-region'."
274 (if (or (< start (point-min)) (> end (point-max)) (< end start))
275 (error "Invalid bounds [%s %s] passed to `wisent-parse-region'"
276 start end))
277 (let* ((case-fold-search semantic-case-fold)
278 (wisent-lex-istream (semantic-lex start end depth))
279 ptree tag cooked lstack wisent-lex-lookahead)
280 ;; Loop while there are lexical tokens available
281 (while wisent-lex-istream
282 ;; Parse
283 (setq wisent-lex-lookahead (car lstack)
284 tag (semantic-safe "wisent-parse-region: %s"
285 (wisent-parse semantic--parse-table
286 wisent-lexer-function
287 wisent-error-function
288 goal)))
289 ;; Manage returned lookahead token
290 (if wisent-lookahead
291 (if (eq (car lstack) wisent-lookahead)
292 ;; It is already at top of lookahead stack
293 (progn
294 (setq tag nil)
295 (while lstack
296 ;; Collect unmatched tokens from lookahead stack
297 (run-hook-with-args
298 'wisent-discarding-token-functions (car lstack))
299 (setq lstack (cdr lstack))))
300 ;; Push new lookahead token into the stack
301 (setq lstack (cons wisent-lookahead lstack))))
302 ;; Manage the parser result
303 (cond
304 ;; Parse succeeded, cook result
305 ((consp tag)
306 (setq lstack nil ;; Clear the lookahead stack
307 cooked (semantic--tag-expand tag)
308 ptree (append cooked ptree))
309 (while cooked
310 (setq tag (car cooked)
311 cooked (cdr cooked))
312 (or (semantic--tag-get-property tag 'reparse-symbol)
313 (semantic--tag-put-property tag 'reparse-symbol goal)))
315 ;; Return on error if requested
316 (returnonerror
317 (setq wisent-lex-istream nil)
319 ;; Work in progress...
320 (if wisent-lex-istream
321 (and (eq semantic-working-type 'percent)
322 (boundp 'semantic--progress-reporter)
323 semantic--progress-reporter
324 (progress-reporter-update
325 semantic--progress-reporter
326 (/ (* 100 (semantic-lex-token-start
327 (car wisent-lex-istream)))
328 (point-max))))))
329 ;; Return parse tree
330 (nreverse ptree)))
332 ;;; Interfacing with edebug
334 (add-hook
335 'edebug-setup-hook
336 #'(lambda ()
338 (def-edebug-spec define-wisent-lexer
339 (&define name stringp def-body)
344 (provide 'semantic/wisent)
346 ;;; semantic/wisent.el ends here