3 * ----------------------------------------------------------------
5 * Copyright (C) 2001 Adam Granicz, Caltech
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version 2
10 * of the License, or (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
21 * Author: Adam Granicz
22 * Email: granicz@cs.caltech.edu
28 open Phobos_parse_state
36 open Phobos_token_inheritance
38 (*****************************************************
39 * Associativity and precedence list.
41 * Check that it contains only valid terminals.
42 *****************************************************)
43 let rec check_terminals terminals
= function
45 if string_set_mem terminals s
then
46 check_terminals terminals rest
49 print_warning pos
(string_format
"undeclared terminal %s" s
);
50 check_terminals terminals rest
55 let check_assocs gst
=
56 List.iter
(fun assoc
->
60 | Dir_rightassoc lst
->
61 check_terminals gst
.grammar_terminals lst
) gst
.grammar_assocs
63 let rec assoc_list_mem lst s
=
74 * Return priority (lowest being 1) of a token.
75 * For non-terminals, return -1.
77 let rec priority_of_aux s level
= function
81 if assoc_list_mem lst s
then
84 priority_of_aux s
(level
+1) rest
85 | Dir_leftassoc lst
->
86 if assoc_list_mem lst s
then
89 priority_of_aux s
(level
+1) rest
90 | Dir_rightassoc lst
->
91 if assoc_list_mem lst s
then
92 level
, Some RightAssoc
94 priority_of_aux s
(level
+1) rest
)
98 let priority_of s assocs
=
99 priority_of_aux s
1 assocs
101 let rec get_start_symbol = function
107 get_start_symbol rest
)
109 raise
(PhobosException
(bogus_pos
, "no start symbol declared"))
112 * Convert from pre-grammar to grammar, and do some
115 let rec create_symbols terminals nonterminals
= function
116 (head
, pos
) :: rest
->
120 else if string_set_mem terminals head
then
122 else if string_set_mem nonterminals head
then
123 NonTerminal head
, pos
125 raise
(PhobosException
(pos
, (Printf.sprintf
"undefined symbol [%s]" head
)))
127 psym :: create_symbols terminals nonterminals rest
131 and create_grammar_aux gst
= function
132 ((sym
, pos
), (prods
, prec_opt
, rewrites
)) :: rest
->
133 (NonTerminal sym
, pos
, create_symbols gst
.grammar_terminals gst
.grammar_nonterminals prods
,
134 prec_opt
, List.rev rewrites
) :: create_grammar_aux gst rest
138 and create_grammar gst pre_grammar
=
139 List.rev
(create_grammar_aux gst pre_grammar
)
142 * Make sure that every symbol is either a variable (e.g.
143 * it has been defined as the head of a production), or a
144 * terminal symbol (e.g. there is a regexp for it).
145 * We do this by searching the list of terminals and
146 * non-terminals that we previously computed.
148 let rec check_productions terminals nonterminals
= function
152 | (Eof
, pos
) :: rest
->
153 raise
(PhobosException
(pos
, "EOF can't be followed by any symbols"))
154 | (Empty
, pos
) :: rest
->
155 raise
(PhobosException
(pos
, "'_' can't be followed by any symbols"))
156 | (psym, pos
) :: rest
->
157 let sym = string_of_psymbol
psym in
158 if string_set_mem nonterminals
sym then
159 check_productions terminals nonterminals rest
161 if string_set_mem terminals
sym then
162 check_productions terminals nonterminals rest
164 raise
(PhobosException
(pos
, string_add
["unbound symbol ["; sym; "]"]))
168 and check_grammar gst
=
169 List.iter
(fun (_
, _
, prods
, _
, _
) ->
170 check_productions gst
.grammar_terminals gst
.grammar_nonterminals prods
) gst
.grammar_grammar
172 let grammar_table_of_grammar (grammar
: grammar
) =
173 List.fold_left
(fun grammar_table
(psym, _
, prods
, prec_opt
, rewrites
) ->
174 let prods = List.map fst
prods in
175 (* Do not add duplicate productions *)
176 let grammar_table = grammar_table_add_once
grammar_table psym prods in
177 grammar_table) PSymbolMTable.empty grammar
180 * Augment grammar with a new start production S' -> S$.
182 let augment_grammar gst
=
183 (* Leave if no start symbol is given *)
184 if gst
.grammar_start_symbol
= bogus_symbol
then
187 (* We make up a position for the new start symbol *)
189 (global_start_symbol
, bogus_pos
, [(gst
.grammar_start_symbol
, bogus_pos
); (Eof
, bogus_pos
)], None
, [])
191 { grammar_nonterminals
= gst
.grammar_nonterminals
;
192 grammar_terminals
= gst
.grammar_terminals
;
193 grammar_assocs
= gst
.grammar_assocs
;
194 grammar_token_rules
= gst
.grammar_token_rules
;
195 grammar_start_symbol
= gst
.grammar_start_symbol
;
196 grammar_grammar
= new_production :: gst
.grammar_grammar
;
197 grammar_termsets
= gst
.grammar_termsets
;
198 grammar_local_rewrites
= gst
.grammar_local_rewrites
;
199 grammar_post_rewrites
= gst
.grammar_post_rewrites
;
200 grammar_inline_forms
= gst
.grammar_inline_forms
203 let remove_start_productions gst
=
204 (* Remove redefined start productions *)
205 if gst
.grammar_start_symbol
<> bogus_symbol
then begin
206 let (grammar'
, _
) = List.partition
(fun (head_sym
, _
, _
, _
, _
) ->
208 NonTerminal s
when s
= global_start_string
->
211 true) gst
.grammar_grammar
218 * Include other semantic modules.
219 * For now, there are a couple bogus things in here.
220 * [regexps; assocs; grammar; ..]
222 let include_grammars paths includes
=
224 { grammar_nonterminals
= StringSet.empty
;
225 grammar_terminals
= StringSet.empty
;
227 grammar_token_rules
= [];
228 grammar_start_symbol
= bogus_symbol
;
229 grammar_grammar
= [];
230 grammar_termsets
= [];
231 grammar_local_rewrites
= [];
232 grammar_post_rewrites
= [];
233 grammar_inline_forms
= []
237 List.fold_left
(fun gst incl
->
238 let gst'
, _
, _
, _
= load_grammar
(find_file paths incl
) in
239 (* Remove redefined start productions *)
240 let old_grammar = remove_start_productions gst in
241 (* Keep old start symbol if no new one is given *)
243 if gst'
.grammar_start_symbol
= bogus_symbol
then
244 gst.grammar_start_symbol
246 gst'
.grammar_start_symbol
248 let nt_set = gst.grammar_nonterminals
in
249 let nt_set'
= gst'
.grammar_nonterminals
in
250 let t_set = gst.grammar_terminals
in
251 let t_set'
= gst'
.grammar_terminals
in
252 { grammar_nonterminals
= string_set_union
nt_set nt_set'
;
253 grammar_terminals
= string_set_union
t_set t_set'
;
254 grammar_assocs
= gst'
.grammar_assocs
;
255 grammar_token_rules
= gst'
.grammar_token_rules
@ gst.grammar_token_rules
;
256 grammar_start_symbol
= start_symbol;
257 grammar_grammar
= gst'
.grammar_grammar
@ old_grammar;
258 grammar_termsets
= gst'
.grammar_termsets
@ gst.grammar_termsets
;
259 grammar_local_rewrites
= gst'
.grammar_local_rewrites
@ gst.grammar_local_rewrites
;
260 grammar_post_rewrites
= gst'
.grammar_post_rewrites
@ gst.grammar_post_rewrites
;
261 grammar_inline_forms
= gst'
.grammar_inline_forms
@ gst.grammar_inline_forms
262 }) dummy_gst includes
264 if !Fir_state.debug_phobos
then begin
265 Format.print_string
"After include_grammars: terminals = \n";
266 print_string_set
gst.grammar_terminals
;
267 Format.print_string
"After include_grammars: nonterminals = \n";
268 print_string_set
gst.grammar_nonterminals
273 * Process fresh pre_grammar.
276 { phobos_module_name
= module_name
;
277 phobos_includes
= includes
;
278 phobos_termsets
= termsets
;
279 phobos_local_rewrites
= local_pre_rewrites
;
280 phobos_lexer_info
= (token_rules
, loptions
);
281 phobos_assoc_info
= assocs
;
282 phobos_grammar_info
= (pre_grammar
, grammar_options
);
283 phobos_post_rewrites
= post_pre_rewrites
;
284 phobos_inline_forms
= inline_forms
286 debug_pre_grammar pre_grammar
;
288 (* Produce terminal and nonterminal symbols *)
289 let terminals = List.map
(fun (_
, ((sym, _
), _
), _
, _
) -> sym) token_rules
in
290 let nonterminals = List.map
(fun ((sym, _
), _
) -> sym) pre_grammar
in
292 (* Filter out duplicates *)
293 let terminals = string_set_of_list
(List.sort compare
terminals) in
294 let nonterminals = string_set_of_list
(List.sort compare
nonterminals) in
295 debug_symbols
"Symbols defined in this module (with no duplicates):\n" nonterminals terminals;
297 (* Import necessary semantic modules. *)
298 let gst = include_grammars paths includes
in
300 (* Do we redefine disambiguation rules? *)
309 (* Retrieve start symbol *)
310 let start_symbol, ss_is_defined
=
312 (* Do we have a new start symbol? *)
313 get_start_symbol grammar_options
, true
315 Phobos_exn.PhobosException _
->
316 let old_symbol = gst.grammar_start_symbol
in
317 if old_symbol <> bogus_symbol
then
323 debug_symbols
"After joining with inherited grammars\n"
324 (string_set_union
gst.grammar_nonterminals
nonterminals)
325 (string_set_union
gst.grammar_terminals
terminals);
327 (* Add terminal and nonterminal symbols, and remove start production *)
329 { grammar_nonterminals
= string_set_union
gst.grammar_nonterminals
nonterminals;
330 grammar_terminals
= string_set_union
gst.grammar_terminals
terminals;
331 grammar_assocs
= gst.grammar_assocs
;
332 grammar_token_rules
= gst.grammar_token_rules
;
333 grammar_start_symbol
= gst.grammar_start_symbol
;
334 grammar_grammar
= remove_start_productions gst;
335 grammar_termsets
= gst.grammar_termsets
;
336 grammar_local_rewrites
= gst.grammar_local_rewrites
;
337 grammar_post_rewrites
= gst.grammar_post_rewrites
;
338 grammar_inline_forms
= gst.grammar_inline_forms
342 (* Create grammar from pre_grammar *)
343 let grammar = create_grammar
gst pre_grammar
in
345 (* Update grammar information if necessary *)
347 { grammar_nonterminals
= string_set_union
gst.grammar_nonterminals
nonterminals;
348 grammar_terminals
= string_set_union
gst.grammar_terminals
terminals;
349 grammar_assocs
= assocs;
350 grammar_token_rules
= gst.grammar_token_rules
@ token_rules
;
351 grammar_start_symbol
= start_symbol;
352 grammar_grammar
= gst.grammar_grammar
@ grammar;
353 grammar_termsets
= gst.grammar_termsets
@ termsets
;
354 grammar_local_rewrites
= gst.grammar_local_rewrites
@ local_pre_rewrites
;
355 grammar_post_rewrites
= gst.grammar_post_rewrites
@ post_pre_rewrites
;
356 grammar_inline_forms
= gst.grammar_inline_forms
@ inline_forms
360 (* Augment grammar with new start production *)
361 let gst = augment_grammar gst in
363 (* Check associativity list *)
366 (* Do sanity-check on grammar *)
369 (* Create lexer environment *)
370 let lenv = create_lenv
gst loptions
in
372 (* Apply token inheritance rules (-extend/-override/-remove) *)
373 let gst, lenv = apply_token_inheritance
gst lenv in
375 (* Are we applying the token rules right? *)
376 debug_regexps
lenv.lexer_regexps
;
378 debug_token_rules
"\nWe have the following lexer-rewrites:\n" gst.grammar_token_rules
;
379 gst, module_name
, lenv, ss_is_defined