Got rid of previous stuff and just imported mcc.
[shack.git] / mcc-0.5.4rta03 / front / generic / phobos_grammar.ml
blobeecabb065149840cd6b4ae048409361c8074b0c0
1 (*
2 * Grammar utilities.
3 * ----------------------------------------------------------------
5 * Copyright (C) 2001 Adam Granicz, Caltech
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version 2
10 * of the License, or (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
21 * Author: Adam Granicz
22 * Email: granicz@cs.caltech.edu
24 open Mc_string_util
26 open Phobos_type
27 open Phobos_constants
28 open Phobos_parse_state
29 open Phobos_exn
30 open Phobos_print
31 open Phobos_debug
32 open Phobos_util
33 open Phobos_rewrite
34 open Phobos_marshal
35 open Phobos_tokenizer
36 open Phobos_token_inheritance
38 (*****************************************************
39 * Associativity and precedence list.
41 * Check that it contains only valid terminals.
42 *****************************************************)
43 let rec check_terminals terminals = function
44 (s, pos) :: rest ->
45 if string_set_mem terminals s then
46 check_terminals terminals rest
47 else
48 begin
49 print_warning pos (string_format "undeclared terminal %s" s);
50 check_terminals terminals rest
51 end
52 | [] ->
55 let check_assocs gst =
56 List.iter (fun assoc ->
57 match assoc with
58 Dir_nonassoc lst
59 | Dir_leftassoc lst
60 | Dir_rightassoc lst ->
61 check_terminals gst.grammar_terminals lst) gst.grammar_assocs
63 let rec assoc_list_mem lst s =
64 match lst with
65 (s1, pos) :: rest ->
66 if s1 = s then
67 true
68 else
69 assoc_list_mem rest s
70 | [] ->
71 false
74 * Return priority (lowest being 1) of a token.
75 * For non-terminals, return -1.
77 let rec priority_of_aux s level = function
78 assoc :: rest ->
79 (match assoc with
80 Dir_nonassoc lst ->
81 if assoc_list_mem lst s then
82 level, Some NonAssoc
83 else
84 priority_of_aux s (level+1) rest
85 | Dir_leftassoc lst ->
86 if assoc_list_mem lst s then
87 level, Some LeftAssoc
88 else
89 priority_of_aux s (level+1) rest
90 | Dir_rightassoc lst ->
91 if assoc_list_mem lst s then
92 level, Some RightAssoc
93 else
94 priority_of_aux s (level+1) rest)
95 | [] ->
96 -1, None
98 let priority_of s assocs =
99 priority_of_aux s 1 assocs
101 let rec get_start_symbol = function
102 opt :: rest ->
103 (match opt with
104 Go_start s ->
105 NonTerminal s
106 | _ ->
107 get_start_symbol rest)
108 | [] ->
109 raise (PhobosException (bogus_pos, "no start symbol declared"))
112 * Convert from pre-grammar to grammar, and do some
113 * sanity checks.
115 let rec create_symbols terminals nonterminals = function
116 (head, pos) :: rest ->
117 let psym =
118 if head = "_" then
119 Empty, pos
120 else if string_set_mem terminals head then
121 Terminal head, pos
122 else if string_set_mem nonterminals head then
123 NonTerminal head, pos
124 else
125 raise (PhobosException (pos, (Printf.sprintf "undefined symbol [%s]" head)))
127 psym :: create_symbols terminals nonterminals rest
128 | [] ->
131 and create_grammar_aux gst = function
132 ((sym, pos), (prods, prec_opt, rewrites)) :: rest ->
133 (NonTerminal sym, pos, create_symbols gst.grammar_terminals gst.grammar_nonterminals prods,
134 prec_opt, List.rev rewrites) :: create_grammar_aux gst rest
135 | [] ->
138 and create_grammar gst pre_grammar =
139 List.rev (create_grammar_aux gst pre_grammar)
142 * Make sure that every symbol is either a variable (e.g.
143 * it has been defined as the head of a production), or a
144 * terminal symbol (e.g. there is a regexp for it).
145 * We do this by searching the list of terminals and
146 * non-terminals that we previously computed.
148 let rec check_productions terminals nonterminals = function
149 [(Eof, pos)]
150 | [(Empty, pos)] ->
152 | (Eof, pos) :: rest ->
153 raise (PhobosException (pos, "EOF can't be followed by any symbols"))
154 | (Empty, pos) :: rest ->
155 raise (PhobosException (pos, "'_' can't be followed by any symbols"))
156 | (psym, pos) :: rest ->
157 let sym = string_of_psymbol psym in
158 if string_set_mem nonterminals sym then
159 check_productions terminals nonterminals rest
160 else
161 if string_set_mem terminals sym then
162 check_productions terminals nonterminals rest
163 else
164 raise (PhobosException (pos, string_add ["unbound symbol ["; sym; "]"]))
165 | [] ->
168 and check_grammar gst =
169 List.iter (fun (_, _, prods, _, _) ->
170 check_productions gst.grammar_terminals gst.grammar_nonterminals prods) gst.grammar_grammar
172 let grammar_table_of_grammar (grammar: grammar) =
173 List.fold_left (fun grammar_table (psym, _, prods, prec_opt, rewrites) ->
174 let prods = List.map fst prods in
175 (* Do not add duplicate productions *)
176 let grammar_table = grammar_table_add_once grammar_table psym prods in
177 grammar_table) PSymbolMTable.empty grammar
180 * Augment grammar with a new start production S' -> S$.
182 let augment_grammar gst =
183 (* Leave if no start symbol is given *)
184 if gst.grammar_start_symbol = bogus_symbol then
186 else
187 (* We make up a position for the new start symbol *)
188 let new_production =
189 (global_start_symbol, bogus_pos, [(gst.grammar_start_symbol, bogus_pos); (Eof, bogus_pos)], None, [])
191 { grammar_nonterminals = gst.grammar_nonterminals;
192 grammar_terminals = gst.grammar_terminals;
193 grammar_assocs = gst.grammar_assocs;
194 grammar_token_rules = gst.grammar_token_rules;
195 grammar_start_symbol = gst.grammar_start_symbol;
196 grammar_grammar = new_production :: gst.grammar_grammar;
197 grammar_termsets = gst.grammar_termsets;
198 grammar_local_rewrites= gst.grammar_local_rewrites;
199 grammar_post_rewrites = gst.grammar_post_rewrites;
200 grammar_inline_forms = gst.grammar_inline_forms
203 let remove_start_productions gst =
204 (* Remove redefined start productions *)
205 if gst.grammar_start_symbol <> bogus_symbol then begin
206 let (grammar', _) = List.partition (fun (head_sym, _, _, _, _) ->
207 match head_sym with
208 NonTerminal s when s = global_start_string ->
209 false
210 | _ ->
211 true) gst.grammar_grammar
213 grammar'
214 end else
215 gst.grammar_grammar
218 * Include other semantic modules.
219 * For now, there are a couple bogus things in here.
220 * [regexps; assocs; grammar; ..]
222 let include_grammars paths includes =
223 let dummy_gst =
224 { grammar_nonterminals = StringSet.empty;
225 grammar_terminals = StringSet.empty;
226 grammar_assocs = [];
227 grammar_token_rules = [];
228 grammar_start_symbol = bogus_symbol;
229 grammar_grammar = [];
230 grammar_termsets = [];
231 grammar_local_rewrites = [];
232 grammar_post_rewrites = [];
233 grammar_inline_forms = []
236 let gst =
237 List.fold_left (fun gst incl ->
238 let gst', _, _, _ = load_grammar (find_file paths incl) in
239 (* Remove redefined start productions *)
240 let old_grammar = remove_start_productions gst in
241 (* Keep old start symbol if no new one is given *)
242 let start_symbol =
243 if gst'.grammar_start_symbol = bogus_symbol then
244 gst.grammar_start_symbol
245 else
246 gst'.grammar_start_symbol
248 let nt_set = gst.grammar_nonterminals in
249 let nt_set' = gst'.grammar_nonterminals in
250 let t_set = gst.grammar_terminals in
251 let t_set' = gst'.grammar_terminals in
252 { grammar_nonterminals = string_set_union nt_set nt_set';
253 grammar_terminals = string_set_union t_set t_set';
254 grammar_assocs = gst'.grammar_assocs;
255 grammar_token_rules = gst'.grammar_token_rules @ gst.grammar_token_rules;
256 grammar_start_symbol = start_symbol;
257 grammar_grammar = gst'.grammar_grammar @ old_grammar;
258 grammar_termsets = gst'.grammar_termsets @ gst.grammar_termsets;
259 grammar_local_rewrites = gst'.grammar_local_rewrites @ gst.grammar_local_rewrites;
260 grammar_post_rewrites = gst'.grammar_post_rewrites @ gst.grammar_post_rewrites;
261 grammar_inline_forms = gst'.grammar_inline_forms @ gst.grammar_inline_forms
262 }) dummy_gst includes
264 if !Fir_state.debug_phobos then begin
265 Format.print_string "After include_grammars: terminals = \n";
266 print_string_set gst.grammar_terminals;
267 Format.print_string "After include_grammars: nonterminals = \n";
268 print_string_set gst.grammar_nonterminals
269 end;
273 * Process fresh pre_grammar.
275 let compile paths
276 { phobos_module_name = module_name;
277 phobos_includes = includes;
278 phobos_termsets = termsets;
279 phobos_local_rewrites = local_pre_rewrites;
280 phobos_lexer_info = (token_rules, loptions);
281 phobos_assoc_info = assocs;
282 phobos_grammar_info = (pre_grammar, grammar_options);
283 phobos_post_rewrites = post_pre_rewrites;
284 phobos_inline_forms = inline_forms
286 debug_pre_grammar pre_grammar;
288 (* Produce terminal and nonterminal symbols *)
289 let terminals = List.map (fun (_, ((sym, _), _), _, _) -> sym) token_rules in
290 let nonterminals = List.map (fun ((sym, _), _) -> sym) pre_grammar in
292 (* Filter out duplicates *)
293 let terminals = string_set_of_list (List.sort compare terminals) in
294 let nonterminals = string_set_of_list (List.sort compare nonterminals) in
295 debug_symbols "Symbols defined in this module (with no duplicates):\n" nonterminals terminals;
297 (* Import necessary semantic modules. *)
298 let gst = include_grammars paths includes in
300 (* Do we redefine disambiguation rules? *)
301 let assocs =
302 match assocs with
303 [] ->
304 gst.grammar_assocs
305 | _ ->
306 assocs
309 (* Retrieve start symbol *)
310 let start_symbol, ss_is_defined =
312 (* Do we have a new start symbol? *)
313 get_start_symbol grammar_options, true
314 with
315 Phobos_exn.PhobosException _ ->
316 let old_symbol = gst.grammar_start_symbol in
317 if old_symbol <> bogus_symbol then
318 old_symbol, true
319 else
320 bogus_symbol, false
323 debug_symbols "After joining with inherited grammars\n"
324 (string_set_union gst.grammar_nonterminals nonterminals)
325 (string_set_union gst.grammar_terminals terminals);
327 (* Add terminal and nonterminal symbols, and remove start production *)
328 let gst =
329 { grammar_nonterminals = string_set_union gst.grammar_nonterminals nonterminals;
330 grammar_terminals = string_set_union gst.grammar_terminals terminals;
331 grammar_assocs = gst.grammar_assocs;
332 grammar_token_rules = gst.grammar_token_rules;
333 grammar_start_symbol = gst.grammar_start_symbol;
334 grammar_grammar = remove_start_productions gst;
335 grammar_termsets = gst.grammar_termsets;
336 grammar_local_rewrites= gst.grammar_local_rewrites;
337 grammar_post_rewrites = gst.grammar_post_rewrites;
338 grammar_inline_forms = gst.grammar_inline_forms
342 (* Create grammar from pre_grammar *)
343 let grammar = create_grammar gst pre_grammar in
345 (* Update grammar information if necessary *)
346 let gst =
347 { grammar_nonterminals = string_set_union gst.grammar_nonterminals nonterminals;
348 grammar_terminals = string_set_union gst.grammar_terminals terminals;
349 grammar_assocs = assocs;
350 grammar_token_rules = gst.grammar_token_rules @ token_rules;
351 grammar_start_symbol = start_symbol;
352 grammar_grammar = gst.grammar_grammar @ grammar;
353 grammar_termsets = gst.grammar_termsets @ termsets;
354 grammar_local_rewrites= gst.grammar_local_rewrites @ local_pre_rewrites;
355 grammar_post_rewrites = gst.grammar_post_rewrites @ post_pre_rewrites;
356 grammar_inline_forms = gst.grammar_inline_forms @ inline_forms
360 (* Augment grammar with new start production *)
361 let gst = augment_grammar gst in
363 (* Check associativity list *)
364 check_assocs gst;
366 (* Do sanity-check on grammar *)
367 check_grammar gst;
369 (* Create lexer environment *)
370 let lenv = create_lenv gst loptions in
372 (* Apply token inheritance rules (-extend/-override/-remove) *)
373 let gst, lenv = apply_token_inheritance gst lenv in
375 (* Are we applying the token rules right? *)
376 debug_regexps lenv.lexer_regexps;
378 debug_token_rules "\nWe have the following lexer-rewrites:\n" gst.grammar_token_rules;
379 gst, module_name, lenv, ss_is_defined