/* Imported Upstream version 6.33.1~b2+dfsg.1
   [debian_inform6.git] / src / syntax.c
   blob 329abdd830bea79cb7511bc8bc905c4bf1b258cf */
/* ------------------------------------------------------------------------- */
/*   "syntax" : Syntax analyser and compiler                                 */
/*                                                                           */
/*   Part of Inform 6.33                                                     */
/*   copyright (c) Graham Nelson 1993 - 2014                                 */
/*                                                                           */
/* ------------------------------------------------------------------------- */
9 #include "header.h"
11 static char *lexical_source;
13 int no_syntax_lines; /* Syntax line count */
15 static void begin_syntax_line(int statement_mode)
16 { no_syntax_lines++;
17 next_token_begins_syntax_line = TRUE;
19 clear_expression_space();
20 if (statement_mode)
21 { statements.enabled = TRUE;
22 conditions.enabled = TRUE;
23 local_variables.enabled = TRUE;
24 system_functions.enabled = TRUE;
26 misc_keywords.enabled = FALSE;
27 directive_keywords.enabled = FALSE;
28 directives.enabled = FALSE;
29 segment_markers.enabled = FALSE;
30 opcode_names.enabled = FALSE;
32 else
33 { directives.enabled = TRUE;
34 segment_markers.enabled = TRUE;
36 statements.enabled = FALSE;
37 misc_keywords.enabled = FALSE;
38 directive_keywords.enabled = FALSE;
39 local_variables.enabled = FALSE;
40 system_functions.enabled = FALSE;
41 conditions.enabled = FALSE;
42 opcode_names.enabled = FALSE;
45 sequence_point_follows = TRUE;
47 if (debugfile_switch)
48 { get_next_token();
49 statement_debug_location = get_token_location();
50 put_token_back();
54 extern void panic_mode_error_recovery(void)
56 /* Consume tokens until the next semicolon (or end of file).
57 This is typically called after a syntax error, in hopes of
58 getting parsing back on track. */
60 while ((token_type != EOF_TT)
61 && ((token_type != SEP_TT)||(token_value != SEMICOLON_SEP)))
63 get_next_token();
66 extern void get_next_token_with_directives(void)
68 /* A higher-level version of get_next_token(), which detects and
69 obeys directives such as #ifdef/#ifnot/#endif. (The # sign is
70 required in this case.)
72 This is called while parsing a long construct, such as Class or
73 Object, where we want to support internal #ifdefs. (Although
74 function-parsing predates this and doesn't make use of it.)
76 (Technically this permits *any* #-directive, which means you
77 can define global variables or properties or what-have-you in
78 the middle of an object. You can do that in the middle of an
79 object, too. Don't. It's about as well-supported as Wile E.
80 Coyote one beat before the plummet-lines kick in.) */
82 int directives_save, segment_markers_save, statements_save;
84 while (TRUE)
86 get_next_token();
88 /* If the first token is not a '#', return it directly. */
89 if ((token_type != SEP_TT) || (token_value != HASH_SEP))
90 return;
92 /* Save the lexer flags, and set up for directive parsing. */
93 directives_save = directives.enabled;
94 segment_markers_save = segment_markers.enabled;
95 statements_save = statements.enabled;
97 directives.enabled = TRUE;
98 segment_markers.enabled = FALSE;
99 statements.enabled = FALSE;
100 conditions.enabled = FALSE;
101 local_variables.enabled = FALSE;
102 misc_keywords.enabled = FALSE;
103 system_functions.enabled = FALSE;
105 get_next_token();
107 if ((token_type == SEP_TT) && (token_value == OPEN_SQUARE_SEP))
108 { error("It is illegal to nest a routine inside an object using '#['");
109 return;
112 if (token_type == DIRECTIVE_TT)
113 parse_given_directive(TRUE);
114 else
115 { ebf_error("directive", token_text);
116 return;
119 /* Restore all the lexer flags. (We are squashing several of them
120 into a single save variable, which I think is safe because that's
121 what CKnight did.)
123 directive_keywords.enabled = FALSE;
124 directives.enabled = directives_save;
125 segment_markers.enabled = segment_markers_save;
126 statements.enabled =
127 conditions.enabled =
128 local_variables.enabled =
129 misc_keywords.enabled =
130 system_functions.enabled = statements_save;
134 extern void parse_program(char *source)
136 lexical_source = source;
137 while (parse_directive(FALSE)) ;
140 extern int parse_directive(int internal_flag)
142 /* Internal_flag is FALSE if the directive is encountered normally,
143 TRUE if encountered with a # prefix inside a routine or object
144 definition.
146 Returns: TRUE if program continues, FALSE if end of file reached. */
148 int routine_symbol, rep_symbol;
149 int is_renamed;
151 begin_syntax_line(FALSE);
152 get_next_token();
154 if (token_type == EOF_TT) return(FALSE);
156 if ((token_type == SEP_TT) && (token_value == HASH_SEP))
157 get_next_token();
159 if ((token_type == SEP_TT) && (token_value == OPEN_SQUARE_SEP))
160 { if (internal_flag)
161 { error("It is illegal to nest routines using '#['");
162 return(TRUE);
165 directives.enabled = FALSE;
166 directive_keywords.enabled = FALSE;
167 segment_markers.enabled = FALSE;
169 /* The upcoming symbol is a definition; don't count it as a
170 top-level reference *to* the function. */
171 df_dont_note_global_symbols = TRUE;
172 get_next_token();
173 df_dont_note_global_symbols = FALSE;
174 if ((token_type != SYMBOL_TT)
175 || ((!(sflags[token_value] & UNKNOWN_SFLAG))
176 && (!(sflags[token_value] & REPLACE_SFLAG))))
177 { ebf_error("routine name", token_text);
178 return(FALSE);
181 routine_symbol = token_value;
183 rep_symbol = routine_symbol;
184 is_renamed = find_symbol_replacement(&rep_symbol);
186 if ((sflags[routine_symbol] & REPLACE_SFLAG)
187 && !is_renamed && (is_systemfile()))
188 { /* The function is definitely being replaced (system_file
189 always loses priority in a replacement) but is not
190 being renamed to something else. Skip its definition
191 entirely. */
192 dont_enter_into_symbol_table = TRUE;
194 { get_next_token();
195 } while (!((token_type == EOF_TT)
196 || ((token_type==SEP_TT)
197 && (token_value==CLOSE_SQUARE_SEP))));
198 dont_enter_into_symbol_table = FALSE;
199 if (token_type == EOF_TT) return FALSE;
201 else
202 { /* Parse the function definition and assign its symbol. */
203 assign_symbol(routine_symbol,
204 parse_routine(lexical_source, FALSE,
205 (char *) symbs[routine_symbol], FALSE, routine_symbol),
206 ROUTINE_T);
207 slines[routine_symbol] = routine_starts_line;
210 if (is_renamed) {
211 /* This function was subject to a "Replace X Y" directive.
212 The first time we see a definition for symbol X, we
213 copy it to Y -- that's the "original" form of the
214 function. */
215 if (svals[rep_symbol] == 0) {
216 assign_symbol(rep_symbol, svals[routine_symbol], ROUTINE_T);
220 get_next_token();
221 if ((token_type != SEP_TT) || (token_value != SEMICOLON_SEP))
222 { ebf_error("';' after ']'", token_text);
223 put_token_back();
225 return TRUE;
228 if ((token_type == SYMBOL_TT) && (stypes[token_value] == CLASS_T))
229 { if (internal_flag)
230 { error("It is illegal to nest an object in a routine using '#classname'");
231 return(TRUE);
233 sflags[token_value] |= USED_SFLAG;
234 make_object(FALSE, NULL, -1, -1, svals[token_value]);
235 return TRUE;
238 if (token_type != DIRECTIVE_TT)
239 { /* If we're internal, we expect only a directive here. If
240 we're top-level, the possibilities are broader. */
241 if (internal_flag)
242 ebf_error("directive", token_text);
243 else
244 ebf_error("directive, '[' or class name", token_text);
245 panic_mode_error_recovery();
246 return TRUE;
249 return !(parse_given_directive(internal_flag));
252 static int switch_sign(void)
254 if ((token_type == SEP_TT)&&(token_value == COLON_SEP)) return 1;
255 if ((token_type == SEP_TT)&&(token_value == COMMA_SEP)) return 2;
256 if ((token_type==MISC_KEYWORD_TT)&&(token_value==TO_MK)) return 3;
257 return 0;
260 static assembly_operand spec_stack[32];
261 static int spec_type[32];
263 static void compile_alternatives_z(assembly_operand switch_value, int n,
264 int stack_level, int label, int flag)
265 { switch(n)
266 { case 1:
267 assemblez_2_branch(je_zc, switch_value,
268 spec_stack[stack_level],
269 label, flag); return;
270 case 2:
271 assemblez_3_branch(je_zc, switch_value,
272 spec_stack[stack_level], spec_stack[stack_level+1],
273 label, flag); return;
274 case 3:
275 assemblez_4_branch(je_zc, switch_value,
276 spec_stack[stack_level], spec_stack[stack_level+1],
277 spec_stack[stack_level+2],
278 label, flag); return;
282 static void compile_alternatives_g(assembly_operand switch_value, int n,
283 int stack_level, int label, int flag)
285 int the_zc = (flag) ? jeq_gc : jne_gc;
287 if (n == 1) {
288 assembleg_2_branch(the_zc, switch_value,
289 spec_stack[stack_level],
290 label);
292 else {
293 error("*** Cannot generate multi-equality tests in Glulx ***");
297 static void compile_alternatives(assembly_operand switch_value, int n,
298 int stack_level, int label, int flag)
300 if (!glulx_mode)
301 compile_alternatives_z(switch_value, n, stack_level, label, flag);
302 else
303 compile_alternatives_g(switch_value, n, stack_level, label, flag);
306 static void parse_switch_spec(assembly_operand switch_value, int label,
307 int action_switch)
309 int i, j, label_after = -1, spec_sp = 0;
310 int max_equality_args = ((!glulx_mode) ? 3 : 1);
312 sequence_point_follows = FALSE;
315 { if (spec_sp == 32)
316 { error("At most 32 values can be given in a single 'switch' case");
317 panic_mode_error_recovery();
318 return;
321 if (action_switch)
322 { get_next_token();
323 spec_stack[spec_sp].type =
324 ((!glulx_mode) ? LONG_CONSTANT_OT : CONSTANT_OT);
325 spec_stack[spec_sp].value = 0;
326 spec_stack[spec_sp].marker = 0;
327 spec_stack[spec_sp] = action_of_name(token_text);
329 if (spec_stack[spec_sp].value == -1)
330 { spec_stack[spec_sp].value = 0;
331 ebf_error("action (or fake action) name", token_text);
334 else
335 spec_stack[spec_sp] =
336 code_generate(parse_expression(CONSTANT_CONTEXT), CONSTANT_CONTEXT, -1);
338 misc_keywords.enabled = TRUE;
339 get_next_token();
340 misc_keywords.enabled = FALSE;
342 spec_type[spec_sp++] = switch_sign();
343 switch(spec_type[spec_sp-1])
344 { case 0:
345 if (action_switch)
346 ebf_error("',' or ':'", token_text);
347 else ebf_error("',', ':' or 'to'", token_text);
348 panic_mode_error_recovery();
349 return;
350 case 1: goto GenSpecCode;
351 case 3: if (label_after == -1) label_after = next_label++;
353 } while(TRUE);
355 GenSpecCode:
357 if ((spec_sp > max_equality_args) && (label_after == -1))
358 label_after = next_label++;
360 if (label_after == -1)
361 { compile_alternatives(switch_value, spec_sp, 0, label, FALSE); return;
364 for (i=0; i<spec_sp;)
366 j=i; while ((j<spec_sp) && (spec_type[j] != 3)) j++;
368 if (j > i)
369 { if (j-i > max_equality_args) j=i+max_equality_args;
371 if (j == spec_sp)
372 compile_alternatives(switch_value, j-i, i, label, FALSE);
373 else
374 compile_alternatives(switch_value, j-i, i, label_after, TRUE);
376 i=j;
378 else
380 if (!glulx_mode) {
381 if (i == spec_sp - 2)
382 { assemblez_2_branch(jl_zc, switch_value, spec_stack[i],
383 label, TRUE);
384 assemblez_2_branch(jg_zc, switch_value, spec_stack[i+1],
385 label, TRUE);
387 else
388 { assemblez_2_branch(jl_zc, switch_value, spec_stack[i],
389 next_label, TRUE);
390 assemblez_2_branch(jg_zc, switch_value, spec_stack[i+1],
391 label_after, FALSE);
392 assemble_label_no(next_label++);
395 else {
396 if (i == spec_sp - 2)
397 { assembleg_2_branch(jlt_gc, switch_value, spec_stack[i],
398 label);
399 assembleg_2_branch(jgt_gc, switch_value, spec_stack[i+1],
400 label);
402 else
403 { assembleg_2_branch(jlt_gc, switch_value, spec_stack[i],
404 next_label);
405 assembleg_2_branch(jle_gc, switch_value, spec_stack[i+1],
406 label_after);
407 assemble_label_no(next_label++);
410 i = i+2;
414 assemble_label_no(label_after);
417 extern int32 parse_routine(char *source, int embedded_flag, char *name,
418 int veneer_flag, int r_symbol)
419 { int32 packed_address; int i; int debug_flag = FALSE;
420 int switch_clause_made = FALSE, default_clause_made = FALSE,
421 switch_label = 0;
422 debug_location_beginning beginning_debug_location =
423 get_token_location_beginning();
425 /* (switch_label needs no initialisation here, but it prevents some
426 compilers from issuing warnings) */
428 if ((source != lexical_source) || (veneer_flag))
429 { lexical_source = source;
430 restart_lexer(lexical_source, name);
433 no_locals = 0;
435 for (i=0;i<MAX_LOCAL_VARIABLES-1;i++) local_variables.keywords[i] = "";
438 { statements.enabled = TRUE;
439 dont_enter_into_symbol_table = TRUE;
440 get_next_token();
441 dont_enter_into_symbol_table = FALSE;
442 if ((token_type == SEP_TT) && (token_value == TIMES_SEP)
443 && (no_locals == 0) && (!debug_flag))
444 { debug_flag = TRUE; continue;
447 if (token_type != DQ_TT)
448 { if ((token_type == SEP_TT)
449 && (token_value == SEMICOLON_SEP)) break;
450 ebf_error("local variable name or ';'", token_text);
451 panic_mode_error_recovery();
452 break;
455 if (no_locals == MAX_LOCAL_VARIABLES-1)
456 { error_numbered("Too many local variables for a routine; max is",
457 MAX_LOCAL_VARIABLES-1);
458 panic_mode_error_recovery();
459 break;
462 for (i=0;i<no_locals;i++)
463 if (strcmpcis(token_text, local_variables.keywords[i])==0)
464 error_named("Local variable defined twice:", token_text);
465 local_variables.keywords[no_locals++] = token_text;
466 } while(TRUE);
468 construct_local_variable_tables();
470 if ((trace_fns_setting==3)
471 || ((trace_fns_setting==2) && (veneer_mode==FALSE))
472 || ((trace_fns_setting==1) && (is_systemfile()==FALSE)))
473 debug_flag = TRUE;
474 if ((embedded_flag == FALSE) && (veneer_mode == FALSE) && debug_flag)
475 sflags[r_symbol] |= STAR_SFLAG;
477 packed_address = assemble_routine_header(no_locals, debug_flag,
478 name, embedded_flag, r_symbol);
481 { begin_syntax_line(TRUE);
483 get_next_token();
485 if (token_type == EOF_TT)
486 { ebf_error("']'", token_text);
487 assemble_routine_end
488 (embedded_flag,
489 get_token_location_end(beginning_debug_location));
490 put_token_back();
491 break;
494 if ((token_type == SEP_TT)
495 && (token_value == CLOSE_SQUARE_SEP))
496 { if (switch_clause_made && (!default_clause_made))
497 assemble_label_no(switch_label);
498 directives.enabled = TRUE;
499 sequence_point_follows = TRUE;
500 get_next_token();
501 assemble_routine_end
502 (embedded_flag,
503 get_token_location_end(beginning_debug_location));
504 put_token_back();
505 break;
508 if ((token_type == STATEMENT_TT) && (token_value == SDEFAULT_CODE))
509 { if (default_clause_made)
510 error("Multiple 'default' clauses defined in same 'switch'");
511 default_clause_made = TRUE;
513 if (switch_clause_made)
514 { if (!execution_never_reaches_here)
515 { sequence_point_follows = FALSE;
516 if (!glulx_mode)
517 assemblez_0((embedded_flag)?rfalse_zc:rtrue_zc);
518 else
519 assembleg_1(return_gc,
520 ((embedded_flag)?zero_operand:one_operand));
522 assemble_label_no(switch_label);
524 switch_clause_made = TRUE;
526 get_next_token();
527 if ((token_type == SEP_TT) &&
528 (token_value == COLON_SEP)) continue;
529 ebf_error("':' after 'default'", token_text);
530 panic_mode_error_recovery();
531 continue;
534 /* Only check for the form of a case switch if the initial token
535 isn't double-quoted text, as that would mean it was a print_ret
536 statement: this is a mild ambiguity in the grammar.
537 Action statements also cannot be cases. */
539 if ((token_type != DQ_TT) && (token_type != SEP_TT))
540 { get_next_token();
541 if (switch_sign() > 0)
542 { assembly_operand AO;
543 if (default_clause_made)
544 error("'default' must be the last 'switch' case");
546 if (switch_clause_made)
547 { if (!execution_never_reaches_here)
548 { sequence_point_follows = FALSE;
549 if (!glulx_mode)
550 assemblez_0((embedded_flag)?rfalse_zc:rtrue_zc);
551 else
552 assembleg_1(return_gc,
553 ((embedded_flag)?zero_operand:one_operand));
555 assemble_label_no(switch_label);
558 switch_label = next_label++;
559 switch_clause_made = TRUE;
560 put_token_back(); put_token_back();
562 if (!glulx_mode) {
563 AO.type = VARIABLE_OT; AO.value = 249; AO.marker = 0;
565 else {
566 AO.type = GLOBALVAR_OT;
567 AO.value = MAX_LOCAL_VARIABLES+6; /* sw__var */
568 AO.marker = 0;
570 parse_switch_spec(AO, switch_label, TRUE);
572 continue;
574 else
575 { put_token_back(); put_token_back(); get_next_token();
576 sequence_point_follows = TRUE;
580 parse_statement(-1, -1);
582 } while (TRUE);
584 return packed_address;
587 extern void parse_code_block(int break_label, int continue_label,
588 int switch_rule)
589 { int switch_clause_made = FALSE, default_clause_made = FALSE, switch_label,
590 unary_minus_flag;
592 begin_syntax_line(TRUE);
593 get_next_token();
595 if (token_type == SEP_TT && token_value == OPEN_BRACE_SEP)
596 { do
597 { begin_syntax_line(TRUE);
598 get_next_token();
599 if (token_type == SEP_TT && token_value == CLOSE_BRACE_SEP)
600 { if (switch_clause_made && (!default_clause_made))
601 assemble_label_no(switch_label);
602 return;
604 if (token_type == EOF_TT)
605 { ebf_error("'}'", token_text); return; }
607 if (switch_rule != 0)
609 /* Within a 'switch' block */
611 if ((token_type==STATEMENT_TT)&&(token_value==SDEFAULT_CODE))
612 { if (default_clause_made)
613 error("Multiple 'default' clauses defined in same 'switch'");
614 default_clause_made = TRUE;
616 if (switch_clause_made)
617 { if (!execution_never_reaches_here)
618 { sequence_point_follows = FALSE;
619 assemble_jump(break_label);
621 assemble_label_no(switch_label);
623 switch_clause_made = TRUE;
625 get_next_token();
626 if ((token_type == SEP_TT) &&
627 (token_value == COLON_SEP)) continue;
628 ebf_error("':' after 'default'", token_text);
629 panic_mode_error_recovery();
630 continue;
633 /* Decide: is this an ordinary statement, or the start
634 of a new case? */
636 if (token_type == DQ_TT) goto NotASwitchCase;
638 unary_minus_flag
639 = ((token_type == SEP_TT)&&(token_value == MINUS_SEP));
640 if (unary_minus_flag) get_next_token();
642 /* Now read the token _after_ any possible constant:
643 if that's a 'to', ',' or ':' then we have a case */
645 misc_keywords.enabled = TRUE;
646 get_next_token();
647 misc_keywords.enabled = FALSE;
649 if (switch_sign() > 0)
650 { assembly_operand AO;
652 if (default_clause_made)
653 error("'default' must be the last 'switch' case");
655 if (switch_clause_made)
656 { if (!execution_never_reaches_here)
657 { sequence_point_follows = FALSE;
658 assemble_jump(break_label);
660 assemble_label_no(switch_label);
663 switch_label = next_label++;
664 switch_clause_made = TRUE;
665 put_token_back(); put_token_back();
666 if (unary_minus_flag) put_token_back();
668 AO = temp_var1;
669 parse_switch_spec(AO, switch_label, FALSE);
670 continue;
672 else
673 { put_token_back(); put_token_back();
674 if (unary_minus_flag) put_token_back();
675 get_next_token();
679 if ((switch_rule != 0) && (!switch_clause_made))
680 ebf_error("switch value", token_text);
682 NotASwitchCase:
683 sequence_point_follows = TRUE;
684 parse_statement(break_label, continue_label);
686 while(TRUE);
689 if (switch_rule != 0)
690 ebf_error("braced code block after 'switch'", token_text);
692 parse_statement(break_label, continue_label);
693 return;
/* ========================================================================= */
/*   Data structure management routines                                      */
/* ------------------------------------------------------------------------- */
/* Variable-initialisation hook for this module: nothing to do. */
extern void init_syntax_vars(void)
{
}
704 extern void syntax_begin_pass(void)
705 { no_syntax_lines = 0;
/* Array-allocation hook for this module: nothing to allocate. */
extern void syntax_allocate_arrays(void)
{
}
/* Array-release hook for this module: nothing to free. */
extern void syntax_free_arrays(void)
{
}
/* ========================================================================= */