Imported Upstream version 6.33.1~b2+dfsg.1
[debian_inform6.git] / src / expressp.c
blob8a3f1bec10ba82ce7b92e59ac0f92f70edb5b098
1 /* ------------------------------------------------------------------------- */
2 /* "expressp" : The expression parser */
3 /* */
4 /* Part of Inform 6.33 */
5 /* copyright (c) Graham Nelson 1993 - 2014 */
6 /* */
7 /* ------------------------------------------------------------------------- */
9 #include "header.h"
11 /* --- Interface to lexer -------------------------------------------------- */
13 static char separators_to_operators[103];
14 static char conditionals_to_operators[7];
15 static char token_type_allowable[301];
17 #define NOT_AN_OPERATOR (char) 0x7e
19 static void make_lexical_interface_tables(void)
20 { int i;
21 for (i=0;i<103;i++)
22 separators_to_operators[i] = NOT_AN_OPERATOR;
23 for (i=0;i<NUM_OPERATORS;i++)
24 if (operators[i].token_type == SEP_TT)
25 separators_to_operators[operators[i].token_value] = i;
27 for (i=0;i<7;i++) /* 7 being the size of keyword_group "conditions" */
28 conditionals_to_operators[i] = NOT_AN_OPERATOR;
29 for (i=0;i<NUM_OPERATORS;i++)
30 if (operators[i].token_type == CND_TT)
31 conditionals_to_operators[operators[i].token_value] = i;
33 for (i=0;i<301;i++) token_type_allowable[i] = 0;
35 token_type_allowable[VARIABLE_TT] = 1;
36 token_type_allowable[SYSFUN_TT] = 1;
37 token_type_allowable[DQ_TT] = 1;
38 token_type_allowable[DICTWORD_TT] = 1;
39 token_type_allowable[SUBOPEN_TT] = 1;
40 token_type_allowable[SUBCLOSE_TT] = 1;
41 token_type_allowable[SMALL_NUMBER_TT] = 1;
42 token_type_allowable[LARGE_NUMBER_TT] = 1;
43 token_type_allowable[ACTION_TT] = 1;
44 token_type_allowable[SYSTEM_CONSTANT_TT] = 1;
45 token_type_allowable[OP_TT] = 1;
48 static token_data current_token, previous_token, heldback_token;
50 static int comma_allowed, arrow_allowed, superclass_allowed,
51 bare_prop_allowed,
52 array_init_ambiguity, action_ambiguity,
53 etoken_count, inserting_token, bracket_level;
55 extern int *variable_usage;
57 int system_function_usage[32];
59 static int get_next_etoken(void)
60 { int v, symbol, mark_symbol_as_used = FALSE,
61 initial_bracket_level = bracket_level;
63 etoken_count++;
65 if (inserting_token)
66 { current_token = heldback_token;
67 inserting_token = FALSE;
69 else
70 { get_next_token();
71 current_token.text = token_text;
72 current_token.value = token_value;
73 current_token.type = token_type;
74 current_token.marker = 0;
75 current_token.symtype = 0;
76 current_token.symflags = -1;
79 switch(current_token.type)
80 { case LOCAL_VARIABLE_TT:
81 current_token.type = VARIABLE_TT;
82 variable_usage[current_token.value] = TRUE;
83 break;
85 case DQ_TT:
86 current_token.marker = STRING_MV;
87 break;
89 case SQ_TT:
90 { int32 unicode = text_to_unicode(token_text);
91 if (token_text[textual_form_length] == 0)
93 if (!glulx_mode) {
94 current_token.value = unicode_to_zscii(unicode);
95 if (current_token.value == 5)
96 { unicode_char_error("Character can be printed \
97 but not used as a value:", unicode);
98 current_token.value = '?';
100 if (current_token.value >= 0x100)
101 current_token.type = LARGE_NUMBER_TT;
102 else current_token.type = SMALL_NUMBER_TT;
104 else {
105 current_token.value = unicode;
106 if (current_token.value >= 0x8000
107 || current_token.value < -0x8000)
108 current_token.type = LARGE_NUMBER_TT;
109 else current_token.type = SMALL_NUMBER_TT;
112 else
113 { current_token.type = DICTWORD_TT;
114 current_token.marker = DWORD_MV;
117 break;
119 case SYMBOL_TT:
120 ReceiveSymbol:
121 symbol = current_token.value;
123 mark_symbol_as_used = TRUE;
125 v = svals[symbol];
127 current_token.symtype = stypes[symbol];
128 current_token.symflags = sflags[symbol];
129 switch(stypes[symbol])
130 { case ROUTINE_T:
131 current_token.marker = IROUTINE_MV;
132 break;
133 case GLOBAL_VARIABLE_T:
134 current_token.marker = VARIABLE_MV;
135 break;
136 case OBJECT_T:
137 case CLASS_T:
138 /* All objects must be backpatched in Glulx. */
139 if (module_switch || glulx_mode)
140 current_token.marker = OBJECT_MV;
141 break;
142 case ARRAY_T:
143 current_token.marker = ARRAY_MV;
144 break;
145 case INDIVIDUAL_PROPERTY_T:
146 if (module_switch) current_token.marker = IDENT_MV;
147 break;
148 case CONSTANT_T:
149 if (sflags[symbol] & (UNKNOWN_SFLAG + CHANGE_SFLAG))
150 { current_token.marker = SYMBOL_MV;
151 if (module_switch) import_symbol(symbol);
152 v = symbol;
154 else current_token.marker = 0;
155 break;
156 case LABEL_T:
157 error_named("Label name used as value:", token_text);
158 break;
159 default:
160 current_token.marker = 0;
161 break;
163 if (sflags[symbol] & SYSTEM_SFLAG)
164 current_token.marker = 0;
166 current_token.value = v;
168 if (!glulx_mode) {
169 if (((current_token.marker != 0)
170 && (current_token.marker != VARIABLE_MV))
171 || (v < 0) || (v > 255))
172 current_token.type = LARGE_NUMBER_TT;
173 else current_token.type = SMALL_NUMBER_TT;
175 else {
176 if (((current_token.marker != 0)
177 && (current_token.marker != VARIABLE_MV))
178 || (v < -0x8000) || (v >= 0x8000))
179 current_token.type = LARGE_NUMBER_TT;
180 else current_token.type = SMALL_NUMBER_TT;
183 if (stypes[symbol] == GLOBAL_VARIABLE_T)
184 { current_token.type = VARIABLE_TT;
185 variable_usage[current_token.value] = TRUE;
187 break;
189 case NUMBER_TT:
190 if (!glulx_mode) {
191 if (current_token.value >= 256)
192 current_token.type = LARGE_NUMBER_TT;
193 else
194 current_token.type = SMALL_NUMBER_TT;
196 else {
197 if (current_token.value < -0x8000
198 || current_token.value >= 0x8000)
199 current_token.type = LARGE_NUMBER_TT;
200 else
201 current_token.type = SMALL_NUMBER_TT;
203 break;
205 case SEP_TT:
206 switch(current_token.value)
207 { case ARROW_SEP:
208 if (!arrow_allowed)
209 current_token.type = ENDEXP_TT;
210 break;
212 case COMMA_SEP:
213 if ((bracket_level==0) && (!comma_allowed))
214 current_token.type = ENDEXP_TT;
215 break;
217 case SUPERCLASS_SEP:
218 if ((bracket_level==0) && (!superclass_allowed))
219 current_token.type = ENDEXP_TT;
220 break;
222 case GREATER_SEP:
223 get_next_token();
224 if ((token_type == SEP_TT)
225 &&((token_value == SEMICOLON_SEP)
226 || (token_value == GREATER_SEP)))
227 current_token.type = ENDEXP_TT;
228 put_token_back();
229 break;
231 case OPENB_SEP:
232 bracket_level++;
233 if (expr_trace_level>=3)
234 { printf("Previous token type = %d\n",previous_token.type);
235 printf("Previous token val = %d\n",previous_token.value);
237 if ((previous_token.type == OP_TT)
238 || (previous_token.type == SUBOPEN_TT)
239 || (previous_token.type == ENDEXP_TT)
240 || (array_init_ambiguity)
241 || ((bracket_level == 1) && (action_ambiguity)))
242 current_token.type = SUBOPEN_TT;
243 else
244 { inserting_token = TRUE;
245 heldback_token = current_token;
246 current_token.text = "<call>";
247 bracket_level--;
249 break;
251 case CLOSEB_SEP:
252 bracket_level--;
253 if (bracket_level < 0)
254 current_token.type = ENDEXP_TT;
255 else current_token.type = SUBCLOSE_TT;
256 break;
258 case SEMICOLON_SEP:
259 current_token.type = ENDEXP_TT; break;
261 case MINUS_SEP:
262 if ((previous_token.type == OP_TT)
263 || (previous_token.type == SUBOPEN_TT)
264 || (previous_token.type == ENDEXP_TT))
265 current_token.value = UNARY_MINUS_SEP; break;
267 case INC_SEP:
268 if ((previous_token.type == VARIABLE_TT)
269 || (previous_token.type == SUBCLOSE_TT)
270 || (previous_token.type == LARGE_NUMBER_TT)
271 || (previous_token.type == SMALL_NUMBER_TT))
272 current_token.value = POST_INC_SEP; break;
274 case DEC_SEP:
275 if ((previous_token.type == VARIABLE_TT)
276 || (previous_token.type == SUBCLOSE_TT)
277 || (previous_token.type == LARGE_NUMBER_TT)
278 || (previous_token.type == SMALL_NUMBER_TT))
279 current_token.value = POST_DEC_SEP; break;
281 case HASHHASH_SEP:
282 token_text = current_token.text + 2;
284 ActionUsedAsConstant:
286 current_token.type = ACTION_TT;
287 current_token.text = token_text;
288 current_token.value = 0;
289 current_token.marker = ACTION_MV;
291 break;
293 case HASHADOLLAR_SEP:
294 obsolete_warning("'#a$Act' is now superseded by '##Act'");
295 token_text = current_token.text + 3;
296 goto ActionUsedAsConstant;
298 case HASHGDOLLAR_SEP:
300 /* This form generates the position of a global variable
301 in the global variables array. So Glob is the same as
302 #globals_array --> #g$Glob */
304 current_token.text += 3;
305 current_token.type = SYMBOL_TT;
306 symbol = symbol_index(current_token.text, -1);
307 if (stypes[symbol] != GLOBAL_VARIABLE_T) {
308 ebf_error(
309 "global variable name after '#g$'",
310 current_token.text);
311 current_token.value = 0;
312 current_token.type = SMALL_NUMBER_TT;
313 current_token.marker = 0;
314 break;
316 mark_symbol_as_used = TRUE;
317 current_token.value = svals[symbol] - MAX_LOCAL_VARIABLES;
318 current_token.marker = 0;
319 if (!glulx_mode) {
320 if (current_token.value >= 0x100)
321 current_token.type = LARGE_NUMBER_TT;
322 else current_token.type = SMALL_NUMBER_TT;
324 else {
325 if (current_token.value >= 0x8000
326 || current_token.value < -0x8000)
327 current_token.type = LARGE_NUMBER_TT;
328 else current_token.type = SMALL_NUMBER_TT;
330 break;
332 case HASHNDOLLAR_SEP:
334 /* This form is still needed for constants like #n$a (the
335 dictionary address of the word "a"), since 'a' means
336 the ASCII value of 'a' */
338 if (strlen(token_text) > 4)
339 obsolete_warning(
340 "'#n$word' is now superseded by ''word''");
341 current_token.type = DICTWORD_TT;
342 current_token.value = 0;
343 current_token.text = token_text + 3;
344 current_token.marker = DWORD_MV;
345 break;
347 case HASHRDOLLAR_SEP:
349 /* This form -- #r$Routinename, to return the routine's */
350 /* packed address -- is needed far less often in Inform 6, */
351 /* where just giving the name Routine returns the packed */
352 /* address. But it's used in a lot of Inform 5 code. */
354 obsolete_warning(
355 "'#r$Routine' can now be written just 'Routine'");
356 current_token.text += 3;
357 current_token.type = SYMBOL_TT;
358 current_token.value = symbol_index(current_token.text, -1);
359 goto ReceiveSymbol;
361 case HASHWDOLLAR_SEP:
362 error("The obsolete '#w$word' construct has been removed");
363 break;
365 case HASH_SEP:
366 system_constants.enabled = TRUE;
367 get_next_token();
368 system_constants.enabled = FALSE;
369 if (token_type != SYSTEM_CONSTANT_TT)
370 { ebf_error(
371 "'r$', 'n$', 'g$' or internal Inform constant name after '#'",
372 token_text);
373 break;
375 else
376 { current_token.type = token_type;
377 current_token.value = token_value;
378 current_token.text = token_text;
379 current_token.marker = INCON_MV;
381 break;
383 break;
385 case CND_TT:
386 v = conditionals_to_operators[current_token.value];
387 if (v != NOT_AN_OPERATOR)
388 { current_token.type = OP_TT; current_token.value = v;
390 break;
393 if (current_token.type == SEP_TT)
394 { v = separators_to_operators[current_token.value];
395 if (v != NOT_AN_OPERATOR)
396 { if ((veneer_mode)
397 || ((v!=MESSAGE_OP) && (v!=MPROP_NUM_OP) && (v!=MPROP_NUM_OP)))
398 { current_token.type = OP_TT; current_token.value = v;
399 if (array_init_ambiguity &&
400 ((v==MINUS_OP) || (v==UNARY_MINUS_OP)) &&
401 (initial_bracket_level == 0) &&
402 (etoken_count != 1))
403 warning("Without bracketing, the minus sign '-' is ambiguous");
408 /* A feature of Inform making it annoyingly hard to parse left-to-right
409 is that there is no clear delimiter for expressions; that is, the
410 legal syntax often includes sequences of expressions with no
411 intervening markers such as commas. We therefore need to use some
412 internal context to determine whether an end is in sight... */
414 if (token_type_allowable[current_token.type]==0)
415 { if (expr_trace_level >= 3)
416 { printf("Discarding as not allowable: '%s' ", current_token.text);
417 describe_token(current_token);
418 printf("\n");
420 current_token.type = ENDEXP_TT;
422 else
423 if ((!((initial_bracket_level > 0)
424 || (previous_token.type == ENDEXP_TT)
425 || ((previous_token.type == OP_TT)
426 && (operators[previous_token.value].usage != POST_U))
427 || (previous_token.type == SYSFUN_TT)))
428 && ((current_token.type != OP_TT)
429 || (operators[current_token.value].usage == PRE_U)))
430 { if (expr_trace_level >= 3)
431 { printf("Discarding as no longer part: '%s' ", current_token.text);
432 describe_token(current_token);
433 printf("\n");
435 current_token.type = ENDEXP_TT;
437 else
438 { if (mark_symbol_as_used) sflags[symbol] |= USED_SFLAG;
439 if (expr_trace_level >= 3)
440 { printf("Expr token = '%s' ", current_token.text);
441 describe_token(current_token);
442 printf("\n");
446 if ((previous_token.type == ENDEXP_TT)
447 && (current_token.type == ENDEXP_TT)) return FALSE;
449 previous_token = current_token;
451 return TRUE;
454 /* --- Operator precedences ------------------------------------------------ */
456 #define LOWER_P 101
457 #define EQUAL_P 102
458 #define GREATER_P 103
460 #define e1 1 /* Missing operand error */
461 #define e2 2 /* Unexpected close bracket */
462 #define e3 3 /* Missing operator error */
463 #define e4 4 /* Expression ends with an open bracket */
464 #define e5 5 /* Associativity illegal error */
466 const int prec_table[] = {
468 /* a .......... ( ) end op term */
470 /* b ( */ LOWER_P, e3, LOWER_P, LOWER_P, e3,
471 /* . ) */ EQUAL_P, GREATER_P, e2, GREATER_P, GREATER_P,
472 /* . end */ e4, GREATER_P, e1, GREATER_P, GREATER_P,
473 /* . op */ LOWER_P, GREATER_P, LOWER_P, -1, GREATER_P,
474 /* . term */ LOWER_P, e3, LOWER_P, LOWER_P, e3
478 static int find_prec(token_data a, token_data b)
480 /* We are comparing the precedence of tokens a and b
481 (where a occurs to the left of b). If the expression is correct,
482 the only possible values are GREATER_P, LOWER_P or EQUAL_P;
483 if it is malformed then one of e1 to e5 results.
485 Note that this routine is not symmetrical and that the relation
486 is not trichotomous.
488 If a and b are equal (and aren't brackets), then
490 a LOWER_P a if a right-associative
491 a GREATER_P a if a left-associative
494 int i, j, l1, l2;
496 switch(a.type)
497 { case SUBOPEN_TT: i=0; break;
498 case SUBCLOSE_TT: i=1; break;
499 case ENDEXP_TT: i=2; break;
500 case OP_TT: i=3; break;
501 default: i=4; break;
503 switch(b.type)
504 { case SUBOPEN_TT: i+=0; break;
505 case SUBCLOSE_TT: i+=5; break;
506 case ENDEXP_TT: i+=10; break;
507 case OP_TT: i+=15; break;
508 default: i+=20; break;
511 j = prec_table[i]; if (j != -1) return j;
513 l1 = operators[a.value].precedence;
514 l2 = operators[b.value].precedence;
515 if (operators[b.value].usage == PRE_U) return LOWER_P;
516 if (operators[a.value].usage == POST_U) return GREATER_P;
518 /* Anomalous rule to resolve the function call precedence, which is
519 different on the right from on the left, e.g., in:
521 alpha.beta(gamma)
522 beta(gamma).alpha
525 if ((l1 == 11) && (l2 > 11)) return GREATER_P;
527 if (l1 < l2) return LOWER_P;
528 if (l1 > l2) return GREATER_P;
529 switch(operators[a.value].associativity)
530 { case L_A: return GREATER_P;
531 case R_A: return LOWER_P;
532 case 0: return e5;
534 return GREATER_P;
537 /* --- Converting token to operand ----------------------------------------- */
539 /* Must match the switch statement below */
540 int z_system_constant_list[] =
541 { adjectives_table_SC,
542 actions_table_SC,
543 classes_table_SC,
544 identifiers_table_SC,
545 preactions_table_SC,
546 largest_object_SC,
547 strings_offset_SC,
548 code_offset_SC,
549 actual_largest_object_SC,
550 static_memory_offset_SC,
551 array_names_offset_SC,
552 readable_memory_offset_SC,
553 cpv__start_SC,
554 cpv__end_SC,
555 ipv__start_SC,
556 ipv__end_SC,
557 array__start_SC,
558 array__end_SC,
559 highest_attribute_number_SC,
560 attribute_names_array_SC,
561 highest_property_number_SC,
562 property_names_array_SC,
563 highest_action_number_SC,
564 action_names_array_SC,
565 highest_fake_action_number_SC,
566 fake_action_names_array_SC,
567 highest_routine_number_SC,
568 routine_names_array_SC,
569 routines_array_SC,
570 routine_flags_array_SC,
571 highest_global_number_SC,
572 global_names_array_SC,
573 globals_array_SC,
574 global_flags_array_SC,
575 highest_array_number_SC,
576 array_names_array_SC,
577 array_flags_array_SC,
578 highest_constant_number_SC,
579 constant_names_array_SC,
580 highest_class_number_SC,
581 class_objects_array_SC,
582 highest_object_number_SC,
583 -1 };
585 static int32 value_of_system_constant_z(int t)
586 { switch(t)
587 { case adjectives_table_SC:
588 return adjectives_offset;
589 case actions_table_SC:
590 return actions_offset;
591 case classes_table_SC:
592 return class_numbers_offset;
593 case identifiers_table_SC:
594 return identifier_names_offset;
595 case preactions_table_SC:
596 return preactions_offset;
597 case largest_object_SC:
598 return 256 + no_objects - 1;
599 case strings_offset_SC:
600 return strings_offset/scale_factor;
601 case code_offset_SC:
602 return code_offset/scale_factor;
603 case actual_largest_object_SC:
604 return no_objects;
605 case static_memory_offset_SC:
606 return static_memory_offset;
607 case array_names_offset_SC:
608 return array_names_offset;
609 case readable_memory_offset_SC:
610 return Write_Code_At;
611 case cpv__start_SC:
612 return prop_values_offset;
613 case cpv__end_SC:
614 return class_numbers_offset;
615 case ipv__start_SC:
616 return individuals_offset;
617 case ipv__end_SC:
618 return variables_offset;
619 case array__start_SC:
620 return variables_offset + (MAX_GLOBAL_VARIABLES*WORDSIZE);
621 case array__end_SC:
622 return static_memory_offset;
624 case highest_attribute_number_SC:
625 return no_attributes-1;
626 case attribute_names_array_SC:
627 return attribute_names_offset;
629 case highest_property_number_SC:
630 return no_individual_properties-1;
631 case property_names_array_SC:
632 return identifier_names_offset + 2;
634 case highest_action_number_SC:
635 return no_actions-1;
636 case action_names_array_SC:
637 return action_names_offset;
639 case highest_fake_action_number_SC:
640 return ((grammar_version_number==1)?256:4096) + no_fake_actions-1;
641 case fake_action_names_array_SC:
642 return fake_action_names_offset;
644 case highest_routine_number_SC:
645 return no_named_routines-1;
646 case routine_names_array_SC:
647 return routine_names_offset;
648 case routines_array_SC:
649 return routines_array_offset;
650 case routine_flags_array_SC:
651 return routine_flags_array_offset;
652 case highest_global_number_SC:
653 return 16 + no_globals-1;
654 case global_names_array_SC:
655 return global_names_offset;
656 case globals_array_SC:
657 return variables_offset;
658 case global_flags_array_SC:
659 return global_flags_array_offset;
660 case highest_array_number_SC:
661 return no_arrays-1;
662 case array_names_array_SC:
663 return array_names_offset;
664 case array_flags_array_SC:
665 return array_flags_array_offset;
666 case highest_constant_number_SC:
667 return no_named_constants-1;
668 case constant_names_array_SC:
669 return constant_names_offset;
670 case highest_class_number_SC:
671 return no_classes-1;
672 case class_objects_array_SC:
673 return class_numbers_offset;
674 case highest_object_number_SC:
675 return no_objects-1;
678 error_named("System constant not implemented in Z-code",
679 system_constants.keywords[t]);
681 return(0);
684 /* Must match the switch statement below */
685 int glulx_system_constant_list[] =
686 { classes_table_SC,
687 identifiers_table_SC,
688 array_names_offset_SC,
689 cpv__start_SC,
690 cpv__end_SC,
691 dictionary_table_SC,
692 dynam_string_table_SC,
693 grammar_table_SC,
694 actions_table_SC,
695 globals_array_SC,
696 -1 };
698 static int32 value_of_system_constant_g(int t)
700 switch (t) {
701 case classes_table_SC:
702 return Write_RAM_At + class_numbers_offset;
703 case identifiers_table_SC:
704 return Write_RAM_At + identifier_names_offset;
705 case array_names_offset_SC:
706 return Write_RAM_At + array_names_offset;
707 case cpv__start_SC:
708 return prop_defaults_offset;
709 case cpv__end_SC:
710 return Write_RAM_At + class_numbers_offset;
711 case dictionary_table_SC:
712 return dictionary_offset;
713 case dynam_string_table_SC:
714 return abbreviations_offset;
715 case grammar_table_SC:
716 return grammar_table_offset;
717 case actions_table_SC:
718 return actions_offset;
719 case globals_array_SC:
720 return variables_offset;
723 error_named("System constant not implemented in Glulx",
724 system_constants.keywords[t]);
726 return 0;
729 extern int32 value_of_system_constant(int t)
731 if (!glulx_mode)
732 return value_of_system_constant_z(t);
733 else
734 return value_of_system_constant_g(t);
737 static int evaluate_term(token_data t, assembly_operand *o)
739 /* If the given token is a constant, evaluate it into the operand.
740 For now, the identifiers are considered variables.
742 Returns FALSE if it fails to understand type. */
744 int32 v;
746 o->marker = t.marker;
747 o->symtype = t.symtype;
748 o->symflags = t.symflags;
750 switch(t.type)
751 { case LARGE_NUMBER_TT:
752 v = t.value;
753 if (!glulx_mode) {
754 if (v < 0) v = v + 0x10000;
755 o->type = LONG_CONSTANT_OT;
756 o->value = v;
758 else {
759 o->value = v;
760 o->type = CONSTANT_OT;
762 return(TRUE);
763 case SMALL_NUMBER_TT:
764 v = t.value;
765 if (!glulx_mode) {
766 if (v < 0) v = v + 0x10000;
767 o->type = SHORT_CONSTANT_OT;
768 o->value = v;
770 else {
771 o->value = v;
772 set_constant_ot(o);
774 return(TRUE);
775 case DICTWORD_TT:
776 /* Find the dictionary address, adding to dictionary if absent */
777 if (!glulx_mode)
778 o->type = LONG_CONSTANT_OT;
779 else
780 o->type = CONSTANT_OT;
781 o->value = dictionary_add(t.text, 0x80, 0, 0);
782 return(TRUE);
783 case DQ_TT:
784 /* Create as a static string */
785 if (!glulx_mode)
786 o->type = LONG_CONSTANT_OT;
787 else
788 o->type = CONSTANT_OT;
789 o->value = compile_string(t.text, FALSE, FALSE);
790 return(TRUE);
791 case VARIABLE_TT:
792 if (!glulx_mode) {
793 o->type = VARIABLE_OT;
795 else {
796 if (t.value >= MAX_LOCAL_VARIABLES) {
797 o->type = GLOBALVAR_OT;
799 else {
800 /* This includes "local variable zero", which is really
801 the stack-pointer magic variable. */
802 o->type = LOCALVAR_OT;
805 o->value = t.value;
806 return(TRUE);
807 case SYSFUN_TT:
808 if (!glulx_mode) {
809 o->type = VARIABLE_OT;
810 o->value = t.value + 256;
812 else {
813 o->type = SYSFUN_OT;
814 o->value = t.value;
816 system_function_usage[t.value] = 1;
817 return(TRUE);
818 case ACTION_TT:
819 *o = action_of_name(t.text);
820 return(TRUE);
821 case SYSTEM_CONSTANT_TT:
822 /* Certain system constants depend only on the
823 version number and need no backpatching, as they
824 are known in advance. We can therefore evaluate
825 them immediately. */
826 if (!glulx_mode) {
827 o->type = LONG_CONSTANT_OT;
828 switch(t.value)
830 case version_number_SC:
831 o->type = SHORT_CONSTANT_OT;
832 o->marker = 0;
833 v = version_number; break;
834 case dict_par1_SC:
835 o->type = SHORT_CONSTANT_OT;
836 o->marker = 0;
837 v = (version_number==3)?4:6; break;
838 case dict_par2_SC:
839 o->type = SHORT_CONSTANT_OT;
840 o->marker = 0;
841 v = (version_number==3)?5:7; break;
842 case dict_par3_SC:
843 o->type = SHORT_CONSTANT_OT;
844 o->marker = 0;
845 v = (version_number==3)?6:8; break;
846 case lowest_attribute_number_SC:
847 case lowest_action_number_SC:
848 case lowest_routine_number_SC:
849 case lowest_array_number_SC:
850 case lowest_constant_number_SC:
851 case lowest_class_number_SC:
852 o->type = SHORT_CONSTANT_OT; o->marker = 0; v = 0; break;
853 case lowest_object_number_SC:
854 case lowest_property_number_SC:
855 o->type = SHORT_CONSTANT_OT; o->marker = 0; v = 1; break;
856 case lowest_global_number_SC:
857 o->type = SHORT_CONSTANT_OT; o->marker = 0; v = 16; break;
858 case lowest_fake_action_number_SC:
859 o->type = LONG_CONSTANT_OT; o->marker = 0;
860 v = ((grammar_version_number==1)?256:4096); break;
861 case oddeven_packing_SC:
862 o->type = SHORT_CONSTANT_OT; o->marker = 0;
863 v = oddeven_packing_switch; break;
864 default:
865 v = t.value;
866 o->marker = INCON_MV;
867 break;
869 o->value = v;
871 else {
872 o->type = CONSTANT_OT;
873 switch(t.value)
875 /* The three dict_par flags point at the lower byte
876 of the flag field, because the library is written
877 to expect one-byte fields, even though the compiler
878 generates a dictionary with room for two. */
879 case dict_par1_SC:
880 o->type = BYTECONSTANT_OT;
881 o->marker = 0;
882 v = DICT_ENTRY_FLAG_POS+1;
883 break;
884 case dict_par2_SC:
885 o->type = BYTECONSTANT_OT;
886 o->marker = 0;
887 v = DICT_ENTRY_FLAG_POS+3;
888 break;
889 case dict_par3_SC:
890 o->type = BYTECONSTANT_OT;
891 o->marker = 0;
892 v = DICT_ENTRY_FLAG_POS+5;
893 break;
895 /* ###fix: need to fill more of these in! */
897 default:
898 v = t.value;
899 o->marker = INCON_MV;
900 break;
902 o->value = v;
904 return(TRUE);
905 default:
906 return(FALSE);
910 /* --- Emitter ------------------------------------------------------------- */
912 expression_tree_node *ET;
913 static int ET_used;
915 extern void clear_expression_space(void)
916 { ET_used = 0;
919 static assembly_operand *emitter_stack;
920 static int *emitter_markers;
921 static int *emitter_bracket_counts;
923 #define FUNCTION_VALUE_MARKER 1
924 #define ARGUMENT_VALUE_MARKER 2
925 #define OR_VALUE_MARKER 3
927 static int emitter_sp;
929 static int is_property_t(int symbol_type)
930 { return ((symbol_type == PROPERTY_T) || (symbol_type == INDIVIDUAL_PROPERTY_T));
933 static void mark_top_of_emitter_stack(int marker, token_data t)
934 { if (emitter_sp < 1)
935 { compiler_error("SR error: Attempt to add a marker to the top of an empty emitter stack");
936 return;
938 if (expr_trace_level >= 2)
939 { printf("Marking top of emitter stack (which is ");
940 print_operand(emitter_stack[emitter_sp-1]);
941 printf(") as ");
942 switch(marker)
944 case FUNCTION_VALUE_MARKER:
945 printf("FUNCTION");
946 break;
947 case ARGUMENT_VALUE_MARKER:
948 printf("ARGUMENT");
949 break;
950 case OR_VALUE_MARKER:
951 printf("OR_VALUE");
952 break;
953 default:
954 printf("UNKNOWN");
955 break;
957 printf("\n");
959 if (emitter_markers[emitter_sp-1])
960 { if (marker == ARGUMENT_VALUE_MARKER)
962 warning("Ignoring spurious leading comma");
963 return;
965 error_named("Missing operand for", t.text);
966 if (emitter_sp == MAX_EXPRESSION_NODES)
967 memoryerror("MAX_EXPRESSION_NODES", MAX_EXPRESSION_NODES);
968 emitter_markers[emitter_sp] = 0;
969 emitter_bracket_counts[emitter_sp] = 0;
970 emitter_stack[emitter_sp] = zero_operand;
971 emitter_sp++;
973 emitter_markers[emitter_sp-1] = marker;
976 static void add_bracket_layer_to_emitter_stack(int depth)
977 { /* There's no point in tracking bracket layers that don't fence off any values. */
978 if (emitter_sp < depth + 1) return;
979 if (expr_trace_level >= 2)
980 printf("Adding bracket layer\n");
981 ++emitter_bracket_counts[emitter_sp-depth-1];
984 static void remove_bracket_layer_from_emitter_stack()
985 { /* Bracket layers that don't fence off any values will not have been tracked. */
986 if (emitter_sp < 2) return;
987 if (expr_trace_level >= 2)
988 printf("Removing bracket layer\n");
989 if (emitter_bracket_counts[emitter_sp-2] <= 0)
990 { compiler_error("SR error: Attempt to remove a nonexistent bracket layer from the emitter stack");
991 return;
993 --emitter_bracket_counts[emitter_sp-2];
996 static void emit_token(token_data t)
997 { assembly_operand o1, o2; int arity, stack_size, i;
998 int op_node_number, operand_node_number, previous_node_number;
999 int32 x;
1001 if (expr_trace_level >= 2)
1002 { printf("Output: %-19s%21s ", t.text, "");
1003 for (i=0; i<emitter_sp; i++)
1004 { print_operand(emitter_stack[i]); printf(" ");
1005 if (emitter_markers[i] == FUNCTION_VALUE_MARKER) printf(":FUNCTION ");
1006 if (emitter_markers[i] == ARGUMENT_VALUE_MARKER) printf(":ARGUMENT ");
1007 if (emitter_markers[i] == OR_VALUE_MARKER) printf(":OR ");
1008 if (emitter_bracket_counts[i]) printf(":BRACKETS(%d) ", emitter_bracket_counts[i]);
1010 printf("\n");
1013 if (t.type == SUBOPEN_TT) return;
1015 stack_size = 0;
1016 while ((stack_size < emitter_sp) &&
1017 !emitter_markers[emitter_sp-stack_size-1] &&
1018 !emitter_bracket_counts[emitter_sp-stack_size-1])
1019 stack_size++;
1021 if (t.type == SUBCLOSE_TT)
1022 { if (stack_size < emitter_sp && emitter_bracket_counts[emitter_sp-stack_size-1])
1023 { if (stack_size == 0)
1024 { error("No expression between brackets '(' and ')'");
1025 emitter_stack[emitter_sp] = zero_operand;
1026 emitter_markers[emitter_sp] = 0;
1027 emitter_bracket_counts[emitter_sp] = 0;
1028 ++emitter_sp;
1030 else if (stack_size < 1)
1031 compiler_error("SR error: emitter stack empty in subexpression");
1032 else if (stack_size > 1)
1033 compiler_error("SR error: emitter stack overfull in subexpression");
1034 remove_bracket_layer_from_emitter_stack();
1036 return;
1039 if (t.type != OP_TT)
1040 { emitter_markers[emitter_sp] = 0;
1041 emitter_bracket_counts[emitter_sp] = 0;
1043 if (emitter_sp == MAX_EXPRESSION_NODES)
1044 memoryerror("MAX_EXPRESSION_NODES", MAX_EXPRESSION_NODES);
1045 if (!evaluate_term(t, &(emitter_stack[emitter_sp++])))
1046 compiler_error_named("Emit token error:", t.text);
1047 return;
1050 /* A comma is argument-separating if it follows an argument (or a function
1051 call, since we ignore spurious leading commas in function argument lists)
1052 with no intervening brackets. Function calls are variadic, so we don't
1053 apply argument-separating commas. */
1054 if (t.value == COMMA_OP &&
1055 stack_size < emitter_sp &&
1056 (emitter_markers[emitter_sp-stack_size-1] == ARGUMENT_VALUE_MARKER ||
1057 emitter_markers[emitter_sp-stack_size-1] == FUNCTION_VALUE_MARKER) &&
1058 !emitter_bracket_counts[emitter_sp-stack_size-1])
1059 { if (expr_trace_level >= 2)
1060 printf("Treating comma as argument-separating\n");
1061 return;
1064 if (t.value == OR_OP)
1065 return;
1067 arity = 1;
1068 if (t.value == FCALL_OP)
1069 { if (expr_trace_level >= 3)
1070 { printf("FCALL_OP finds marker stack: ");
1071 for (x=0; x<emitter_sp; x++) printf("%d ", emitter_markers[x]);
1072 printf("\n");
1074 if (emitter_markers[emitter_sp-1] == ARGUMENT_VALUE_MARKER)
1075 warning("Ignoring spurious trailing comma");
1076 while (emitter_markers[emitter_sp-arity] != FUNCTION_VALUE_MARKER)
1078 if ((glulx_mode &&
1079 emitter_stack[emitter_sp-arity].type == SYSFUN_OT) ||
1080 (!glulx_mode &&
1081 emitter_stack[emitter_sp-arity].type == VARIABLE_OT &&
1082 emitter_stack[emitter_sp-arity].value >= 256 &&
1083 emitter_stack[emitter_sp-arity].value < 288))
1084 { int index = emitter_stack[emitter_sp-arity].value;
1085 if(!glulx_mode)
1086 index -= 256;
1087 if(index > 0 && index < NUMBER_SYSTEM_FUNCTIONS)
1088 error_named("System function name used as a value:", system_functions.keywords[index]);
1089 else
1090 compiler_error("Found unnamed system function used as a value");
1091 emitter_stack[emitter_sp-arity] = zero_operand;
1093 ++arity;
1096 else
1097 { arity = 1;
1098 if (operators[t.value].usage == IN_U) arity = 2;
1100 if (operators[t.value].precedence == 3)
1101 { arity = 2;
1102 x = emitter_sp-1;
1103 if(!emitter_markers[x] && !emitter_bracket_counts[x])
1104 { for (--x; emitter_markers[x] == OR_VALUE_MARKER && !emitter_bracket_counts[x]; --x)
1105 { ++arity;
1106 ++stack_size;
1108 for (;x >= 0 && !emitter_markers[x] && !emitter_bracket_counts[x]; --x)
1109 ++stack_size;
1113 if (arity > stack_size)
1114 { error_named("Missing operand for", t.text);
1115 while (arity > stack_size)
1116 { if (emitter_sp == MAX_EXPRESSION_NODES)
1117 memoryerror("MAX_EXPRESSION_NODES", MAX_EXPRESSION_NODES);
1118 emitter_markers[emitter_sp] = 0;
1119 emitter_bracket_counts[emitter_sp] = 0;
1120 emitter_stack[emitter_sp] = zero_operand;
1121 emitter_sp++;
1122 stack_size++;
1127 /* pseudo-typecheck in 6.30 */
1128 for (i = 1; i <= arity; i++)
1130 o1 = emitter_stack[emitter_sp - i];
1131 if (is_property_t(o1.symtype) ) {
1132 switch(t.value)
1134 case FCALL_OP:
1135 case SETEQUALS_OP: case NOTEQUAL_OP:
1136 case CONDEQUALS_OP:
1137 case PROVIDES_OP: case NOTPROVIDES_OP:
1138 case PROP_ADD_OP: case PROP_NUM_OP:
1139 case SUPERCLASS_OP:
1140 case MPROP_ADD_OP: case MESSAGE_OP:
1141 case PROPERTY_OP:
1142 if (i < arity) break;
1143 case GE_OP: case LE_OP:
1144 if ((i < arity) && (o1.symflags & STAR_SFLAG)) break;
1145 default:
1146 warning("Property name in expression is not qualified by object");
1148 } /* if (is_property_t */
1151 switch(arity)
1152 { case 1:
1153 o1 = emitter_stack[emitter_sp - 1];
1154 if ((o1.marker == 0) && is_constant_ot(o1.type))
1155 { switch(t.value)
1156 { case UNARY_MINUS_OP: x = -o1.value; goto FoldConstant;
1157 case ARTNOT_OP:
1158 if (!glulx_mode)
1159 x = (~o1.value) & 0xffff;
1160 else
1161 x = (~o1.value) & 0xffffffff;
1162 goto FoldConstant;
1163 case LOGNOT_OP:
1164 if (o1.value != 0) x=0; else x=1;
1165 goto FoldConstant;
1168 break;
1170 case 2:
1171 o1 = emitter_stack[emitter_sp - 2];
1172 o2 = emitter_stack[emitter_sp - 1];
1174 if ((o1.marker == 0) && (o2.marker == 0)
1175 && is_constant_ot(o1.type) && is_constant_ot(o2.type))
1177 int32 ov1, ov2;
1178 if (glulx_mode)
1179 { ov1 = o1.value;
1180 ov2 = o2.value;
1182 else
1183 { ov1 = (o1.value >= 0x8000) ? (o1.value - 0x10000) : o1.value;
1184 ov2 = (o2.value >= 0x8000) ? (o2.value - 0x10000) : o2.value;
1187 switch(t.value)
1189 case PLUS_OP: x = ov1 + ov2; goto FoldConstantC;
1190 case MINUS_OP: x = ov1 - ov2; goto FoldConstantC;
1191 case TIMES_OP: x = ov1 * ov2; goto FoldConstantC;
1192 case DIVIDE_OP:
1193 case REMAINDER_OP:
1194 if (ov2 == 0)
1195 error("Division of constant by zero");
1196 else
1197 if (t.value == DIVIDE_OP) {
1198 if (ov2 < 0) {
1199 ov1 = -ov1;
1200 ov2 = -ov2;
1202 if (ov1 >= 0)
1203 x = ov1 / ov2;
1204 else
1205 x = -((-ov1) / ov2);
1207 else {
1208 if (ov2 < 0) {
1209 ov2 = -ov2;
1211 if (ov1 >= 0)
1212 x = ov1 % ov2;
1213 else
1214 x = -((-ov1) % ov2);
1216 goto FoldConstant;
1217 case ARTAND_OP: x = o1.value & o2.value; goto FoldConstant;
1218 case ARTOR_OP: x = o1.value | o2.value; goto FoldConstant;
1219 case CONDEQUALS_OP:
1220 if (o1.value == o2.value) x = 1; else x = 0;
1221 goto FoldConstant;
1222 case NOTEQUAL_OP:
1223 if (o1.value != o2.value) x = 1; else x = 0;
1224 goto FoldConstant;
1225 case GE_OP:
1226 if (o1.value >= o2.value) x = 1; else x = 0;
1227 goto FoldConstant;
1228 case GREATER_OP:
1229 if (o1.value > o2.value) x = 1; else x = 0;
1230 goto FoldConstant;
1231 case LE_OP:
1232 if (o1.value <= o2.value) x = 1; else x = 0;
1233 goto FoldConstant;
1234 case LESS_OP:
1235 if (o1.value < o2.value) x = 1; else x = 0;
1236 goto FoldConstant;
1237 case LOGAND_OP:
1238 if ((o1.value != 0) && (o2.value != 0)) x=1; else x=0;
1239 goto FoldConstant;
1240 case LOGOR_OP:
1241 if ((o1.value != 0) || (o2.value != 0)) x=1; else x=0;
1242 goto FoldConstant;
1248 op_node_number = ET_used++;
1249 if (op_node_number == MAX_EXPRESSION_NODES)
1250 memoryerror("MAX_EXPRESSION_NODES", MAX_EXPRESSION_NODES);
1252 ET[op_node_number].operator_number = t.value;
1253 ET[op_node_number].up = -1;
1254 ET[op_node_number].down = -1;
1255 ET[op_node_number].right = -1;
1257 /* This statement is redundant, but prevents compilers from wrongly
1258 issuing a "used before it was assigned a value" error: */
1259 previous_node_number = 0;
1261 for (i = emitter_sp-arity; i != emitter_sp; i++)
1263 if (expr_trace_level >= 3)
1264 printf("i=%d, emitter_sp=%d, arity=%d, ETU=%d\n",
1265 i, emitter_sp, arity, ET_used);
1266 if (emitter_stack[i].type == EXPRESSION_OT)
1267 operand_node_number = emitter_stack[i].value;
1268 else
1269 { operand_node_number = ET_used++;
1270 if (operand_node_number == MAX_EXPRESSION_NODES)
1271 memoryerror("MAX_EXPRESSION_NODES", MAX_EXPRESSION_NODES);
1272 ET[operand_node_number].down = -1;
1273 ET[operand_node_number].value = emitter_stack[i];
1275 ET[operand_node_number].up = op_node_number;
1276 ET[operand_node_number].right = -1;
1277 if (i == emitter_sp - arity)
1278 { ET[op_node_number].down = operand_node_number;
1280 else
1281 { ET[previous_node_number].right = operand_node_number;
1283 previous_node_number = operand_node_number;
1286 emitter_sp = emitter_sp - arity + 1;
1288 emitter_stack[emitter_sp - 1].type = EXPRESSION_OT;
1289 emitter_stack[emitter_sp - 1].value = op_node_number;
1290 emitter_stack[emitter_sp - 1].marker = 0;
1291 emitter_markers[emitter_sp - 1] = 0;
1292 emitter_bracket_counts[emitter_sp - 1] = 0;
1293 /* Remove the marker for the brackets implied by operator precedence */
1294 remove_bracket_layer_from_emitter_stack();
1296 return;
1298 FoldConstantC:
1300 /* In Glulx, skip this test; we can't check out-of-range errors
1301 for 32-bit arithmetic. */
1303 if (!glulx_mode && ((x<-32768) || (x > 32767)))
1304 { char folding_error[40];
1305 int32 ov1 = (o1.value >= 0x8000) ? (o1.value - 0x10000) : o1.value;
1306 int32 ov2 = (o2.value >= 0x8000) ? (o2.value - 0x10000) : o2.value;
1307 switch(t.value)
1309 case PLUS_OP:
1310 sprintf(folding_error, "%d + %d = %d", ov1, ov2, x);
1311 break;
1312 case MINUS_OP:
1313 sprintf(folding_error, "%d - %d = %d", ov1, ov2, x);
1314 break;
1315 case TIMES_OP:
1316 sprintf(folding_error, "%d * %d = %d", ov1, ov2, x);
1317 break;
1319 error_named("Signed arithmetic on compile-time constants overflowed \
1320 the range -32768 to +32767:", folding_error);
1323 FoldConstant:
1325 if (!glulx_mode) {
1326 while (x < 0) x = x + 0x10000;
1327 x = x & 0xffff;
1329 else {
1330 x = x & 0xffffffff;
1333 emitter_sp = emitter_sp - arity + 1;
1335 if (!glulx_mode) {
1336 if (x<256)
1337 emitter_stack[emitter_sp - 1].type = SHORT_CONSTANT_OT;
1338 else emitter_stack[emitter_sp - 1].type = LONG_CONSTANT_OT;
1340 else {
1341 if (x == 0)
1342 emitter_stack[emitter_sp - 1].type = ZEROCONSTANT_OT;
1343 else if (x >= -128 && x <= 127)
1344 emitter_stack[emitter_sp - 1].type = BYTECONSTANT_OT;
1345 else if (x >= -32768 && x <= 32767)
1346 emitter_stack[emitter_sp - 1].type = HALFCONSTANT_OT;
1347 else
1348 emitter_stack[emitter_sp - 1].type = CONSTANT_OT;
1351 emitter_stack[emitter_sp - 1].value = x;
1352 emitter_stack[emitter_sp - 1].marker = 0;
1353 emitter_markers[emitter_sp - 1] = 0;
1354 emitter_bracket_counts[emitter_sp - 1] = 0;
1356 if (expr_trace_level >= 2)
1357 { printf("Folding constant to: ");
1358 print_operand(emitter_stack[emitter_sp - 1]);
1359 printf("\n");
1362 /* Remove the marker for the brackets implied by operator precedence */
1363 remove_bracket_layer_from_emitter_stack();
1364 return;
1367 /* --- Pretty printing ----------------------------------------------------- */
1369 static void show_node(int n, int depth, int annotate)
1370 { int j;
1371 for (j=0; j<2*depth+2; j++) printf(" ");
1373 if (ET[n].down == -1)
1374 { print_operand(ET[n].value);
1375 if (annotate && (ET[n].value.marker != 0))
1376 printf(" [%s]", describe_mv(ET[n].value.marker));
1377 printf("\n");
1379 else
1380 { printf("%s ", operators[ET[n].operator_number].description);
1381 j = operators[ET[n].operator_number].precedence;
1382 if ((annotate) && ((j==2) || (j==3)))
1383 { printf(" %d|%d ", ET[n].true_label, ET[n].false_label);
1384 if (ET[n].label_after != -1) printf(" def %d after ",
1385 ET[n].label_after);
1386 if (ET[n].to_expression) printf(" con to expr ");
1388 printf("\n");
1389 show_node(ET[n].down, depth+1, annotate);
1392 if (ET[n].right != -1) show_node(ET[n].right, depth, annotate);
1395 extern void show_tree(assembly_operand AO, int annotate)
1396 { if (AO.type == EXPRESSION_OT) show_node(AO.value, 0, annotate);
1397 else
1398 { printf("Constant: "); print_operand(AO);
1399 if (annotate && (AO.marker != 0))
1400 printf(" [%s]", describe_mv(AO.marker));
1401 printf("\n");
1405 /* --- Lvalue transformations ---------------------------------------------- */
1407 /* This only gets called in Z-code, since Glulx doesn't distinguish
1408 individual property operators from general ones. */
1409 static void check_property_operator(int from_node)
1410 { int below = ET[from_node].down;
1411 int opnum = ET[from_node].operator_number;
1413 ASSERT_ZCODE();
1415 if (veneer_mode) return;
1417 if ((below != -1) && (ET[below].right != -1))
1418 { int n = ET[below].right, flag = FALSE;
1420 if ((ET[n].down == -1)
1421 && ((ET[n].value.type == LONG_CONSTANT_OT)
1422 || (ET[n].value.type == SHORT_CONSTANT_OT))
1423 && ((ET[n].value.value > 0) && (ET[n].value.value < 64))
1424 && ((!module_switch) || (ET[n].value.marker == 0)))
1425 flag = TRUE;
1427 if (!flag)
1428 { switch(opnum)
1429 { case PROPERTY_OP: opnum = MESSAGE_OP; break;
1430 case PROP_ADD_OP: opnum = MPROP_ADD_OP; break;
1431 case PROP_NUM_OP: opnum = MPROP_NUM_OP; break;
1435 ET[from_node].operator_number = opnum;
1438 if (below != -1)
1439 check_property_operator(below);
1440 if (ET[from_node].right != -1)
1441 check_property_operator(ET[from_node].right);
1444 static void check_lvalues(int from_node)
1445 { int below = ET[from_node].down;
1446 int opnum = ET[from_node].operator_number, opnum_below;
1447 int lvalue_form, i, j;
1449 if (below != -1)
1451 if ((opnum == FCALL_OP) && (ET[below].down != -1))
1452 { opnum_below = ET[below].operator_number;
1453 if ((opnum_below == PROPERTY_OP) || (opnum_below == MESSAGE_OP))
1454 { i = ET[ET[from_node].down].right;
1455 ET[from_node].down = ET[below].down;
1456 ET[ET[below].down].up = from_node;
1457 ET[ET[ET[below].down].right].up = from_node;
1458 ET[ET[ET[below].down].right].right = i;
1459 opnum = PROP_CALL_OP;
1460 ET[from_node].operator_number = opnum;
1464 if (operators[opnum].requires_lvalue)
1465 { opnum_below = ET[below].operator_number;
1467 if (ET[below].down == -1)
1468 { if (!is_variable_ot(ET[below].value.type))
1469 { error("'=' applied to undeclared variable");
1470 goto LvalueError;
1473 else
1474 { lvalue_form=0;
1475 switch(opnum)
1476 { case SETEQUALS_OP:
1477 switch(opnum_below)
1478 { case ARROW_OP: lvalue_form = ARROW_SETEQUALS_OP; break;
1479 case DARROW_OP: lvalue_form = DARROW_SETEQUALS_OP; break;
1480 case MESSAGE_OP: lvalue_form = MESSAGE_SETEQUALS_OP; break;
1481 case PROPERTY_OP: lvalue_form = PROPERTY_SETEQUALS_OP; break;
1483 break;
1484 case INC_OP:
1485 switch(opnum_below)
1486 { case ARROW_OP: lvalue_form = ARROW_INC_OP; break;
1487 case DARROW_OP: lvalue_form = DARROW_INC_OP; break;
1488 case MESSAGE_OP: lvalue_form = MESSAGE_INC_OP; break;
1489 case PROPERTY_OP: lvalue_form = PROPERTY_INC_OP; break;
1491 break;
1492 case POST_INC_OP:
1493 switch(opnum_below)
1494 { case ARROW_OP: lvalue_form = ARROW_POST_INC_OP; break;
1495 case DARROW_OP: lvalue_form = DARROW_POST_INC_OP; break;
1496 case MESSAGE_OP: lvalue_form = MESSAGE_POST_INC_OP; break;
1497 case PROPERTY_OP: lvalue_form = PROPERTY_POST_INC_OP; break;
1499 break;
1500 case DEC_OP:
1501 switch(opnum_below)
1502 { case ARROW_OP: lvalue_form = ARROW_DEC_OP; break;
1503 case DARROW_OP: lvalue_form = DARROW_DEC_OP; break;
1504 case MESSAGE_OP: lvalue_form = MESSAGE_DEC_OP; break;
1505 case PROPERTY_OP: lvalue_form = PROPERTY_DEC_OP; break;
1507 break;
1508 case POST_DEC_OP:
1509 switch(opnum_below)
1510 { case ARROW_OP: lvalue_form = ARROW_POST_DEC_OP; break;
1511 case DARROW_OP: lvalue_form = DARROW_POST_DEC_OP; break;
1512 case MESSAGE_OP: lvalue_form = MESSAGE_POST_DEC_OP; break;
1513 case PROPERTY_OP: lvalue_form = PROPERTY_POST_DEC_OP; break;
1515 break;
1517 if (lvalue_form == 0)
1518 { error_named("'=' applied to",
1519 (char *) operators[opnum_below].description);
1520 goto LvalueError;
1523 /* Transform from_node from_node
1524 | \ | \\\ \
1525 below value to value
1526 | \\\
1529 ET[from_node].operator_number = lvalue_form;
1530 i = ET[below].down;
1531 ET[from_node].down = i;
1532 while (i != -1)
1533 { ET[i].up = from_node;
1534 j = i;
1535 i = ET[i].right;
1537 ET[j].right = ET[below].right;
1540 check_lvalues(below);
1542 if (ET[from_node].right != -1)
1543 check_lvalues(ET[from_node].right);
1544 return;
1546 LvalueError:
1547 ET[from_node].down = -1;
1548 ET[from_node].value = zero_operand;
1549 if (ET[from_node].right != -1)
1550 check_lvalues(ET[from_node].right);
1553 /* --- Tree surgery for conditionals --------------------------------------- */
1555 static void negate_condition(int n)
1556 { int i;
1558 if (ET[n].right != -1) negate_condition(ET[n].right);
1559 if (ET[n].down == -1) return;
1560 i = operators[ET[n].operator_number].negation;
1561 if (i!=0) ET[n].operator_number = i;
1562 if (operators[i].precedence==2) negate_condition(ET[n].down);
1565 static void delete_negations(int n, int context)
1567 /* Recursively apply
1569 ~~(x && y) = ~~x || ~~y
1570 ~~(x || y) = ~~x && ~~y
1571 ~~(x == y) = x ~= y
1573 (etc) to delete the ~~ operator from the tree. Since this is
1574 depth first, the ~~ being deleted has no ~~s beneath it, which
1575 is important to make "negate_condition" work. */
1577 int i;
1579 if (ET[n].right != -1) delete_negations(ET[n].right, context);
1580 if (ET[n].down == -1) return;
1581 delete_negations(ET[n].down, context);
1583 if (ET[n].operator_number == LOGNOT_OP)
1584 { negate_condition(ET[n].down);
1585 ET[n].operator_number
1586 = ET[ET[n].down].operator_number;
1587 ET[n].down = ET[ET[n].down].down;
1588 i = ET[n].down;
1589 while(i != -1) { ET[i].up = n; i = ET[i].right; }
1593 static void insert_exp_to_cond(int n, int context)
1595 /* Insert a ~= test when an expression is used as a condition.
1597 Check for possible confusion over = and ==, e.g. "if (a = 1) ..." */
1599 int new, i;
1601 if (ET[n].right != -1) insert_exp_to_cond(ET[n].right, context);
1603 if (ET[n].down == -1)
1604 { if (context==CONDITION_CONTEXT)
1605 { new = ET_used++;
1606 if (new == MAX_EXPRESSION_NODES)
1607 memoryerror("MAX_EXPRESSION_NODES", MAX_EXPRESSION_NODES);
1608 ET[new] = ET[n];
1609 ET[n].down = new; ET[n].operator_number = NONZERO_OP;
1610 ET[new].up = n; ET[new].right = -1;
1612 return;
1615 switch(operators[ET[n].operator_number].precedence)
1616 { case 3: /* Conditionals have level 3 */
1617 context = QUANTITY_CONTEXT;
1618 break;
1619 case 2: /* Logical operators level 2 */
1620 context = CONDITION_CONTEXT;
1621 break;
1622 case 1: /* Forms of '=' have level 1 */
1623 if (context == CONDITION_CONTEXT)
1624 warning("'=' used as condition: '==' intended?");
1625 default:
1626 if (context != CONDITION_CONTEXT) break;
1628 new = ET_used++;
1629 if (new == MAX_EXPRESSION_NODES)
1630 memoryerror("MAX_EXPRESSION_NODES", MAX_EXPRESSION_NODES);
1631 ET[new] = ET[n];
1632 ET[n].down = new; ET[n].operator_number = NONZERO_OP;
1633 ET[new].up = n; ET[new].right = -1;
1635 i = ET[new].down;
1636 while (i!= -1) { ET[i].up = new; i = ET[i].right; }
1637 context = QUANTITY_CONTEXT; n = new;
1640 insert_exp_to_cond(ET[n].down, context);
1643 static unsigned int etoken_num_children(int n)
1645 int count = 0;
1646 int i;
1647 i = ET[n].down;
1648 if (i == -1) { return 0; }
1649 do {
1650 count++;
1651 i = ET[i].right;
1652 } while (i!=-1);
1653 return count;
1656 static void func_args_on_stack(int n, int context)
1658 /* Make sure that the arguments of every function-call expression
1659 are stored to the stack. If any aren't (ie, if any arguments are
1660 constants or variables), cover them with push operators.
1661 (The very first argument does not need to be so treated, because
1662 it's the function address, not a function argument. We also
1663 skip the treatment for most system functions.) */
1665 int new, pn, fnaddr, opnum;
1667 ASSERT_GLULX();
1669 if (ET[n].right != -1)
1670 func_args_on_stack(ET[n].right, context);
1671 if (ET[n].down == -1) {
1672 pn = ET[n].up;
1673 if (pn != -1) {
1674 opnum = ET[pn].operator_number;
1675 if (opnum == FCALL_OP
1676 || opnum == MESSAGE_CALL_OP
1677 || opnum == PROP_CALL_OP) {
1678 /* If it's an FCALL, get the operand which contains the function
1679 address (or system-function number) */
1680 if (opnum == MESSAGE_CALL_OP
1681 || opnum == PROP_CALL_OP
1682 || ((fnaddr=ET[pn].down) != n
1683 && (ET[fnaddr].value.type != SYSFUN_OT
1684 || ET[fnaddr].value.value == INDIRECT_SYSF
1685 || ET[fnaddr].value.value == GLK_SYSF))) {
1686 if (etoken_num_children(pn) > (unsigned int)(opnum == FCALL_OP ? 4:3)) {
1687 new = ET_used++;
1688 if (new == MAX_EXPRESSION_NODES)
1689 memoryerror("MAX_EXPRESSION_NODES", MAX_EXPRESSION_NODES);
1690 ET[new] = ET[n];
1691 ET[n].down = new;
1692 ET[n].operator_number = PUSH_OP;
1693 ET[new].up = n;
1694 ET[new].right = -1;
1699 return;
1702 func_args_on_stack(ET[n].down, context);
1705 static assembly_operand check_conditions(assembly_operand AO, int context)
1706 { int n;
1708 if (AO.type != EXPRESSION_OT)
1709 { if (context != CONDITION_CONTEXT) return AO;
1710 n = ET_used++;
1711 if (n == MAX_EXPRESSION_NODES)
1712 memoryerror("MAX_EXPRESSION_NODES", MAX_EXPRESSION_NODES);
1713 ET[n].down = -1;
1714 ET[n].up = -1;
1715 ET[n].right = -1;
1716 ET[n].value = AO;
1717 AO.type = EXPRESSION_OT;
1718 AO.value = n;
1719 AO.marker = 0;
1722 insert_exp_to_cond(AO.value, context);
1723 delete_negations(AO.value, context);
1725 if (glulx_mode)
1726 func_args_on_stack(AO.value, context);
1728 return AO;
1731 /* --- Shift-reduce parser ------------------------------------------------- */
1733 static int sr_sp;
1734 static token_data *sr_stack;
1736 extern assembly_operand parse_expression(int context)
1738 /* Parses an expression, evaluating it as a constant if possible.
1740 Possible contexts are:
1742 VOID_CONTEXT the expression is used as a statement, so that
1743 its value will be thrown away and it only
1744 needs to exist for any resulting side-effects
1745 (function calls and assignments)
1747 CONDITION_CONTEXT the result must be a condition
1749 CONSTANT_CONTEXT there is required to be a constant result
1750 (so that, for instance, comma becomes illegal)
1752 QUANTITY_CONTEXT the default: a quantity is to be specified
1754 ACTION_Q_CONTEXT like QUANTITY_CONTEXT, but postfixed brackets
1755 at the top level do not indicate function call:
1756 used for e.g.
1757 <Insert button (random(pocket1, pocket2))>
1759 RETURN_Q_CONTEXT like QUANTITY_CONTEXT, but a single property
1760 name does not generate a warning
1762 ASSEMBLY_CONTEXT a quantity which cannot use the '->' operator
1763 (needed for assembly language to indicate
1764 store destinations)
1766 FORINIT_CONTEXT a quantity which cannot use an (unbracketed)
1767 '::' operator
1769 ARRAY_CONTEXT like CONSTANT_CONTEXT, but where an unbracketed
1770 minus sign is ambiguous, and brackets always
1771 indicate subexpressions, not function calls
1773 Return value: an assembly operand.
1775 If the type is OMITTED_OT, then the expression has no resulting value.
1777 If the type is EXPRESSION_OT, then the value will need to be
1778 calculated at run-time by code compiled from the expression tree
1779 whose root node-number is the operand value.
1781 Otherwise the assembly operand is the value of the expression, which
1782 is constant and thus known at compile time.
1784 If an error has occurred in the expression, which recovery from was
1785 not possible, then the return is (short constant) 0. This should
1786 minimise the chance of a cascade of further error messages.
1789 token_data a, b, pop; int i;
1790 assembly_operand AO;
1792 superclass_allowed = (context != FORINIT_CONTEXT);
1793 if (context == FORINIT_CONTEXT) context = VOID_CONTEXT;
1795 comma_allowed = (context == VOID_CONTEXT);
1796 arrow_allowed = (context != ASSEMBLY_CONTEXT);
1797 bare_prop_allowed = (context == RETURN_Q_CONTEXT);
1798 array_init_ambiguity = ((context == ARRAY_CONTEXT) ||
1799 (context == ASSEMBLY_CONTEXT));
1801 action_ambiguity = (context == ACTION_Q_CONTEXT);
1803 if (context == ASSEMBLY_CONTEXT) context = QUANTITY_CONTEXT;
1804 if (context == ACTION_Q_CONTEXT) context = QUANTITY_CONTEXT;
1805 if (context == RETURN_Q_CONTEXT) context = QUANTITY_CONTEXT;
1806 if (context == ARRAY_CONTEXT) context = CONSTANT_CONTEXT;
1808 etoken_count = 0;
1809 inserting_token = FALSE;
1811 emitter_sp = 0;
1812 bracket_level = 0;
1814 previous_token.text = "$";
1815 previous_token.type = ENDEXP_TT;
1816 previous_token.value = 0;
1818 sr_sp = 1;
1819 sr_stack[0] = previous_token;
1821 AO = zero_operand;
1823 statements.enabled = FALSE;
1824 directives.enabled = FALSE;
1826 if (get_next_etoken() == FALSE)
1827 { ebf_error("expression", token_text);
1828 return AO;
1832 { if (expr_trace_level >= 2)
1833 { printf("Input: %-20s", current_token.text);
1834 for (i=0; i<sr_sp; i++) printf("%s ", sr_stack[i].text);
1835 printf("\n");
1837 if (expr_trace_level >= 3) printf("ET_used = %d\n", ET_used);
1839 if (sr_sp == 0)
1840 { compiler_error("SR error: stack empty");
1841 return(AO);
1844 a = sr_stack[sr_sp-1]; b = current_token;
1846 if ((a.type == ENDEXP_TT) && (b.type == ENDEXP_TT))
1847 { if (emitter_sp == 0)
1848 { compiler_error("SR error: emitter stack empty");
1849 return AO;
1851 if (emitter_sp > 1)
1852 { compiler_error("SR error: emitter stack overfull");
1853 return AO;
1856 AO = emitter_stack[0];
1857 if (AO.type == EXPRESSION_OT)
1858 { if (expr_trace_level >= 3)
1859 { printf("Tree before lvalue checking:\n");
1860 show_tree(AO, FALSE);
1862 if (!glulx_mode)
1863 check_property_operator(AO.value);
1864 check_lvalues(AO.value);
1865 ET[AO.value].up = -1;
1867 else {
1868 if ((context != CONSTANT_CONTEXT) && is_property_t(AO.symtype)
1869 && (arrow_allowed) && (!bare_prop_allowed))
1870 warning("Bare property name found. \"self.prop\" intended?");
1873 check_conditions(AO, context);
1875 if (context == CONSTANT_CONTEXT)
1876 if (!is_constant_ot(AO.type))
1877 { AO = zero_operand;
1878 ebf_error("constant", "<expression>");
1880 put_token_back();
1882 return(AO);
1885 switch(find_prec(a,b))
1887 case e5: /* Associativity error */
1888 error_named("Brackets mandatory to clarify order of:",
1889 a.text);
1891 case LOWER_P:
1892 case EQUAL_P:
1893 if (sr_sp == MAX_EXPRESSION_NODES)
1894 memoryerror("MAX_EXPRESSION_NODES", MAX_EXPRESSION_NODES);
1895 sr_stack[sr_sp++] = b;
1896 switch(b.type)
1898 case SUBOPEN_TT:
1899 if (sr_sp >= 2 && sr_stack[sr_sp-2].type == OP_TT && sr_stack[sr_sp-2].value == FCALL_OP)
1900 mark_top_of_emitter_stack(FUNCTION_VALUE_MARKER, b);
1901 else
1902 add_bracket_layer_to_emitter_stack(0);
1903 break;
1904 case OP_TT:
1905 switch(b.value){
1906 case OR_OP:
1907 if (sr_stack[sr_sp-2].type == OP_TT &&
1908 operators[sr_stack[sr_sp-2].value].precedence == 3)
1909 mark_top_of_emitter_stack(OR_VALUE_MARKER, b);
1910 else
1911 { error("'or' not between values to the right of a condition");
1912 /* Convert to + for error recovery purposes */
1913 sr_stack[sr_sp-1].value = PLUS_OP;
1915 break;
1916 case COMMA_OP:
1918 /* A comma separates arguments only if the shallowest open bracket belongs to a function call. */
1919 int shallowest_open_bracket_index = sr_sp - 2;
1920 while (shallowest_open_bracket_index > 0 && sr_stack[shallowest_open_bracket_index].type != SUBOPEN_TT)
1921 --shallowest_open_bracket_index;
1922 if (shallowest_open_bracket_index > 0 &&
1923 sr_stack[shallowest_open_bracket_index-1].type == OP_TT &&
1924 sr_stack[shallowest_open_bracket_index-1].value == FCALL_OP)
1925 { mark_top_of_emitter_stack(ARGUMENT_VALUE_MARKER, b);
1926 break;
1928 /* Non-argument-separating commas get treated like any other operator; we fall through to the default case. */
1930 default:
1932 /* Add a marker for the brackets implied by operator precedence */
1933 int operands_on_left = (operators[b.value].usage == PRE_U) ? 0 : 1;
1934 add_bracket_layer_to_emitter_stack(operands_on_left);
1938 get_next_etoken();
1939 break;
1940 case GREATER_P:
1942 { pop = sr_stack[sr_sp - 1];
1943 emit_token(pop);
1944 sr_sp--;
1945 } while (find_prec(sr_stack[sr_sp-1], pop) != LOWER_P);
1946 break;
1948 case e1: /* Missing operand error */
1949 error_named("Missing operand after", a.text);
1950 put_token_back();
1951 current_token.type = NUMBER_TT;
1952 current_token.value = 0;
1953 current_token.marker = 0;
1954 current_token.text = "0";
1955 break;
1957 case e2: /* Unexpected close bracket */
1958 error("Found '(' without matching ')'");
1959 get_next_etoken();
1960 break;
1962 case e3: /* Missing operator error */
1963 error("Missing operator: inserting '+'");
1964 put_token_back();
1965 current_token.type = OP_TT;
1966 current_token.value = PLUS_OP;
1967 current_token.marker = 0;
1968 current_token.text = "+";
1969 break;
1971 case e4: /* Expression ends with an open bracket */
1972 error("Found '(' without matching ')'");
1973 sr_sp--;
1974 break;
1978 while (TRUE);
1981 /* --- Test for simple ++ or -- usage: used to optimise "for" loop code ---- */
1983 extern int test_for_incdec(assembly_operand AO)
1984 { int s = 0;
1985 if (AO.type != EXPRESSION_OT) return 0;
1986 if (ET[AO.value].down == -1) return 0;
1987 switch(ET[AO.value].operator_number)
1988 { case INC_OP: s = 1; break;
1989 case POST_INC_OP: s = 1; break;
1990 case DEC_OP: s = -1; break;
1991 case POST_DEC_OP: s = -1; break;
1993 if (s==0) return 0;
1994 if (ET[ET[AO.value].down].down != -1) return 0;
1995 if (!is_variable_ot(ET[ET[AO.value].down].value.type)) return 0;
1996 return s*(ET[ET[AO.value].down].value.value);
1999 /* ========================================================================= */
2000 /* Data structure management routines */
2001 /* ------------------------------------------------------------------------- */
2003 extern void init_expressp_vars(void)
2004 { int i;
2005 /* make_operands(); */
2006 make_lexical_interface_tables();
2007 for (i=0;i<32;i++) system_function_usage[i] = 0;
2010 extern void expressp_begin_pass(void)
2014 extern void expressp_allocate_arrays(void)
2015 { ET = my_calloc(sizeof(expression_tree_node), MAX_EXPRESSION_NODES,
2016 "expression parse trees");
2017 emitter_markers = my_calloc(sizeof(int), MAX_EXPRESSION_NODES,
2018 "emitter markers");
2019 emitter_bracket_counts = my_calloc(sizeof(int), MAX_EXPRESSION_NODES,
2020 "emitter bracket layer counts");
2021 emitter_stack = my_calloc(sizeof(assembly_operand), MAX_EXPRESSION_NODES,
2022 "emitter stack");
2023 sr_stack = my_calloc(sizeof(token_data), MAX_EXPRESSION_NODES,
2024 "shift-reduce parser stack");
2027 extern void expressp_free_arrays(void)
2028 { my_free(&ET, "expression parse trees");
2029 my_free(&emitter_markers, "emitter markers");
2030 my_free(&emitter_bracket_counts, "emitter bracket layer counts");
2031 my_free(&emitter_stack, "emitter stack");
2032 my_free(&sr_stack, "shift-reduce parser stack");
2035 /* ========================================================================= */