tagged release 0.7.1
[parrot.git] / compilers / pirc / new / pir.y
blob595b2773c72e0fd730e3d06efd412dc63438902d
1 %{
3 /*
4 * $Id$
5 * Copyright (C) 2007-2008, The Perl Foundation.
6 */
8 /*
10 =head1 NAME
12 pir.y - Bison specification for the PIR assembly language parser.
14 =head1 DESCRIPTION
16 This file implements the parser for the PIR assembly language. During the
17 parsing phase, data structures are created that represent the input. These
18 data structures are defined in C<pircompunit.h>.
20 The parser implements strength reduction and constant folding. Strength
21 reduction refers to the selection of instructions that have the same
22 effect as the instruction written by the PIR programmer, but are more
23 efficient. For instance:
25 add $P0, $P0, $P1
27 can be reduced to:
29 add $P0, $P1
31 as C<$P0> was an IN/OUT operand.
33 Constant folding refers to the compile-time evaluation of expressions,
34 if possible. For instance:
36 add $I0, 10, 20
38 can be written as:
40 set $I0, 30
42 as we can evaluate this result during compile time. Likewise, conditional
43 branch instructions may become unconditional branch instructions (if the
44 condition evaluates to I<true> during compile time) or it may become a C<noop>
45 (no op) instruction (if the condition evaluates to I<false> during compile time).
47 =cut
49 TODO:
50 1. improve memory management (free it!)
51 2. test the parser.
52 3. generate PBC, using Parrot_PackFile (and related) data structures.
53 4. handle branch/invoke instructions to calculate offsets etc.
54 5. handle freezing of PMC constants (representing subs)
58 #include <stdio.h>
59 #include <stdlib.h>
60 #include <string.h>
61 #include <assert.h>
63 #include "pirparser.h"
64 #include "pircompiler.h"
65 #include "pircompunit.h"
66 #include "pirsymbol.h"
69 /* prevent inclusion of <unistd.h> on windows */
70 #define YY_NO_UNISTD_H
72 /* define YY_DECL, so that in "pirlexer.h" it won't be defined */
73 #define YY_DECL int yylex(YYSTYPE *yylval, yyscan_t yyscanner)
75 /* include "pirlexer.h" before "piryy.h" */
76 #include "pirlexer.h"
77 #include "piryy.h"
79 /* declare yylex(); do this I<after> including "pirlexer.h" */
80 extern YY_DECL;
84 /* Enumeration of mathematical operator types; these are used to index the opnames array. */
85 typedef enum pir_math_operators {
86 OP_ADD = 0, /* make sure counting starts at 0 */
87 OP_INC, /* special case for OP_ADD; must be 1 position after OP_ADD */
88 OP_SUB,
89 OP_DEC, /* special case for OP_DEC; must be 1 position after OP_SUB */
90 OP_DIV,
91 OP_MUL,
92 OP_MOD,
93 OP_BOR,
94 OP_BAND,
95 OP_BXOR,
96 OP_POW,
97 OP_CONCAT,
98 OP_LSR,
99 OP_SHR,
100 OP_SHL,
101 OP_OR,
102 OP_AND,
103 OP_FDIV,
104 OP_XOR,
105 OP_ISEQ,
106 OP_ISLE,
107 OP_ISLT,
108 OP_ISGE,
109 OP_ISGT,
110 OP_ISNE
112 } pir_math_operator;
114 /* relational operator types */
115 typedef enum pir_rel_operators {
116 OP_NE = OP_ISNE + 1, /* continue after OP_ISNE in pir_math_operator. */
117 OP_EQ,
118 OP_LT,
119 OP_LE,
120 OP_GT,
121 OP_GE
123 } pir_rel_operator;
125 /* names of the binary operators */
126 static char * const opnames[] = {
127 "add",
128 "inc", /* use this when "add"ing 1 */
129 "sub",
130 "dec", /* use this when "sub"ing 1 */
131 "div",
132 "mul",
133 "mod",
134 "bor",
135 "band",
136 "bxor",
137 "pow",
138 "concat",
139 "lsr",
140 "shr",
141 "shl",
142 "or",
143 "and",
144 "fdiv",
145 "xor",
146 "iseq",
147 "isle",
148 "islt",
149 "isge",
150 "isgt",
151 "isne",
152 /* note that from here on it's rel. ops; see pir_rel_operator enumeration */
153 "ne",
154 "eq",
155 "lt",
156 "le",
157 "gt",
158 "ge"
161 /* prototypes for constant folding and compile-time evaluation functions */
162 static constant *fold_i_i(yyscan_t yyscanner, int a, pir_math_operator op, int b);
163 static constant *fold_n_i(yyscan_t yyscanner, double a, pir_math_operator op, int b);
164 static constant *fold_i_n(yyscan_t yyscanner, int a, pir_math_operator op, double b);
165 static constant *fold_n_n(yyscan_t yyscanner, double a, pir_math_operator op, double b);
166 static constant *fold_s_s(yyscan_t yyscanner, char *a, pir_math_operator op, char *b);
168 static int evaluate_i_i(int a, pir_rel_operator op, int b);
169 static int evaluate_n_n(double a, pir_rel_operator op, double b);
170 static int evaluate_i_n(int a, pir_rel_operator op, double b);
171 static int evaluate_n_i(double a, pir_rel_operator op, int b);
172 static int evaluate_s_s(char * const a, pir_rel_operator op, char * const b);
174 static int evaluate_s(char * const s);
175 static int evaluate_c(constant * const c);
177 static char *concat_strings(char *a, char *b);
179 static void create_if_instr(yyscan_t yyscanner, lexer_state * const lexer, int invert,
180 int hasnull, char * const name, char * const label);
182 static void do_strength_reduction(lexer_state * const lexer);
183 static int check_value(constant * const c, int val);
186 /* enable debugging of generated parser */
187 #define YYDEBUG 1
189 /* enable slightly more helpful error messages */
190 #define YYERROR_VERBOSE 1
192 /* keep MSVC happy */
193 #ifndef YYENABLE_NLS
194 # define YYENABLE_NLS 0
195 #endif
197 /* keep MSVC happy */
198 #ifndef YYLTYPE_IS_TRIVIAL
199 # define YYLTYPE_IS_TRIVIAL 0
200 #endif
203 /* the parser aborts if there are more than 10 errors */
204 #define MAX_NUM_ERRORS 10
209 /* list all types that can be moved from lexer to parser, or from
210 * rule to rule in the parser only.
212 %union {
213 double dval;
214 int ival;
215 char *sval;
216 struct constant *cval;
217 struct instruction *instr;
218 struct expression *expr;
219 struct target *targ;
220 struct argument *argm;
221 struct invocation *invo;
222 struct key *key;
223 struct symbol *symb;
227 %token TK_NL "\n"
229 %token TK_HLL ".HLL"
230 TK_HLL_MAP ".HLL_map"
231 TK_LOADLIB ".loadlib"
233 %token TK_SUB ".sub"
234 TK_END ".end"
235 TK_PARAM ".param"
236 TK_LEX ".lex"
237 TK_LOCAL ".local"
238 TK_NAMESPACE ".namespace"
239 TK_INVOCANT ".invocant"
240 TK_METH_CALL ".meth_call"
241 TK_GLOBALCONST ".globalconst"
242 TK_CONST ".const"
243 TK_RETURN ".return"
244 TK_YIELD ".yield"
245 TK_SET_YIELD ".set_yield"
246 TK_SET_RETURN ".set_return"
247 TK_BEGIN_YIELD ".begin_yield"
248 TK_END_YIELD ".end_yield"
249 TK_BEGIN_RETURN ".begin_return"
250 TK_END_RETURN ".end_return"
251 TK_BEGIN_CALL ".begin_call"
252 TK_END_CALL ".end_call"
253 TK_GET_RESULTS ".get_results"
254 TK_CALL ".call"
255 TK_SET_ARG ".set_arg"
256 TK_GET_RESULT ".get_result"
257 TK_NCI_CALL ".nci_call"
258 TK_TAILCALL ".tailcall"
260 %token <sval> TK_LABEL "label"
261 <sval> TK_IDENT "identifier"
262 <sval> TK_INT "int"
263 <sval> TK_NUM "num"
264 <sval> TK_PMC "pmc"
265 <sval> TK_STRING "string"
266 <sval> TK_IF "if"
267 <sval> TK_UNLESS "unless"
268 <sval> TK_NULL "null"
269 <sval> TK_GOTO "goto"
271 <sval> TK_STRINGC "string constant"
272 <ival> TK_INTC "integer constant"
273 <dval> TK_NUMC "number constant"
274 <ival> TK_PREG "PMC register"
275 <ival> TK_NREG "number register"
276 <ival> TK_SREG "string register"
277 <ival> TK_IREG "integer register"
279 %token TK_ARROW "=>"
280 TK_NE "!="
281 TK_EQ "=="
282 TK_LT "<"
283 TK_LE "<="
284 TK_GT ">"
285 TK_GE ">="
287 TK_USHIFT ">>>"
288 TK_RSHIFT ">>"
289 TK_LSHIFT "<<"
290 TK_FDIV "//"
291 TK_OR "||"
292 TK_AND "&&"
293 TK_XOR "~~"
294 TK_CONC "."
295 TK_ASSIGN_USHIFT ">>>="
296 TK_ASSIGN_RSHIFT ">>="
297 TK_ASSIGN_LSHIFT "<<="
299 %token TK_ASSIGN_INC "+="
300 TK_ASSIGN_DEC "-="
301 TK_ASSIGN_MUL "*="
302 TK_ASSIGN_MOD "%="
303 TK_ASSIGN_POW "**="
304 TK_ASSIGN_DIV "/="
305 TK_ASSIGN_BOR "|="
306 TK_ASSIGN_BAND "&="
307 TK_ASSIGN_FDIV "//="
308 TK_ASSIGN_BNOT "~="
309 TK_ASSIGN_CONC ".="
311 %token TK_FLAG_INIT ":init"
312 TK_FLAG_LOAD ":load"
313 TK_FLAG_MAIN ":main"
314 TK_FLAG_ANON ":anon"
315 TK_FLAG_METHOD ":method"
316 TK_FLAG_OUTER ":outer"
317 TK_FLAG_VTABLE ":vtable"
318 TK_FLAG_LEX ":lex"
319 TK_FLAG_MULTI ":multi"
320 TK_FLAG_POSTCOMP ":postcomp"
321 TK_FLAG_IMMEDIATE ":immediate"
322 TK_FLAG_LEXID ":lexid"
323 TK_INSTANCEOF ":instanceof"
325 %token TK_FLAG_UNIQUE_REG ":unique_reg"
326 TK_FLAG_NAMED ":named"
327 TK_FLAG_SLURPY ":slurpy"
328 TK_FLAG_FLAT ":flat"
329 TK_FLAG_OPTIONAL ":optional"
330 TK_FLAG_OPT_FLAG ":opt_flag"
331 TK_FLAG_INVOCANT ":invocant"
333 %type <sval> unop
334 identifier
335 sub_id
336 opt_paren_string
337 paren_string
338 local_var_name
339 keyword
340 parrot_op
342 %type <targ> sub
343 method
344 pmc_object
345 opt_ret_cont
346 target
347 param
348 symbol
349 result_target
350 long_result
351 long_results
352 opt_long_results
353 opt_target_list
354 opt_list
355 target_list
356 keyaccess
359 %type <symb> local_id
360 local_id_list
362 %type <argm> named_arg
363 short_arg
364 arguments
365 argument
367 opt_arguments_list
368 arguments_list
369 opt_yield_expressions
370 yield_expressions
371 yield_expression
372 opt_return_expressions
373 return_expressions
374 return_expression
375 opt_long_arguments
376 long_arguments
377 long_argument
379 %type <expr> expression
380 namespace_slice
382 %type <key> keys
383 keylist
384 opt_namespace
385 namespace
387 %type <ival> has_unique_reg
388 type
389 target_flags
390 target_flag
391 param_flags
392 param_flag
393 invocant_param
394 arg_flags
395 arg_flag
396 if_unless
397 binop
398 rel_op
399 condition
400 augmented_op
401 unique_reg_flag
403 %type <invo> long_invocation
404 long_invocation_stat
405 short_invocation_stat
406 methodcall
407 subcall
408 simple_invocation
409 long_yield_stat
410 long_return_stat
411 short_yield_stat
412 short_return_stat
413 invocation
414 short_invocation
415 return_instr
417 %type <cval> const_tail
418 constant
421 /* needed for reentrancy */
422 %pure-parser
424 /* name of generated parser */
425 %output="pirparser.c"
427 /* move around a yyscan_t object */
428 %parse-param {yyscan_t yyscanner}
429 %lex-param {yyscan_t yyscanner}
431 /* The parser is invoked with this extra parameter */
432 %parse-param {struct lexer_state * const lexer}
434 /* Note: don't declare the lexer_state argument as a %lex-param,
435 * this object is stored in the yyscan_t structure, and can be
436 * accessed through yy{set,get}_extra().
440 /* A note on the layout of the grammar rules.
442 * All actions should be written on the next line, with a fixed indention
443 * w.r.t. colon (':'). This is true for all rules, except:
445 * 1: simple actions that return a flag. For instance, the <has_unique_reg> rule.
446 * 2: simple rules that have only single tokens as alternatives. See the operators
447 * for an example.
449 * + Do not write embedded actions; instead, refactor the grammar by adding
450 * a new rule, so that the previously-embedded action becomes a 'normal'
451 * action.
454 /* Top-level rule */
455 %start TOP
461 /* Top-level rules */
463 TOP : opt_nl
464 pir_chunks
465 opt_nl
468 opt_nl : /* empty */
469 | "\n"
472 pir_chunks : pir_chunk
473 | pir_chunks "\n" pir_chunk
476 pir_chunk : sub_def
477 | const_decl
478 | namespace_decl
479 | hll_specifier
480 | hll_mapping
481 | loadlib
484 loadlib : ".loadlib" TK_STRINGC
485 { load_library(lexer, $2); }
488 /* HLL stuff */
490 hll_specifier : ".HLL" TK_STRINGC
491 { set_hll(lexer, $2); }
494 hll_mapping : ".HLL_map" TK_STRINGC '=' TK_STRINGC
495 { set_hll_map(lexer, $2, $4); }
498 namespace_decl : ".namespace" '[' opt_namespace ']'
499 { set_namespace(lexer, $3); }
502 opt_namespace : /* empty */
503 { $$ = NULL; }
504 | namespace
505 { $$ = $1; }
508 namespace : namespace_slice
509 { $$ = new_key($1); }
510 | namespace ';' namespace_slice
511 { $$ = add_key($1, $3); }
514 namespace_slice : TK_STRINGC
515 { $$ = expr_from_const(new_const(STRING_TYPE, $1)); }
518 sub_def : sub_head sub_flags "\n"
519 parameters
520 instructions
521 ".end"
524 sub_head : ".sub" sub_id
525 { new_subr(lexer, $2); }
528 sub_id : identifier
529 | TK_STRINGC
532 sub_flags : /* empty */
533 | sub_flags sub_flag
536 sub_flag : ":anon"
537 { set_sub_flag(lexer, SUB_FLAG_ANON);}
538 | ":init"
539 { set_sub_flag(lexer, SUB_FLAG_INIT); }
540 | ":load"
541 { set_sub_flag(lexer, SUB_FLAG_LOAD); }
542 | ":main"
543 { set_sub_flag(lexer, SUB_FLAG_MAIN); }
544 | ":method"
545 { set_sub_flag(lexer, SUB_FLAG_METHOD); }
546 | ":lex"
547 { set_sub_flag(lexer, SUB_FLAG_LEX); }
548 | ":postcomp"
549 { set_sub_flag(lexer, SUB_FLAG_POSTCOMP); }
550 | ":immediate"
551 { set_sub_flag(lexer, SUB_FLAG_IMMEDIATE); }
552 | ":multi"
553 { set_sub_flag(lexer, SUB_FLAG_MULTI); }
554 | ":outer" '(' sub_id ')'
555 { set_sub_outer(lexer, $3); }
556 | ":vtable" opt_paren_string
557 { set_sub_vtable(lexer, $2); }
558 | ":lexid" paren_string
559 { set_sub_lexid(lexer, $2); }
560 | ":instanceof" paren_string
561 { set_sub_instanceof(lexer, $2); }
564 multi_type : identifier
565 | TK_STRINGC
566 | keylist
569 parameters : /* empty */
570 | parameters parameter
573 parameter : ".param" param param_flags "\n"
574 { set_param_flag(lexer, $2, $3); }
577 param : type identifier
578 { $$ = add_param(lexer, $1, $2); }
581 param_flags : /* empty */
582 { $$ = 0; }
583 | param_flags param_flag
584 { SET_FLAG($$, $2); }
587 param_flag : target_flag
588 | invocant_param
589 | unique_reg_flag
592 invocant_param : ":invocant" '(' multi_type ')'
593 { $$ = TARGET_FLAG_INVOCANT; /* XXX handle multi_type */}
596 unique_reg_flag : ":unique_reg"
597 { $$ = TARGET_FLAG_UNIQUE_REG; }
600 /* Instructions */
602 instructions : /* empty */
603 | instructions instruction
606 instruction : TK_LABEL "\n"
607 { set_label(lexer, $1); }
608 | TK_LABEL statement
609 { set_label(lexer, $1); }
610 | statement
613 statement : conditional_stat
614 | goto_stat
615 | local_decl
616 | lex_decl
617 | const_decl_stat
618 | return_stat
619 | invocation_stat
620 | assignment_stat
621 | parrot_stat
622 | getresults_stat
623 | null_stat
624 | error_stat
627 /* "error" is a built-in rule; used for trying to recover. */
628 error_stat : error "\n"
630 if (lexer->parse_errors > MAX_NUM_ERRORS) {
631 fprintf(stderr, "Too many errors. Compilation aborted.\n");
632 exit(EXIT_FAILURE); /* fix: bail out and free() all memory */
634 yyerrok;
638 null_stat : "null" target "\n"
639 { set_instrf(lexer, "null", "%T", $2); }
642 getresults_stat : ".get_results" opt_target_list "\n"
643 { set_instrf(lexer, "get_results", "%T", $2); }
646 parrot_stat : parrot_instruction "\n"
649 parrot_instruction: parrot_op opt_op_args
652 parrot_op : TK_IDENT
653 { /* at this point, TK_IDENT may in fact be a symbol identifier,
654 * not an op, so don't do any checks like is_parrot_op() just yet.
656 set_instr(lexer, $1);
657 $$ = $1;
661 opt_op_args : op_args
662 { /* when this rule is activated, the initial identifier must
663 * be a parrot op. Check that, and if not, emit an error message.
665 char * const instr = get_instr(lexer);
666 if (!is_parrot_op(lexer, instr))
667 yyerror(yyscanner, lexer, "'%s' is not a parrot instruction", instr);
668 else
669 do_strength_reduction(lexer);
671 | keylist_assignment
674 op_args : /* empty */
675 | parrot_op_args
678 parrot_op_args : op_arg
679 | parrot_op_args ',' op_arg
682 keylist_assignment: keylist '=' expression
684 /* the "instruction" that was set now appears to be
685 * an identifier; get the name, and check its type.
687 char * const instr = get_instr(lexer);
688 symbol *sym = find_symbol(lexer, instr);
689 target *obj;
691 /* find the symbol for the object being indexed;
692 * it must have been declared
694 if (sym == NULL) {
695 yyerror(yyscanner, lexer, "indexed object '%s' not declared", instr);
696 sym = new_symbol(instr, PMC_TYPE);
698 else if (sym->type != PMC_TYPE) /* found symbol, now check it's a PMC */
699 yyerror(yyscanner, lexer,
700 "indexed object '%s' must be of type 'pmc'", instr);
702 /* convert the symbol into a target */
703 obj = target_from_symbol(sym);
705 /* set the key on the target */
706 set_target_key(obj, $1);
707 /* indexed operation is a "set" opcode */
708 set_instrf(lexer, "set", "%T%E", obj, $3);
712 op_arg : expression
713 { push_operand(lexer, $1); }
714 | keylist
715 { push_operand(lexer, expr_from_key($1)); }
716 | keyaccess
717 { push_operand(lexer, expr_from_target($1)); }
720 keyaccess : pmc_object keylist
722 /* if $1 is a register, just return that */
723 if (TEST_FLAG($1->flags, TARGET_FLAG_IS_REG))
724 $$ = $1;
725 else { /* it's not a register, so it must be a declared symbol */
726 symbol *sym = find_symbol(lexer, target_name($1));
727 if (sym == NULL) {
728 yyerror(yyscanner, lexer,
729 "indexed object '%s' not declared", target_name($1));
730 /* make sure sym is a valid pointer */
731 sym = new_symbol(target_name($1), PMC_TYPE);
733 else if (sym->type != PMC_TYPE)
734 yyerror(yyscanner, lexer,
735 "indexed object '%s' is not of type 'pmc'",
736 target_name($1));
738 /* create a target node based on the symbol node;
739 * sym already has a PASM register, so through
740 * this the target will get that too.
742 $$ = target_from_symbol(sym);
745 /* in both cases (register or symbol), set the key on this target */
746 set_target_key($$, $2);
750 keylist : '[' keys ']'
751 { $$ = $2; }
754 keys : expression
755 { $$ = new_key($1); }
756 | keys ';' expression
757 { $$ = add_key($1, $3); }
761 assignment_stat : assignment "\n"
764 assignment : target '=' TK_INTC
766 if ($3 == 0)
767 set_instrf(lexer, "null", "%T", $1);
768 else
769 set_instrf(lexer, "set", "%T%i", $1, $3);
771 | target '=' TK_NUMC
773 if ($3 == 0.0)
774 set_instrf(lexer, "null", "%T", $1);
775 else
776 set_instrf(lexer, "set", "%T%n", $1, $3);
778 | target '=' TK_STRINGC
779 { set_instrf(lexer, "set", "%T%s", $1, $3); }
780 | target '=' binary_expr
781 { unshift_operand(lexer, expr_from_target($1)); }
782 | target '=' parrot_op
784 symbol *sym = find_symbol(lexer, $3);
785 if (sym == NULL) {
786 if (!is_parrot_op(lexer, $3))
787 yyerror(yyscanner, lexer, "'%s' is neither a declared symbol "
788 "nor a parrot opcode", $3);
789 else /* handle it as an op */
790 unshift_operand(lexer, expr_from_target($1));
792 else /* handle it as a symbol */
793 set_instrf(lexer, "set", "%T%T", $1, target_from_symbol(sym));
796 | target '=' parrot_op expression ',' parrot_op_args
798 if (!is_parrot_op(lexer, $3))
799 yyerror(yyscanner, lexer, "'%s' is not a parrot op", $3);
800 else {
801 unshift_operand(lexer, $4);
802 unshift_operand(lexer, expr_from_target($1));
803 do_strength_reduction(lexer);
806 | target '=' parrot_op expression
808 if (!is_parrot_op(lexer, $3))
809 yyerror(yyscanner, lexer, "'%s' is not a parrot op", $3);
810 else {
811 set_instrf(lexer, $3, "%T%E", $1, $4);
812 do_strength_reduction(lexer);
815 | target '=' parrot_op keylist
817 symbol *sym = find_symbol(lexer, $3);
818 target *t;
819 if (sym == NULL) {
820 if (is_parrot_op(lexer, $3))
821 set_instrf(lexer, $3, "%T%E", $1, expr_from_key($4));
822 else
823 yyerror(yyscanner, lexer, "indexed object '%s' not declared", $3);
825 /* create a symbol node anyway, so we can continue with instr. gen. */
826 sym = new_symbol($3, PMC_TYPE);
828 /* at this point, sym is not NULL, even if there was an error */
829 if (sym->type != PMC_TYPE)
830 yyerror(yyscanner, lexer,
831 "indexed object '%s' must be of type 'pmc'", $3);
833 t = target_from_symbol(sym);
834 set_target_key(t, $4);
835 set_instrf(lexer, "set", "%T%T", $1, t);
838 | target '=' parrot_op keylist ',' parrot_op_args
840 unshift_operand(lexer, expr_from_key($4));
841 unshift_operand(lexer, expr_from_target($1));
843 | target '=' keyword keylist
845 symbol *sym = find_symbol(lexer, $3);
846 target *t;
848 if (sym == NULL) {
849 yyerror(yyscanner, lexer, "indexed object '%s' not declared", $3);
850 sym = new_symbol($3, PMC_TYPE);
852 else if (sym->type != PMC_TYPE)
853 yyerror(yyscanner, lexer,
854 "indexed object '%s' must be of type 'pmc'", $3);
856 t = target_from_symbol(sym);
857 set_target_key(t, $4);
858 set_instrf(lexer, "set", "%T%T", $1, t);
860 | target '=' TK_PREG keylist
862 target *preg = new_reg(lexer, PMC_TYPE, $3);
863 set_target_key(preg, $4);
864 set_instrf(lexer, "set", "%T%T", $1, preg);
866 | target augmented_op expression
867 { set_instrf(lexer, opnames[$2], "%T%E", $1, $3); }
868 | target "+=" TK_INTC
870 if ($3 == 1)
871 set_instrf(lexer, "inc", "%T", $1);
872 else if ($3 == 0)
873 set_instr(lexer, "noop");
874 else
875 set_instrf(lexer, "add", "%T%i", $1, $3);
877 | target "+=" TK_NUMC
879 if ($3 == 1.0)
880 set_instrf(lexer, "inc", "%T", $1);
881 else if ($3 == 0.0)
882 set_instr(lexer, "noop");
883 else
884 set_instrf(lexer, "add", "%T%n", $1, $3);
886 | target "-=" TK_INTC
888 if ($3 == 1)
889 set_instrf(lexer, "dec", "%T", $1);
890 else if ($3 == 0)
891 set_instr(lexer, "noop");
892 else
893 set_instrf(lexer, "sub", "%T%i", $1, $3);
895 | target "-=" TK_NUMC
897 if ($3 == 1.0)
898 set_instrf(lexer, "dec", "%T", $1);
899 else if ($3 == 0.0)
900 set_instr(lexer, "noop");
901 else
902 set_instrf(lexer, "sub", "%T%n", $1, $3);
904 | target "+=" target
905 { set_instrf(lexer, "add", "%T%T", $1, $3); }
906 | target "-=" target
907 { set_instrf(lexer, "sub", "%T%T", $1, $3); }
908 | target '=' unop expression
909 { set_instrf(lexer, $3, "%T%E", $1, $4); }
910 | target '=' target binop target
912 if (targets_equal($1, $3)) /* $P0 = $P0 + $P1 ==> $P0 += $P1 */
913 set_instrf(lexer, opnames[$4], "%T%T", $1, $5);
914 else
915 set_instrf(lexer, opnames[$4], "%T%T%T", $1, $3, $5);
917 | keyword keylist '=' expression
919 symbol *sym = find_symbol(lexer, $1);
920 target *t;
922 if (sym == NULL) {
923 yyerror(yyscanner, lexer, "indexed object '%s' not declared", $1);
924 /* create a dummy symbol so we can continue without seg. faults */
925 sym = new_symbol($1, PMC_TYPE);
927 else if (sym->type != PMC_TYPE)
928 yyerror(yyscanner, lexer,
929 "indexed object '%s' must be of type 'pmc'", $1);
930 /* at this point sym is a valid (possibly dummy) object for sure */
931 t = target_from_symbol(sym);
932 set_target_key(t, $2);
933 set_instrf(lexer, "set", "%T%E", t, $4);
935 | TK_PREG keylist '=' expression
937 target *preg = new_reg(lexer, PMC_TYPE, $1);
938 set_target_key(preg, $2);
939 set_instrf(lexer, "set", "%T%E", preg, $4);
946 =head1 CONSTANT FOLDING
948 A binary expression consists of two expressions, separated by a binary
949 operator. An expression can be a C<target> or a literal value. Such a
950 value can be an C<integer>, C<floating-point> or C<string> literal.
952 In the case that both operands are constants, we can pre-evaluate
953 the result of these constants, effectively preventing any calculation
954 during runtime. For instance:
956 $I0 = 42 + 1
958 can be pre-evaluated as
960 $I0 = 43
962 which will be assembled using the C<set> opcode. Likewise, concatenation
963 of two strings is done during compile time.
965 =cut
968 binary_expr : TK_INTC binop target
969 { set_instrf(lexer, opnames[$2], "%i%T", $1, $3); }
970 | TK_NUMC binop target
971 { set_instrf(lexer, opnames[$2], "%n%T", $1, $3); }
972 | TK_STRINGC binop target
973 { set_instrf(lexer, opnames[$2], "%s%T", $1, $3); }
974 | TK_STRINGC binop TK_STRINGC
975 { set_instrf(lexer, "set", "%C", fold_s_s(yyscanner, $1, $2, $3)); }
976 | TK_INTC binop TK_INTC
977 { set_instrf(lexer, "set", "%C", fold_i_i(yyscanner, $1, $2, $3)); }
978 | TK_NUMC binop TK_NUMC
979 { set_instrf(lexer, "set", "%C", fold_n_n(yyscanner, $1, $2, $3)); }
980 | TK_INTC binop TK_NUMC
981 { set_instrf(lexer, "set", "%C", fold_i_n(yyscanner, $1, $2, $3)); }
982 | TK_NUMC binop TK_INTC
983 { set_instrf(lexer, "set", "%C", fold_n_i(yyscanner, $1, $2, $3)); }
987 conditional_stat : conditional_instr "\n"
991 /* In order to allow all keywords (data type names and words such as "if", "null", etc.
992 * a lot of special cases must be distinguished; this is necessary in order to
993 * do a correct parse and prevent shift/reduce conflicts.
995 conditional_instr : if_unless "null" TK_IDENT "goto" identifier
996 { create_if_instr(yyscanner, lexer, $1, 1, $3, $4); }
997 | if_unless "null" "int" "goto" identifier
998 { create_if_instr(yyscanner, lexer, $1, 1, "int", $4); }
999 | if_unless "null" "num" "goto" identifier
1000 { create_if_instr(yyscanner, lexer, $1, 1, "num", $4); }
1001 | if_unless "null" "pmc" "goto" identifier
1002 { create_if_instr(yyscanner, lexer, $1, 1, "pmc", $4); }
1003 | if_unless "null" "string" "goto" identifier
1004 { create_if_instr(yyscanner, lexer, $1, 1, "string", $4); }
1005 | if_unless "null" "if" "goto" identifier
1006 { create_if_instr(yyscanner, lexer, $1, 1, "if", $4); }
1007 | if_unless "null" "unless" "goto" identifier
1008 { create_if_instr(yyscanner, lexer, $1, 1, "unless", $4); }
1009 | if_unless "null" "goto" "goto" identifier
1010 { create_if_instr(yyscanner, lexer, $1, 1, "goto", $4); }
1011 | if_unless "null" "null" "goto" identifier
1012 { create_if_instr(yyscanner, lexer, $1, 1, "null", $4); }
1013 | if_unless constant then identifier
1015 int istrue = evaluate_c($2);
1016 /* if "unless", invert the true-ness */
1017 istrue = $1 ? !istrue : istrue;
1018 if (istrue)
1019 set_instrf(lexer, "branch", "%I", $4);
1020 else
1021 set_instr(lexer, "noop");
1023 | if_unless TK_IDENT then identifier
1024 { create_if_instr(yyscanner, lexer, $1, 0, $2, $4); }
1025 | if_unless "int" then identifier
1026 { create_if_instr(yyscanner, lexer, $1, 0, "int", $4); }
1027 | if_unless "num" then identifier
1028 { create_if_instr(yyscanner, lexer, $1, 0, "num", $4); }
1029 | if_unless "pmc" then identifier
1030 { create_if_instr(yyscanner, lexer, $1, 0, "pmc", $4); }
1031 | if_unless "string" then identifier
1032 { create_if_instr(yyscanner, lexer, $1, 0, "string", $4); }
1033 | if_unless "if" then identifier
1034 { create_if_instr(yyscanner, lexer, $1, 0, "if", $4); }
1035 | if_unless "unless" then identifier
1036 { create_if_instr(yyscanner, lexer, $1, 0, "unless", $4); }
1037 | if_unless "goto" "goto" identifier
1038 { create_if_instr(yyscanner, lexer, $1, 0, "goto", $4); }
1039 | if_unless "goto" ',' identifier
1040 { create_if_instr(yyscanner, lexer, $1, 0, "goto", $4); }
1041 | if_unless "null" "goto" identifier
1042 { create_if_instr(yyscanner, lexer, $1, 0, "null", $4); }
1043 | if_unless "null" ',' identifier
1044 { create_if_instr(yyscanner, lexer, $1, 0, "null", $4); }
1045 | if_unless condition "goto" identifier
1047 if ($2 == -1) { /* -1 means the condition is evaluated during runtime */
1048 if ($1) /* "unless"? if so, invert the instruction. */
1049 invert_instr(lexer);
1051 push_operand(lexer, expr_from_ident($4));
1053 else { /* evaluation during compile time */
1054 /* if the result was false but the instr. was "unless", or,
1055 * if the result was true and the instr. was "if",
1056 * do an unconditional jump.
1058 if ( (($2 == 0) && $1) || (($2 == 1) && !$1) )
1059 set_instrf(lexer, "branch", "%I", $4);
1060 else
1061 set_instr(lexer, "noop");
1067 /* the condition rule returns -1 if the condition can't be evaluated yet, so
1068 * it must be done during runtime. Otherwise, if the condition evaluates to
1069 * "false", 0 is returned, and if true, 1 is returned.
1071 condition : target rel_op expression
1073 set_instrf(lexer, opnames[$2], "%T%E", $1, $3);
1074 $$ = -1; /* -1 indicates this is evaluated at runtime */
1076 | TK_INTC rel_op target
1078 set_instrf(lexer, opnames[$2], "%i%T", $1, $3);
1079 $$ = -1;
1081 | TK_NUMC rel_op target
1083 set_instrf(lexer, opnames[$2], "%n%T", $1, $3);
1084 $$ = -1;
1086 | TK_STRINGC rel_op target
1088 set_instrf(lexer, opnames[$2], "%s%T", $1, $3);
1089 $$ = -1;
1091 | TK_INTC rel_op TK_INTC
1092 { $$ = evaluate_i_i($1, $2, $3); }
1093 | TK_INTC rel_op TK_NUMC
1094 { $$ = evaluate_i_n($1, $2, $3); }
1095 | TK_NUMC rel_op TK_INTC
1096 { $$ = evaluate_n_i($1, $2, $3); }
1097 | TK_NUMC rel_op TK_NUMC
1098 { $$ = evaluate_n_n($1, $2, $3); }
1099 | TK_STRINGC rel_op TK_STRINGC
1100 { $$ = evaluate_s_s($1, $2, $3); }
1103 if_unless : "if" { $$ = 0; /* no need to invert */ }
1104 | "unless" { $$ = 1; /* yes, invert opname */ }
1107 then : "goto" /* PIR mode */
1108 | ',' /* PASM mode*/
1111 goto_stat : "goto" identifier "\n"
1112 { set_instrf(lexer, "branch", "%I", $2); }
1115 local_decl : ".local" type local_id_list "\n"
1116 { declare_local(lexer, $2, $3); }
1119 local_id_list : local_id
1120 { $$ = $1; }
1121 | local_id_list ',' local_id
1122 { $$ = add_local($1, $3); }
1125 local_id : local_var_name has_unique_reg
1126 { $$ = new_local($1, $2); }
1129 local_var_name : identifier
1130 { /* try to find symbol for this id; if found, it was already declared */
1131 symbol *sym = find_symbol(lexer, $1);
1132 if (sym)
1133 yyerror(yyscanner, lexer, "symbol '%s' is already declared", $1);
1134 $$ = $1;
1138 has_unique_reg : /* empty */ { $$ = 0; }
1139 | ":unique_reg" { $$ = 1; }
1142 lex_decl : ".lex" TK_STRINGC ',' pmc_object "\n"
1143 { /* if $4 is not a register, it must be a declared symbol */
1144 if (!TEST_FLAG($4->flags, TARGET_FLAG_IS_REG)) {
1145 symbol *sym = find_symbol(lexer, target_name($4));
1147 if (sym == NULL) /* check declaration */
1148 yyerror(yyscanner, lexer, "lexical '%s' is not declared",
1149 target_name($4));
1150 else if (sym->type != PMC_TYPE) /* a .lex must be a PMC */
1151 yyerror(yyscanner, lexer, "lexical '%s' must be of type 'pmc'",
1152 target_name($4));
1154 set_lex_flag($4, $2);
1158 /* Sub/method invocation */
1161 invocation_stat : invocation
1162 { convert_inv_to_instr(lexer, $1); }
1165 invocation : long_invocation_stat
1166 | short_invocation_stat
1169 long_invocation_stat : ".begin_call" "\n"
1170 opt_long_arguments
1171 long_invocation "\n"
1172 opt_long_results
1173 ".end_call" "\n"
1174 { /* $4 contains an invocation object */
1175 set_invocation_args($4, $3);
1176 set_invocation_results($4, $6);
1177 $$ = $4;
1181 opt_long_arguments : /* empty */
1182 { $$ = NULL; }
1183 | long_arguments
1184 { $$ = $1; }
1187 long_arguments : long_argument
1188 { $$ = $1; }
1189 | long_arguments long_argument
1190 { $$ = add_arg($1, $2); }
1193 long_argument : ".set_arg" short_arg "\n"
1194 { $$ = $2; }
1197 long_invocation : ".call" pmc_object opt_ret_cont
1198 { $$ = invoke(lexer, CALL_PCC, $2, $3); }
1199 | ".nci_call" pmc_object
1200 { $$ = invoke(lexer, CALL_NCI, $2); }
1201 | ".invocant" pmc_object "\n"
1202 ".meth_call" method
1203 { $$ = invoke(lexer, CALL_METHOD, $2, $5); }
1206 opt_ret_cont : /* empty */
1207 { $$ = NULL; }
1208 | ',' pmc_object
1209 { $$ = $2; }
1212 opt_long_results : /* empty */
1213 { $$ = NULL; }
1214 | long_results
1215 { $$ = $1; }
1218 long_results : long_result
1219 { $$ = $1; }
1220 | long_results long_result
1221 { $$ = add_target(lexer, $1, $2); }
1224 long_result : ".get_result" result_target "\n"
1225 { $$ = $2; }
1226 | local_decl
1227 { $$ = NULL; }
1230 short_invocation_stat: short_invocation "\n"
1234 short_invocation : opt_target_list '=' simple_invocation
1235 { set_invocation_results($3, $1);
1236 $$ = $3;
1238 | target '=' simple_invocation
1239 { set_invocation_results($3, $1);
1240 $$ = $3;
1242 | simple_invocation
1243 { set_invocation_results($1, NULL);
1244 $$ = $1;
1248 simple_invocation : subcall
1249 | methodcall
1252 methodcall : pmc_object '.' method arguments
1254 /* if $1 is not a register, check whether the symbol was declared */
1255 if (!TEST_FLAG($1->flags, TARGET_FLAG_IS_REG)) {
1256 symbol *sym = find_symbol(lexer, target_name($1));
1257 if (sym == NULL)
1258 yyerror(yyscanner, lexer, "object '%s' not declared",
1259 target_name($1));
1261 else if (sym->type != PMC_TYPE)
1262 yyerror(yyscanner, lexer,
1263 "cannot invoke method: '%s' is not of type 'pmc'",
1264 target_name($1));
1267 $$ = invoke(lexer, CALL_METHOD, $1, $3);
1268 set_invocation_args($$, $4);
1272 subcall : sub arguments
1274 $$ = invoke(lexer, CALL_PCC, $1, NULL);
1275 set_invocation_args($$, $2);
1279 sub : pmc_object
1280 { $$ = $1; }
1281 | TK_STRINGC
1282 { $$ = target_from_string($1); }
1285 method : identifier
1286 { /* check that this identifier was declared */
1287 symbol *sym = find_symbol(lexer, $1);
1289 if (sym == NULL) {
1290 yyerror(yyscanner, lexer,
1291 "method identifier '%s' not declared", $1);
1292 /* make sure sym is not NULL */
1293 sym = new_symbol($1, PMC_TYPE);
1295 else if (sym->type != PMC_TYPE && sym->type != STRING_TYPE)
1296 yyerror(yyscanner, lexer,
1297 "method '%s' must be of type 'pmc' or 'string'", $1);
1299 $$ = target_from_symbol(sym);
1301 | TK_PREG
1302 { $$ = new_reg(lexer, PMC_TYPE, $1); }
1303 | TK_SREG
1304 { $$ = new_reg(lexer, STRING_TYPE, $1); }
1305 | TK_STRINGC
1306 { $$ = target_from_string($1); }
1309 pmc_object : identifier
1310 { $$ = target_from_ident(PMC_TYPE, $1); }
1311 | TK_PREG
1312 { $$ = new_reg(lexer, PMC_TYPE, $1); }
1316 opt_target_list : '(' opt_list ')'
1317 { $$ = $2; }
1320 opt_list : /* empty */
1321 { $$ = NULL; }
1322 | target_list
1323 { $$ = $1; }
1326 target_list : result_target
1327 { $$ = $1; }
1328 | target_list ',' result_target
1329 { $$ = add_target(lexer, $1, $3); }
1332 result_target : target target_flags
1333 { $$ = set_param_flag(lexer, $1, $2); }
1336 target_flags : /* empty */
1337 { $$ = 0; }
1338 | target_flags target_flag
1339 { SET_FLAG($$, $2); }
1342 target_flag : ":optional"
1343 { $$ = TARGET_FLAG_OPTIONAL; }
1344 | ":opt_flag"
1345 { $$ = TARGET_FLAG_OPT_FLAG; }
1346 | ":slurpy"
1347 { $$ = TARGET_FLAG_SLURPY; }
1348 | ":named" opt_paren_string
1350 $$ = TARGET_FLAG_NAMED;
1351 set_param_alias(lexer, $2);
1355 /* Returning and Yielding */
1358 return_stat : return_instr
1359 { convert_inv_to_instr(lexer, $1); }
1362 return_instr : short_return_stat
1363 | long_return_stat
1364 | short_yield_stat
1365 | long_yield_stat
1368 short_return_stat : ".return" arguments "\n"
1370 $$ = invoke(lexer, CALL_RETURN);
1371 set_invocation_args($$, $2);
1373 | ".tailcall" simple_invocation "\n"
1374 { /* was the invocation a method call? then it becomes a method tail
1375 * call, otherwise it's just a normal (sub) tail call.
1377 set_invocation_type($2, ($2->type == CALL_METHOD)
1378 ? CALL_METHOD_TAILCALL
1379 : CALL_TAILCALL);
1380 $$ = $2;
1384 short_yield_stat : ".yield" arguments "\n"
1386 $$ = invoke(lexer, CALL_YIELD);
1387 set_invocation_args($$, $2);
1391 arguments : '(' opt_arguments_list ')'
1392 { $$ = $2; }
1395 opt_arguments_list : /* empty */
1396 { $$ = NULL; }
1397 | arguments_list
1398 { $$ = $1; }
1401 arguments_list : argument
1402 { $$ = $1; }
1403 | arguments_list ',' argument
1404 { $$ = add_arg($1, $3); }
1407 argument : short_arg
1408 | named_arg
1411 named_arg : TK_STRINGC "=>" arg
1412 { $$ = set_arg_alias(lexer, $1); }
1415 short_arg : arg arg_flags
1416 { $$ = set_arg_flag($$, $2); }
1419 arg : expression
1420 { $$ = set_curarg(lexer, new_argument($1)); }
1423 long_return_stat : ".begin_return" "\n"
1424 opt_return_expressions
1425 ".end_return" "\n"
1427 $$ = invoke(lexer, CALL_RETURN);
1428 set_invocation_args($$, $3);
1432 long_yield_stat : ".begin_yield" "\n"
1433 opt_yield_expressions
1434 ".end_yield" "\n"
1436 $$ = invoke(lexer, CALL_YIELD);
1437 set_invocation_args($$, $3);
1441 opt_yield_expressions : /* empty */
1442 { $$ = NULL; }
1443 | yield_expressions
1444 { $$ = $1; }
1448 yield_expressions : yield_expression
1449 { $$ = $1; }
1450 | yield_expressions yield_expression
1451 { $$ = add_arg($1, $2); }
1455 yield_expression : ".set_yield" short_arg "\n"
1456 { $$ = $2; }
1459 opt_return_expressions: /* empty */
1460 { $$ = NULL; }
1461 | return_expressions
1462 { $$ = $1; }
1465 return_expressions : return_expression
1466 { $$ = $1; }
1467 | return_expressions return_expression
1468 { $$ = add_arg($1, $2); }
1471 return_expression : ".set_return" short_arg "\n"
1472 { $$ = $2; }
1476 arg_flags : /* empty */
1477 { $$ = 0; }
1478 | arg_flags arg_flag
1479 { SET_FLAG($$, $2); }
1482 arg_flag : ":flat"
1483 { $$ = ARG_FLAG_FLAT; }
1484 | ":named" opt_paren_string
1486 $$ = ARG_FLAG_NAMED;
1487 set_arg_alias(lexer, $2);
1491 opt_paren_string : /* empty */
1492 { $$ = NULL; }
1493 | paren_string
1494 { $$ = $1; }
1497 paren_string : '(' TK_STRINGC ')'
1498 { $$ = $2; }
1501 const_decl_stat : const_stat "\n"
1504 const_stat : const_decl
1505 | globalconst_decl
1508 const_decl : ".const" const_tail
1509 { store_global_const(lexer, $2); }
1512 globalconst_decl : ".globalconst" const_tail
1513 { /* XXX is .globalconst to be kept? */ }
1516 const_tail : "int" identifier '=' TK_INTC
1517 { $$ = new_named_const(INT_TYPE, $2, $4); }
1518 | "num" identifier '=' TK_NUMC
1519 { $$ = new_named_const(NUM_TYPE, $2, $4); }
1520 | "string" identifier '=' TK_STRINGC
1521 { $$ = new_named_const(STRING_TYPE, $2, $4); }
1522 | "pmc" identifier '=' TK_STRINGC
1523 { $$ = new_named_const(PMC_TYPE, $2, $4); }
1525 | "Sub" identifier '=' TK_STRINGC
1526 | "Coroutine" identifier '=' TK_STRINGC
1529 /* this might be useful, for:
1530 .const "Sub" foo = "foo" # make a Sub PMC of subr. "foo"
1531 .const "Float" PI = 3.14 # make a Float PMC for 3.14
1533 Is: .const pmc x = 'foo' any useful? Type of x is not clear.
1535 | TK_STRINGC identifier '=' constant
1536 { $$ = new_pmc_const($1, $2, $4); }
1542 /* Expressions, variables and operators */
1544 expression : target { $$ = expr_from_target($1); }
1545 | constant { $$ = expr_from_const($1); }
1549 constant : TK_STRINGC { $$ = new_const(STRING_TYPE, $1); }
1550 | TK_INTC { $$ = new_const(INT_TYPE, $1); }
1551 | TK_NUMC { $$ = new_const(NUM_TYPE, $1); }
1554 rel_op : "!=" { $$ = OP_NE; }
1555 | "==" { $$ = OP_EQ; }
1556 | "<" { $$ = OP_LT; }
1557 | "<=" { $$ = OP_LE; }
1558 | ">=" { $$ = OP_GE; }
1559 | ">" { $$ = OP_GT; }
1562 type : "int" { $$ = INT_TYPE; }
1563 | "num" { $$ = NUM_TYPE; }
1564 | "pmc" { $$ = PMC_TYPE; }
1565 | "string" { $$ = STRING_TYPE; }
1568 /* helper rule to set the symbol's target node as the "current"; this is needed for
1569 * setting flags etc. Instead of duplicating this code for each register type and
1570 * also for symbols, use this extra rule to do it once.
1573 target : symbol { set_curtarget(lexer, $1); }
1576 symbol : TK_PREG { $$ = new_reg(lexer, PMC_TYPE, $1); }
1577 | TK_NREG { $$ = new_reg(lexer, NUM_TYPE, $1); }
1578 | TK_IREG { $$ = new_reg(lexer, INT_TYPE, $1); }
1579 | TK_SREG { $$ = new_reg(lexer, STRING_TYPE, $1); }
1580 | identifier { /* a symbol must have been declared; check that at this point. */
1581 symbol *sym = find_symbol(lexer, $1);
1582 if (sym == NULL) {
1583 yyerror(yyscanner, lexer, "symbol '%s' not declared", $1);
1585 /* make sure sym is not NULL */
1586 sym = new_symbol($1, UNKNOWN_TYPE);
1588 $$ = target_from_symbol(sym);
1593 /* an identifier can be any sequence of characters or one of the keywords */
1594 identifier : TK_IDENT
1595 | keyword
1598 keyword : "if" { $$ = dupstr(lexer, "if"); }
1599 | "unless" { $$ = dupstr(lexer, "unless"); }
1600 | "goto" { $$ = dupstr(lexer, "goto"); }
1601 | "int" { $$ = dupstr(lexer, "int"); }
1602 | "num" { $$ = dupstr(lexer, "num"); }
1603 | "string" { $$ = dupstr(lexer, "string"); }
1604 | "pmc" { $$ = dupstr(lexer, "pmc"); }
1605 | "null" { $$ = dupstr(lexer, "null"); }
1608 unop : '-' { $$ = "neg"; }
1609 | '!' { $$ = "not"; }
1610 | '~' { $$ = "bnot"; }
1613 binop : '+' { $$ = OP_ADD; }
1614 | '-' { $$ = OP_SUB; }
1615 | '/' { $$ = OP_DIV; }
1616 | '*' { $$ = OP_MUL; }
1617 | '%' { $$ = OP_MOD; }
1618 | '|' { $$ = OP_BOR; }
1619 | '&' { $$ = OP_BAND; }
1620 | '~' { $$ = OP_BXOR; }
1621 | "**" { $$ = OP_POW; }
1622 | "." { $$ = OP_CONCAT; }
1623 | ">>>" { $$ = OP_LSR; }
1624 | ">>" { $$ = OP_SHR; }
1625 | "<<" { $$ = OP_SHL; }
1626 | "||" { $$ = OP_OR; }
1627 | "&&" { $$ = OP_AND; }
1628 | "//" { $$ = OP_FDIV; }
1629 | "~~" { $$ = OP_XOR; }
1630 | "==" { $$ = OP_ISEQ; }
1631 | "<=" { $$ = OP_ISLE; }
1632 | "<" { $$ = OP_ISLT; }
1633 | ">=" { $$ = OP_ISGE; }
1634 | ">" { $$ = OP_ISGT; }
1635 | "!=" { $$ = OP_ISNE; }
1638 /* note that += and -= are separated, because when adding/subtracting 1,
1639 * this is optimized by using the "inc"/"dec" instructions.
1641 augmented_op: "*=" { $$ = OP_MUL; }
1642 | "%=" { $$ = OP_MOD; }
1643 | "**=" { $$ = OP_POW; }
1644 | "/=" { $$ = OP_DIV; }
1645 | "//=" { $$ = OP_FDIV; }
1646 | "|=" { $$ = OP_BOR; }
1647 | "&=" { $$ = OP_BAND; }
1648 | "~=" { $$ = OP_BXOR; }
1649 | ".=" { $$ = OP_CONCAT; }
1650 | ">>=" { $$ = OP_SHR; }
1651 | "<<=" { $$ = OP_SHL; }
1652 | ">>>=" { $$ = OP_LSR; }
1657 #include <math.h>
1658 #include <assert.h>
1662 =head1 FUNCTIONS
1664 =over 4
1666 =item C<static constant *
1667 fold_i_i(yyscan_t yyscanner, int a, pir_math_operator op, int b)>
1669 Evaluates the expression C<a op b> and returns a constant node
1670 containing the result value. Both C<a> and C<b> are integer values.
1672 =cut
1675 static constant *
1676 fold_i_i(yyscan_t yyscanner, int a, pir_math_operator op, int b) {
1677 int result;
1679 switch (op) {
1680 case OP_ADD:
1681 result = a + b;
1682 break;
1683 case OP_SUB:
1684 result = a - b;
1685 break;
1686 case OP_DIV:
1687 if (b == 0)
1688 yyerror(yyscanner, yyget_extra(yyscanner), "cannot divide by 0!");
1689 else
1690 result = a / b;
1691 break;
1692 case OP_MUL:
1693 result = a * b;
1694 break;
1695 case OP_MOD:
1696 result = a % b;
1697 break;
1698 case OP_BOR:
1699 result = (a | b);
1700 break;
1701 case OP_BAND:
1702 result = (a & b);
1703 break;
1704 case OP_BXOR:
1705 result = (a ^ b);
1706 break;
1707 case OP_POW:
1708 result = pow(a, b);
1709 break;
1710 case OP_CONCAT:
1711 yyerror(yyscanner, yyget_extra(yyscanner),
1712 "cannot concatenate operands of type 'int' and 'int'");
1713 break;
1714 case OP_LSR:
1715 /* from bits.ops: $1 = (INTVAL)((UINTVAL)$2 >> $3); */
1716 result = (int)((unsigned)a >> b);
1717 break;
1718 case OP_SHR:
1719 result = a >> b;
1720 break;
1721 case OP_SHL:
1722 result = a << b;
1723 break;
1724 case OP_OR:
1725 result = (a || b);
1726 break;
1727 case OP_AND:
1728 result = (a && b);
1729 break;
1730 case OP_FDIV:
1731 /* 7 // 2 -> 3, this is what integer division does (rounding down to whole integers) */
1732 result = a / b;
1733 break;
1734 case OP_XOR:
1735 result = a ^ b;
1736 break;
1737 case OP_ISEQ:
1738 result = (a == b);
1739 break;
1740 case OP_ISLE:
1741 result = (a <= b);
1742 break;
1743 case OP_ISLT:
1744 result = (a < b);
1745 break;
1746 case OP_ISGE:
1747 result = (a >= b);
1748 break;
1749 case OP_ISGT:
1750 result = (a > b);
1751 break;
1752 case OP_ISNE:
1753 result = (a != b);
1754 break;
1756 /* OP_INC and OP_DEC are here only to keep the C compiler happy */
1757 case OP_INC:
1758 case OP_DEC:
1759 panic("detected 'inc' or 'dec' in fold_i_i()");
1760 break;
1762 return new_const(INT_TYPE, result);
1767 =item C<static constant *
1768 fold_n_i(yyscan_t yyscanner, double a, pir_math_operator op, int b)>
1770 Same as C<fold_i_i>, except C<a> is of type double.
1772 =cut
1775 static constant *
1776 fold_n_i(yyscan_t yyscanner, double a, pir_math_operator op, int b) {
1777 double result;
1778 switch (op) {
1779 case OP_ADD:
1780 result = a + b;
1781 break;
1782 case OP_SUB:
1783 result = a - b;
1784 break;
1785 case OP_DIV:
1786 if (b == 0)
1787 yyerror(yyscanner, yyget_extra(yyscanner), "cannot divide by 0!");
1788 else
1789 result = a / b;
1790 break;
1791 case OP_MUL:
1792 result = a * b;
1793 break;
1794 case OP_MOD:
1795 case OP_BOR:
1796 case OP_BAND:
1797 case OP_BXOR:
1798 case OP_SHR:
1799 case OP_SHL:
1800 case OP_LSR:
1801 case OP_XOR:
1802 case OP_CONCAT:
1803 yyerror(yyscanner, yyget_extra(yyscanner),
1804 "cannot apply binary operator '%s' to types 'num' and 'int'", opnames[op]);
1805 break;
1806 case OP_POW:
1807 result = pow(a, b);
1808 break;
1809 case OP_OR:
1810 result = (a || b);
1811 break;
1812 case OP_AND:
1813 result = (a && b);
1814 break;
1815 case OP_FDIV:
1816 result = floor(a / b);
1817 break;
1818 case OP_ISEQ:
1819 result = (a == b);
1820 break;
1821 case OP_ISLE:
1822 result = (a <= b);
1823 break;
1824 case OP_ISLT:
1825 result = (a < b);
1826 break;
1827 case OP_ISGE:
1828 result = (a >= b);
1829 break;
1830 case OP_ISGT:
1831 result = (a > b);
1832 break;
1833 case OP_ISNE:
1834 result = (a != b);
1835 break;
1837 /* OP_INC and OP_DEC are here only to keep the C compiler happy */
1838 case OP_INC:
1839 case OP_DEC:
1840 panic("detected 'inc' or 'dec' in fold_n_i()");
1841 break;
1843 return new_const(NUM_TYPE, result);
1848 =item C<static constant *
1849 fold_i_n(yyscan_t yyscanner, int a, pir_math_operator op, double b)>
1851 Same as C<fold_i_i>, except C<b> is of type double.
1853 =cut
1856 static constant *
1857 fold_i_n(yyscan_t yyscanner, int a, pir_math_operator op, double b) {
1858 double result;
1860 switch (op) {
1861 case OP_ADD:
1862 result = a + b;
1863 break;
1864 case OP_SUB:
1865 result = a - b;
1866 break;
1867 case OP_DIV:
1868 if (b == 0)
1869 yyerror(yyscanner, yyget_extra(yyscanner), "cannot divide by 0!");
1870 else
1871 result = a / b;
1872 break;
1873 case OP_MUL:
1874 result = a * b;
1875 break;
1876 case OP_MOD:
1877 case OP_BOR:
1878 case OP_BAND:
1879 case OP_BXOR:
1880 case OP_LSR:
1881 case OP_SHR:
1882 case OP_SHL:
1883 case OP_XOR:
1884 case OP_CONCAT:
1885 yyerror(yyscanner, yyget_extra(yyscanner),
1886 "cannot apply binary operator '%s' to types 'int' and 'num'", opnames[op]);
1887 break;
1888 case OP_POW:
1889 result = pow(a, b);
1890 break;
1891 case OP_OR:
1892 result = (a || b);
1893 break;
1894 case OP_AND:
1895 result = (a && b);
1896 break;
1897 case OP_FDIV:
1898 result = floor(a / b);
1899 break;
1900 case OP_ISEQ:
1901 result = (a == b);
1902 break;
1903 case OP_ISLE:
1904 result = (a <= b);
1905 break;
1906 case OP_ISLT:
1907 result = (a < b);
1908 break;
1909 case OP_ISGE:
1910 result = (a >= b);
1911 break;
1912 case OP_ISGT:
1913 result = (a > b);
1914 break;
1915 case OP_ISNE:
1916 result = (a != b);
1917 break;
1919 /* OP_INC and OP_DEC are here only to keep the C compiler happy */
1920 case OP_INC:
1921 case OP_DEC:
1922 panic("detected 'inc' or 'dec' in fold_i_n()");
1923 break;
1925 return new_const(NUM_TYPE, result);
1930 =item C<static constant *
1931 fold_n_n(yyscan_t yyscanner, double a, pir_math_operator op, double b)>
1933 Same as C<fold_i_i>, except that both C<a> and C<b> are of type double.
1935 =cut
1938 static constant *
1939 fold_n_n(yyscan_t yyscanner, double a, pir_math_operator op, double b) {
1940 double result;
1941 switch (op) {
1942 case OP_ADD:
1943 result = a + b;
1944 break;
1945 case OP_SUB:
1946 result = a - b;
1947 break;
1948 case OP_DIV:
1949 if (b == 0) /* throw exception ? */
1950 yyerror(yyscanner, yyget_extra(yyscanner), "cannot divide by 0");
1951 else
1952 result = a / b;
1953 break;
1954 case OP_MUL:
1955 result = a * b;
1956 break;
1957 case OP_POW:
1958 result = pow(a, b);
1959 break;
1960 case OP_MOD:
1961 case OP_BOR:
1962 case OP_BAND:
1963 case OP_BXOR:
1964 case OP_CONCAT:
1965 case OP_LSR:
1966 case OP_SHR:
1967 case OP_SHL:
1968 case OP_XOR:
1969 yyerror(yyscanner, yyget_extra(yyscanner),
1970 "cannot apply binary operator '%s' to arguments of type number", opnames[op]);
1971 break;
1972 case OP_OR:
1973 result = (a || b);
1974 break;
1975 case OP_AND:
1976 result = (a && b);
1977 break;
1978 case OP_FDIV:
1979 if (b == 0)
1980 yyerror(yyscanner, yyget_extra(yyscanner), "cannot divide by 0");
1981 else
1982 result = floor(a / b);
1983 break;
1984 case OP_ISEQ:
1985 result = (a == b);
1986 break;
1987 case OP_ISLE:
1988 result = (a <= b);
1989 break;
1990 case OP_ISLT:
1991 result = (a < b);
1992 break;
1993 case OP_ISGE:
1994 result = (a >= b);
1995 break;
1996 case OP_ISGT:
1997 result = (a > b);
1998 break;
1999 case OP_ISNE:
2000 result = (a != b);
2001 break;
2003 /* OP_INC and OP_DEC are here only to keep the C compiler happy */
2004 case OP_INC:
2005 case OP_DEC:
2006 break;
2008 return new_const(NUM_TYPE, result);
2013 =item C<static constant *
2014 fold_s_s(yyscan_t yyscanner, char *a, pir_math_operator op, char *b)>
2016 Evaluate the expression C<a op b>, where both C<a> and C<b> are
2017 strings. Only the concatenation and comparison operators are implemented;
2018 other operators will result in an error.
2020 =cut
2023 static constant *
2024 fold_s_s(yyscan_t yyscanner, char *a, pir_math_operator op, char *b) {
2025 switch (op) {
2026 case OP_CONCAT:
2027 return new_const(STRING_TYPE, concat_strings(a, b));
2029 case OP_ADD:
2030 case OP_SUB:
2031 case OP_DIV:
2032 case OP_MUL:
2033 case OP_POW:
2034 case OP_MOD:
2035 case OP_BOR:
2036 case OP_BAND:
2037 case OP_BXOR:
2038 case OP_LSR:
2039 case OP_SHR:
2040 case OP_SHL:
2041 case OP_XOR:
2042 case OP_OR:
2043 case OP_AND:
2044 case OP_FDIV:
2045 yyerror(yyscanner, yyget_extra(yyscanner),
2046 "cannot apply binary operator '%s' to arguments of type number", opnames[op]);
2047 return new_const(STRING_TYPE, a);
2049 case OP_ISEQ:
2050 case OP_ISLE:
2051 case OP_ISLT:
2052 case OP_ISGE:
2053 case OP_ISGT:
2054 case OP_ISNE:
2055 return new_const(INT_TYPE, (1 == evaluate_s_s(a, op, b)));
2058 /* OP_INC and OP_DEC are here only to keep the C compiler happy */
2059 case OP_INC:
2060 case OP_DEC:
2061 panic("detected 'inc' or 'dec' in fold_s_s()");
2062 break;
2064 return NULL;
2069 =item C<static int
2070 evaluate_i_i(int a, pir_rel_operator op, double b)>
2072 Compare C<a> with C<b> according to the relational operator C<op>.
2073 Wrapper for C<evaluate_n_n>, which takes arguments of type double.
2075 =cut
2078 static int
2079 evaluate_i_i(int a, pir_rel_operator op, int b) {
2080 return evaluate_n_n(a, op, b);
2085 =item C<static int
2086 evaluate_n_i(int a, pir_rel_operator op, double b)>
2088 Compare C<a> with C<b> according to the relational operator C<op>.
2089 Wrapper for C<evaluate_n_n>, which takes arguments of type double.
2091 =cut
2094 static int
2095 evaluate_n_i(double a, pir_rel_operator op, int b) {
2096 return evaluate_n_n(a, op, b);
2101 =item C<static int
2102 evaluate_i_n(int a, pir_rel_operator op, double b)>
2104 Compare C<a> with C<b> according to the relational operator C<op>.
2105 Wrapper for C<evaluate_n_n>, which takes arguments of type double.
2107 =cut
2110 static int
2111 evaluate_i_n(int a, pir_rel_operator op, double b) {
2112 return evaluate_n_n(a, op, b);
2117 =item C<static int
2118 evaluate_n_n(double a, pir_rel_operator op, double b)>
2120 Compare C<a> with C<b> according to the relational operator C<op>.
2121 C<op> can be C<<!=>>, C<<==>>, C<< < >>, C<< <= >>, C<< > >> or C<< >= >>.
2123 =cut
2126 static int
2127 evaluate_n_n(double a, pir_rel_operator op, double b) {
2128 switch (op) {
2129 case OP_NE:
2130 return (a != b);
2131 case OP_EQ:
2132 return (a == b);
2133 case OP_LT:
2134 return (a < b);
2135 case OP_LE:
2136 return (a <= b);
2137 case OP_GT:
2138 return (a > b);
2139 case OP_GE:
2140 return (a >= b);
2141 default:
2142 return 0;
2148 =item C<static int
2149 evaluate_s_s(char *a, pir_rel_operator op, char *b)>
2151 Compare string C<a> with string C<b> using the operator C<op>.
2152 The function uses C's C<strcmp> function. Based on that result,
2153 which can be -1 (smaller), 0 (equal) or 1 (larger), a boolean
2154 result is returned.
2156 =cut
2159 static int
2160 evaluate_s_s(char * const a, pir_rel_operator op, char * const b) {
2161 int result = strcmp(a, b);
2163 switch (op) {
2164 case OP_NE:
2165 return (result != 0);
2166 case OP_EQ:
2167 return (result == 0);
2168 case OP_LT:
2169 return (result < 0);
2170 case OP_LE:
2171 return (result <= 0);
2172 case OP_GT:
2173 return (result > 0);
2174 case OP_GE:
2175 return (result >= 0);
2176 default:
2177 return -1;
2183 =item C<static int
2184 evaluate_s(char * const s)>
2186 Evaluate a string in boolean context; if the string's length is 0, it's false.
2187 If the string equals "0", ".0", "0." or "0.0", it's false.
2188 Otherwise, it's true.
2190 =cut
2193 static int
2194 evaluate_s(char * const s) {
2195 int strlen_s = strlen(s);
2197 if (strlen_s > 0) {
2198 if (strlen_s <= 3) { /* if strlen > 3, (max. nr of characters to represent "0")
2199 no need to do expensive string comparison; it must be true. */
2200 if ((strcmp(s, "0") == 0) || (strcmp(s, ".0") == 0)
2201 || (strcmp(s, "0.") == 0) || (strcmp(s, "0.0") == 0)) {
2202 return 0;
2204 else /* short string but not equal to "0.0" or a variant */
2205 return 1;
2207 else /* strlen > 3, so does not contain "0.0" or a variant */
2208 return 1;
2210 return 0; /* strlen is not larger than 0 */
2215 =item C<static int
2216 evaluate_c(constant * const c)>
2218 Evaluate a constant node in boolean context; if the constant is numeric,
2219 it must be non-zero to be true; if it's a string, C<evaluate_s> is invoked
2220 to evaluate the string.
2222 =cut
2225 static int
2226 evaluate_c(constant * const c) {
2227 switch (c->type) {
2228 case INT_TYPE:
2229 return (c->val.ival != 0);
2230 case NUM_TYPE:
2231 return (c->val.nval != 0);
2232 case STRING_TYPE:
2233 return evaluate_s(c->val.sval);
2234 case PMC_TYPE:
2235 case UNKNOWN_TYPE:
2236 panic("impossible constant type in evaluate_c()");
2237 break;
2239 return 0; /* keep compiler happy; will never happen. */
2244 =item C<static char *
2245 concat_strings(char *a, char *b)>
2247 Concatenates two strings into a new buffer; frees all memory
2248 of the old strings. The new string is returned.
2250 =cut
2253 static char *
2254 concat_strings(char *a, char *b) {
2255 int strlen_a = strlen(a);
2256 char *newstr = (char *)calloc(strlen_a + strlen(b) + 1, sizeof (char));
2257 assert(newstr != NULL);
2258 strcpy(newstr, a);
2259 strcpy(newstr + strlen_a, b);
2260 free(a);
2261 free(b);
2262 a = b = NULL;
2263 return newstr;
2268 =item C<static int
2269 is_parrot_op(lexer_state * const lexer, char const * const spelling)>
2271 =cut
2275 static int
2276 is_parrot_op(lexer_state * const lexer, char const * const spelling)
2278 char const * ops[] = {
2279 "print",
2280 "new",
2281 "newclass",
2282 "end",
2283 "set",
2284 "find_global",
2285 "set_hll_global",
2286 "get_hll_global",
2287 "setfile",
2288 "setline",
2289 "add",
2290 "sub",
2291 NULL
2295 char const **iter = ops;
2297 while (*iter != NULL) {
2298 if (strcmp(spelling, *iter) == 0)
2299 return 1;
2300 iter++;
2303 return 0;
2309 =item C<static void
2310 create_if_instr(yyscan_t yyscanner, lexer_state *lexer, int invert, int hasnull,
2311 char * const name, char * const label)>
2313 Create an C<if> or C<unless> instruction; if C<invert> is non-zero (true), the
2314 C<if> instruction is inverted, effectively becoming C<unless>.
2316 If C<hasnull> is non-zero (true), the C<if> instruction becomes C<if_null>; again,
2317 if C<invert> is non-zero, the instruction becomes C<unless_null>.
2319 C<name> is the name of the variable that is checked during this instruction
2321 =cut
2324 static void
2325 create_if_instr(yyscan_t yyscanner, lexer_state * const lexer, int invert, int hasnull,
2326 char * const name,
2327 char * const label)
2329 /* try to find the symbol; if it was declared it will be found; otherwise emit an error. */
2330 symbol *sym = find_symbol(lexer, name);
2331 if (sym == NULL) {
2332 sym = new_symbol(name, UNKNOWN_TYPE);
2333 yyerror(yyscanner, lexer, "symbol '%s' not declared'", name);
2335 /* if there was a keyword "null", use the if/unless_null instruction variants. */
2336 if (hasnull)
2337 set_instrf(lexer, invert ? "unless_null" : "if_null", "%T%I", target_from_symbol(sym),
2338 label);
2339 else
2340 set_instrf(lexer, invert ? "unless" : "if", "%T%I", target_from_symbol(sym), label);
2345 =item C<static int
2346 check_value(constant * const c, int val)>
2348 Check whether the current value of the constant C<c> equals C<val>.
2349 For our purposes, it is sufficient to check for integer values (including
2350 a check against 1.0 or 0.0). If the values are indeed equal, true is returned,
2351 false otherwise. If the constant is not numeric, it returns always false.
2353 =cut
2356 static int
2357 check_value(constant * const c, int val) {
2358 switch(c->type) {
2359 case INT_TYPE:
2360 return (c->val.ival == val);
2361 case NUM_TYPE:
2362 return (c->val.nval == val);
2363 default:
2364 break;
2366 return 0;
2371 =item C<static void
2372 do_strength_reduction(lexer_state * const lexer)>
2374 Implement strength reduction for the math operators C<add>, C<sub>, C<mul>, C<div> and C<fdiv>.
2375 If the current instruction is any of these, then the first two operands are checked; if both
2376 are targets and are equal, the second operand is removed; this means that the first operand
2377 will be an IN/OUT operand. For instance:
2379 add $I0, $I0, $I1
2381 becomes:
2383 add $I0, $I1
2387 add $I0, 1
2389 becomes:
2391 inc $I0
2393 =cut
2396 static void
2397 do_strength_reduction(lexer_state * const lexer) {
2398 char * const instr = get_instr(lexer);
2399 int op = -1;
2400 int num_operands;
2401 expression *arg1, *arg2;
2403 /* if the instruction is "add", "sub", "mul", "div" or "fdiv", do continue... */
2404 if (strcmp(instr, "add") == 0)
2405 op = OP_ADD;
2406 else if (strcmp(instr, "sub") == 0)
2407 op = OP_SUB;
2408 else if (strcmp(instr, "mul") == 0)
2409 op = OP_MUL;
2410 else if (strcmp(instr, "div") == 0)
2411 op = OP_DIV;
2412 else if (strcmp(instr, "fdiv") == 0)
2413 op = OP_FDIV;
2414 else
2415 return;
2417 num_operands = get_operand_count(lexer);
2418 if (num_operands > 2) {
2419 /* get the operands */
2420 expression *op1, *op2;
2421 get_operands(lexer, 2, &op1, &op2);
2423 /* check whether operands are in fact targets */
2424 if ((op1->type == EXPR_TARGET) && (op2->type == EXPR_TARGET)) {
2426 /* check whether targets are equal */
2427 if (targets_equal(op1->expr.t, op2->expr.t)) {
2428 /* in that case, remove the second one */
2429 remove_operand(lexer, 2);
2430 free(op2);
2431 --num_operands;
2436 /* don't even try to change "add $I0, 1" into "inc $I0" if number of operands is not 2 */
2437 if (num_operands != 2)
2438 return;
2440 arg1 = arg2 = NULL;
2441 get_operands(lexer, 2, &arg1, &arg2);
2442 assert(arg1);
2443 assert(arg2);
2445 switch (op) {
2446 case OP_ADD:
2447 case OP_SUB:
2448 if (arg2->type == EXPR_CONSTANT) {
2449 if (check_value(arg2->expr.c, 0)) {
2450 update_instr(lexer, "noop");
2451 remove_all_operands(lexer);
2453 else if (check_value(arg2->expr.c, 1)) {
2454 update_instr(lexer, opnames[op + 1]);
2455 remove_operand(lexer, 2);
2458 break;
2459 case OP_MUL:
2460 if (arg2->type == EXPR_CONSTANT) {
2461 if (check_value(arg2->expr.c, 0)) {
2462 update_instr(lexer, "null");
2463 remove_operand(lexer, 2);
2465 else if (check_value(arg2->expr.c, 1)) {
2466 update_instr(lexer, "noop");
2467 remove_all_operands(lexer);
2470 break;
2471 case OP_DIV:
2472 case OP_FDIV:
2473 if (arg2->type == EXPR_CONSTANT) {
2474 if (check_value(arg2->expr.c, 0))
2475 pirerror(lexer, "cannot divide by 0");
2476 else if (check_value(arg2->expr.c, 1)) {
2477 update_instr(lexer, "noop");
2478 remove_all_operands(lexer);
2481 break;
2482 default:
2483 break;
2491 =back
2493 =cut
2499 * Local variables:
2500 * c-file-style: "parrot"
2501 * End:
2502 * vim: expandtab shiftwidth=4: