Automatic date update in version.in
[binutils-gdb.git] / gdb / go-exp.y
blob1a6ebbe135bebe744124e2043cad4919f9425f45
1 /* YACC parser for Go expressions, for GDB.
3 Copyright (C) 2012-2024 Free Software Foundation, Inc.
5 This file is part of GDB.
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program. If not, see <http://www.gnu.org/licenses/>. */
20 /* This file is derived from c-exp.y, p-exp.y. */
22 /* Parse a Go expression from text in a string,
23 and return the result as a struct expression pointer.
24 That structure contains arithmetic operations in reverse polish,
25 with constants represented by operations that are followed by special data.
26 See expression.h for the details of the format.
27 What is important here is that it can be built up sequentially
28 during the process of parsing; the lower levels of the tree always
29 come first in the result.
31 Note that malloc's and realloc's in this file are transformed to
32 xmalloc and xrealloc respectively by the same sed command in the
33 makefile that remaps any other malloc/realloc inserted by the parser
34 generator. Doing this with #defines and trying to control the interaction
35 with include files (<malloc.h> and <stdlib.h> for example) just became
36 too messy, particularly when such includes can be inserted at random
37 times by the parser generator. */
39 /* Known bugs or limitations:
41 - Unicode
42 - &^
43 - '_' (blank identifier)
44 - automatic deref of pointers
45 - method expressions
46 - interfaces, channels, etc.
48 And lots of other things.
49 I'm sure there's some cleanup to do.
54 #include <ctype.h>
55 #include "expression.h"
56 #include "value.h"
57 #include "parser-defs.h"
58 #include "language.h"
59 #include "c-lang.h"
60 #include "go-lang.h"
61 #include "charset.h"
62 #include "block.h"
63 #include "expop.h"
65 #define parse_type(ps) builtin_type (ps->gdbarch ())
67 /* Remap normal yacc parser interface names (yyparse, yylex, yyerror,
68 etc). */
69 #define GDB_YY_REMAP_PREFIX go_
70 #include "yy-remap.h"
72 /* The state of the parser, used internally when we are parsing the
73 expression. */
75 static struct parser_state *pstate = NULL;
77 int yyparse (void);
79 static int yylex (void);
81 static void yyerror (const char *);
85 /* Although the yacc "value" of an expression is not used,
86 since the result is stored in the structure being created,
87 other node types do have values. */
89 %union
91 LONGEST lval;
92 struct {
93 LONGEST val;
94 struct type *type;
95 } typed_val_int;
96 struct {
97 gdb_byte val[16];
98 struct type *type;
99 } typed_val_float;
100 struct stoken sval;
101 struct symtoken ssym;
102 struct type *tval;
103 struct typed_stoken tsval;
104 struct ttype tsym;
105 int voidval;
106 enum exp_opcode opcode;
107 struct internalvar *ivar;
108 struct stoken_vector svec;
112 /* YYSTYPE gets defined by %union. */
113 static int parse_number (struct parser_state *,
114 const char *, int, int, YYSTYPE *);
116 using namespace expr;
119 %type <voidval> exp exp1 type_exp start variable lcurly
120 %type <lval> rcurly
121 %type <tval> type
123 %token <typed_val_int> INT
124 %token <typed_val_float> FLOAT
126 /* Both NAME and TYPENAME tokens represent symbols in the input,
127 and both convey their data as strings.
128 But a TYPENAME is a string that happens to be defined as a type
129 or builtin type name (such as int or char)
130 and a NAME is any other symbol.
131 Contexts where this distinction is not important can use the
132 nonterminal "name", which matches either NAME or TYPENAME. */
134 %token <tsval> RAW_STRING
135 %token <tsval> STRING
136 %token <tsval> CHAR
137 %token <ssym> NAME
138 %token <tsym> TYPENAME /* Not TYPE_NAME cus already taken. */
139 %token <voidval> COMPLETE
140 /*%type <sval> name*/
141 %type <svec> string_exp
142 %type <ssym> name_not_typename
144 /* A NAME_OR_INT is a symbol which is not known in the symbol table,
145 but which would parse as a valid number in the current input radix.
146 E.g. "c" when input_radix==16. Depending on the parse, it will be
147 turned into a name or into a number. */
148 %token <ssym> NAME_OR_INT
150 %token <lval> TRUE_KEYWORD FALSE_KEYWORD
151 %token STRUCT_KEYWORD INTERFACE_KEYWORD TYPE_KEYWORD CHAN_KEYWORD
152 %token SIZEOF_KEYWORD
153 %token LEN_KEYWORD CAP_KEYWORD
154 %token NEW_KEYWORD
155 %token IOTA_KEYWORD NIL_KEYWORD
156 %token CONST_KEYWORD
157 %token DOTDOTDOT
158 %token ENTRY
159 %token ERROR
161 /* Special type cases. */
162 %token BYTE_KEYWORD /* An alias of uint8. */
164 %token <sval> DOLLAR_VARIABLE
166 %token <opcode> ASSIGN_MODIFY
168 %left ','
169 %left ABOVE_COMMA
170 %right '=' ASSIGN_MODIFY
171 %right '?'
172 %left OROR
173 %left ANDAND
174 %left '|'
175 %left '^'
176 %left '&'
177 %left ANDNOT
178 %left EQUAL NOTEQUAL
179 %left '<' '>' LEQ GEQ
180 %left LSH RSH
181 %left '@'
182 %left '+' '-'
183 %left '*' '/' '%'
184 %right UNARY INCREMENT DECREMENT
185 %right LEFT_ARROW '.' '[' '('
190 start : exp1
191 | type_exp
194 type_exp: type
195 { pstate->push_new<type_operation> ($1); }
198 /* Expressions, including the comma operator. */
199 exp1 : exp
200 | exp1 ',' exp
201 { pstate->wrap2<comma_operation> (); }
204 /* Expressions, not including the comma operator. */
205 exp : '*' exp %prec UNARY
206 { pstate->wrap<unop_ind_operation> (); }
209 exp : '&' exp %prec UNARY
210 { pstate->wrap<unop_addr_operation> (); }
213 exp : '-' exp %prec UNARY
214 { pstate->wrap<unary_neg_operation> (); }
217 exp : '+' exp %prec UNARY
218 { pstate->wrap<unary_plus_operation> (); }
221 exp : '!' exp %prec UNARY
222 { pstate->wrap<unary_logical_not_operation> (); }
225 exp : '^' exp %prec UNARY
226 { pstate->wrap<unary_complement_operation> (); }
229 exp : exp INCREMENT %prec UNARY
230 { pstate->wrap<postinc_operation> (); }
233 exp : exp DECREMENT %prec UNARY
234 { pstate->wrap<postdec_operation> (); }
237 /* foo->bar is not in Go. May want as a gdb extension. Later. */
239 exp : exp '.' name_not_typename
241 pstate->push_new<structop_operation>
242 (pstate->pop (), copy_name ($3.stoken));
246 exp : exp '.' name_not_typename COMPLETE
248 structop_base_operation *op
249 = new structop_operation (pstate->pop (),
250 copy_name ($3.stoken));
251 pstate->mark_struct_expression (op);
252 pstate->push (operation_up (op));
256 exp : exp '.' COMPLETE
258 structop_base_operation *op
259 = new structop_operation (pstate->pop (), "");
260 pstate->mark_struct_expression (op);
261 pstate->push (operation_up (op));
265 exp : exp '[' exp1 ']'
266 { pstate->wrap2<subscript_operation> (); }
269 exp : exp '('
270 /* This is to save the value of arglist_len
271 being accumulated by an outer function call. */
272 { pstate->start_arglist (); }
273 arglist ')' %prec LEFT_ARROW
275 std::vector<operation_up> args
276 = pstate->pop_vector (pstate->end_arglist ());
277 pstate->push_new<funcall_operation>
278 (pstate->pop (), std::move (args));
282 lcurly : '{'
283 { pstate->start_arglist (); }
286 arglist :
289 arglist : exp
290 { pstate->arglist_len = 1; }
293 arglist : arglist ',' exp %prec ABOVE_COMMA
294 { pstate->arglist_len++; }
297 rcurly : '}'
298 { $$ = pstate->end_arglist () - 1; }
301 exp : lcurly type rcurly exp %prec UNARY
303 pstate->push_new<unop_memval_operation>
304 (pstate->pop (), $2);
308 exp : type '(' exp ')' %prec UNARY
310 pstate->push_new<unop_cast_operation>
311 (pstate->pop (), $1);
315 exp : '(' exp1 ')'
319 /* Binary operators in order of decreasing precedence. */
321 exp : exp '@' exp
322 { pstate->wrap2<repeat_operation> (); }
325 exp : exp '*' exp
326 { pstate->wrap2<mul_operation> (); }
329 exp : exp '/' exp
330 { pstate->wrap2<div_operation> (); }
333 exp : exp '%' exp
334 { pstate->wrap2<rem_operation> (); }
337 exp : exp '+' exp
338 { pstate->wrap2<add_operation> (); }
341 exp : exp '-' exp
342 { pstate->wrap2<sub_operation> (); }
345 exp : exp LSH exp
346 { pstate->wrap2<lsh_operation> (); }
349 exp : exp RSH exp
350 { pstate->wrap2<rsh_operation> (); }
353 exp : exp EQUAL exp
354 { pstate->wrap2<equal_operation> (); }
357 exp : exp NOTEQUAL exp
358 { pstate->wrap2<notequal_operation> (); }
361 exp : exp LEQ exp
362 { pstate->wrap2<leq_operation> (); }
365 exp : exp GEQ exp
366 { pstate->wrap2<geq_operation> (); }
369 exp : exp '<' exp
370 { pstate->wrap2<less_operation> (); }
373 exp : exp '>' exp
374 { pstate->wrap2<gtr_operation> (); }
377 exp : exp '&' exp
378 { pstate->wrap2<bitwise_and_operation> (); }
381 exp : exp '^' exp
382 { pstate->wrap2<bitwise_xor_operation> (); }
385 exp : exp '|' exp
386 { pstate->wrap2<bitwise_ior_operation> (); }
389 exp : exp ANDAND exp
390 { pstate->wrap2<logical_and_operation> (); }
393 exp : exp OROR exp
394 { pstate->wrap2<logical_or_operation> (); }
397 exp : exp '?' exp ':' exp %prec '?'
399 operation_up last = pstate->pop ();
400 operation_up mid = pstate->pop ();
401 operation_up first = pstate->pop ();
402 pstate->push_new<ternop_cond_operation>
403 (std::move (first), std::move (mid),
404 std::move (last));
408 exp : exp '=' exp
409 { pstate->wrap2<assign_operation> (); }
412 exp : exp ASSIGN_MODIFY exp
414 operation_up rhs = pstate->pop ();
415 operation_up lhs = pstate->pop ();
416 pstate->push_new<assign_modify_operation>
417 ($2, std::move (lhs), std::move (rhs));
421 exp : INT
423 pstate->push_new<long_const_operation>
424 ($1.type, $1.val);
428 exp : CHAR
430 struct stoken_vector vec;
431 vec.len = 1;
432 vec.tokens = &$1;
433 pstate->push_c_string ($1.type, &vec);
437 exp : NAME_OR_INT
438 { YYSTYPE val;
439 parse_number (pstate, $1.stoken.ptr,
440 $1.stoken.length, 0, &val);
441 pstate->push_new<long_const_operation>
442 (val.typed_val_int.type,
443 val.typed_val_int.val);
448 exp : FLOAT
450 float_data data;
451 std::copy (std::begin ($1.val), std::end ($1.val),
452 std::begin (data));
453 pstate->push_new<float_const_operation> ($1.type, data);
457 exp : variable
460 exp : DOLLAR_VARIABLE
462 pstate->push_dollar ($1);
466 exp : SIZEOF_KEYWORD '(' type ')' %prec UNARY
468 /* TODO(dje): Go objects in structs. */
469 /* TODO(dje): What's the right type here? */
470 struct type *size_type
471 = parse_type (pstate)->builtin_unsigned_int;
472 $3 = check_typedef ($3);
473 pstate->push_new<long_const_operation>
474 (size_type, (LONGEST) $3->length ());
478 exp : SIZEOF_KEYWORD '(' exp ')' %prec UNARY
480 /* TODO(dje): Go objects in structs. */
481 pstate->wrap<unop_sizeof_operation> ();
484 string_exp:
485 STRING
487 /* We copy the string here, and not in the
488 lexer, to guarantee that we do not leak a
489 string. */
490 /* Note that we NUL-terminate here, but just
491 for convenience. */
492 struct typed_stoken *vec = XNEW (struct typed_stoken);
493 $$.len = 1;
494 $$.tokens = vec;
496 vec->type = $1.type;
497 vec->length = $1.length;
498 vec->ptr = (char *) malloc ($1.length + 1);
499 memcpy (vec->ptr, $1.ptr, $1.length + 1);
502 | string_exp '+' STRING
504 /* Note that we NUL-terminate here, but just
505 for convenience. */
506 char *p;
507 ++$$.len;
508 $$.tokens = XRESIZEVEC (struct typed_stoken,
509 $$.tokens, $$.len);
511 p = (char *) malloc ($3.length + 1);
512 memcpy (p, $3.ptr, $3.length + 1);
514 $$.tokens[$$.len - 1].type = $3.type;
515 $$.tokens[$$.len - 1].length = $3.length;
516 $$.tokens[$$.len - 1].ptr = p;
520 exp : string_exp %prec ABOVE_COMMA
522 int i;
524 /* Always utf8. */
525 pstate->push_c_string (0, &$1);
526 for (i = 0; i < $1.len; ++i)
527 free ($1.tokens[i].ptr);
528 free ($1.tokens);
532 exp : TRUE_KEYWORD
533 { pstate->push_new<bool_operation> ($1); }
536 exp : FALSE_KEYWORD
537 { pstate->push_new<bool_operation> ($1); }
540 variable: name_not_typename ENTRY
541 { struct symbol *sym = $1.sym.symbol;
543 if (sym == NULL
544 || !sym->is_argument ()
545 || !symbol_read_needs_frame (sym))
546 error (_("@entry can be used only for function "
547 "parameters, not for \"%s\""),
548 copy_name ($1.stoken).c_str ());
550 pstate->push_new<var_entry_value_operation> (sym);
554 variable: name_not_typename
555 { struct block_symbol sym = $1.sym;
557 if (sym.symbol)
559 if (symbol_read_needs_frame (sym.symbol))
560 pstate->block_tracker->update (sym);
562 pstate->push_new<var_value_operation> (sym);
564 else if ($1.is_a_field_of_this)
566 /* TODO(dje): Can we get here?
567 E.g., via a mix of c++ and go? */
568 gdb_assert_not_reached ("go with `this' field");
570 else
572 struct bound_minimal_symbol msymbol;
573 std::string arg = copy_name ($1.stoken);
575 msymbol =
576 lookup_bound_minimal_symbol (arg.c_str ());
577 if (msymbol.minsym != NULL)
578 pstate->push_new<var_msym_value_operation>
579 (msymbol);
580 else if (!have_full_symbols ()
581 && !have_partial_symbols ())
582 error (_("No symbol table is loaded. "
583 "Use the \"file\" command."));
584 else
585 error (_("No symbol \"%s\" in current context."),
586 arg.c_str ());
591 /* TODO
592 method_exp: PACKAGENAME '.' name '.' name
598 type /* Implements (approximately): [*] type-specifier */
599 : '*' type
600 { $$ = lookup_pointer_type ($2); }
601 | TYPENAME
602 { $$ = $1.type; }
604 | STRUCT_KEYWORD name
605 { $$ = lookup_struct (copy_name ($2),
606 expression_context_block); }
608 | BYTE_KEYWORD
609 { $$ = builtin_go_type (pstate->gdbarch ())
610 ->builtin_uint8; }
613 /* TODO
614 name : NAME { $$ = $1.stoken; }
615 | TYPENAME { $$ = $1.stoken; }
616 | NAME_OR_INT { $$ = $1.stoken; }
620 name_not_typename
621 : NAME
622 /* These would be useful if name_not_typename was useful, but it is just
623 a fake for "variable", so these cause reduce/reduce conflicts because
624 the parser can't tell whether NAME_OR_INT is a name_not_typename (=variable,
625 =exp) or just an exp. If name_not_typename was ever used in an lvalue
626 context where only a name could occur, this might be useful.
627 | NAME_OR_INT
633 /* Take care of parsing a number (anything that starts with a digit).
634 Set yylval and return the token type; update lexptr.
635 LEN is the number of characters in it. */
637 /* FIXME: Needs some error checking for the float case. */
638 /* FIXME(dje): IWBN to use c-exp.y's parse_number if we could.
639 That will require moving the guts into a function that we both call
640 as our YYSTYPE is different than c-exp.y's */
642 static int
643 parse_number (struct parser_state *par_state,
644 const char *p, int len, int parsed_float, YYSTYPE *putithere)
646 ULONGEST n = 0;
647 ULONGEST prevn = 0;
649 int i = 0;
650 int c;
651 int base = input_radix;
652 int unsigned_p = 0;
654 /* Number of "L" suffixes encountered. */
655 int long_p = 0;
657 /* We have found a "L" or "U" suffix. */
658 int found_suffix = 0;
660 if (parsed_float)
662 const struct builtin_go_type *builtin_go_types
663 = builtin_go_type (par_state->gdbarch ());
665 /* Handle suffixes: 'f' for float32, 'l' for long double.
666 FIXME: This appears to be an extension -- do we want this? */
667 if (len >= 1 && tolower (p[len - 1]) == 'f')
669 putithere->typed_val_float.type
670 = builtin_go_types->builtin_float32;
671 len--;
673 else if (len >= 1 && tolower (p[len - 1]) == 'l')
675 putithere->typed_val_float.type
676 = parse_type (par_state)->builtin_long_double;
677 len--;
679 /* Default type for floating-point literals is float64. */
680 else
682 putithere->typed_val_float.type
683 = builtin_go_types->builtin_float64;
686 if (!parse_float (p, len,
687 putithere->typed_val_float.type,
688 putithere->typed_val_float.val))
689 return ERROR;
690 return FLOAT;
693 /* Handle base-switching prefixes 0x, 0t, 0d, 0. */
694 if (p[0] == '0' && len > 1)
695 switch (p[1])
697 case 'x':
698 case 'X':
699 if (len >= 3)
701 p += 2;
702 base = 16;
703 len -= 2;
705 break;
707 case 'b':
708 case 'B':
709 if (len >= 3)
711 p += 2;
712 base = 2;
713 len -= 2;
715 break;
717 case 't':
718 case 'T':
719 case 'd':
720 case 'D':
721 if (len >= 3)
723 p += 2;
724 base = 10;
725 len -= 2;
727 break;
729 default:
730 base = 8;
731 break;
734 while (len-- > 0)
736 c = *p++;
737 if (c >= 'A' && c <= 'Z')
738 c += 'a' - 'A';
739 if (c != 'l' && c != 'u')
740 n *= base;
741 if (c >= '0' && c <= '9')
743 if (found_suffix)
744 return ERROR;
745 n += i = c - '0';
747 else
749 if (base > 10 && c >= 'a' && c <= 'f')
751 if (found_suffix)
752 return ERROR;
753 n += i = c - 'a' + 10;
755 else if (c == 'l')
757 ++long_p;
758 found_suffix = 1;
760 else if (c == 'u')
762 unsigned_p = 1;
763 found_suffix = 1;
765 else
766 return ERROR; /* Char not a digit */
768 if (i >= base)
769 return ERROR; /* Invalid digit in this base. */
771 if (c != 'l' && c != 'u')
773 /* Test for overflow. */
774 if (n == 0 && prevn == 0)
776 else if (prevn >= n)
777 error (_("Numeric constant too large."));
779 prevn = n;
782 /* An integer constant is an int, a long, or a long long. An L
783 suffix forces it to be long; an LL suffix forces it to be long
784 long. If not forced to a larger size, it gets the first type of
785 the above that it fits in. To figure out whether it fits, we
786 shift it right and see whether anything remains. Note that we
787 can't shift sizeof (LONGEST) * HOST_CHAR_BIT bits or more in one
788 operation, because many compilers will warn about such a shift
789 (which always produces a zero result). Sometimes gdbarch_int_bit
790 or gdbarch_long_bit will be that big, sometimes not. To deal with
791 the case where it is we just always shift the value more than
792 once, with fewer bits each time. */
794 int int_bits = gdbarch_int_bit (par_state->gdbarch ());
795 int long_bits = gdbarch_long_bit (par_state->gdbarch ());
796 int long_long_bits = gdbarch_long_long_bit (par_state->gdbarch ());
797 bool have_signed = !unsigned_p;
798 bool have_int = long_p == 0;
799 bool have_long = long_p <= 1;
800 if (have_int && have_signed && fits_in_type (1, n, int_bits, true))
801 putithere->typed_val_int.type = parse_type (par_state)->builtin_int;
802 else if (have_int && fits_in_type (1, n, int_bits, false))
803 putithere->typed_val_int.type
804 = parse_type (par_state)->builtin_unsigned_int;
805 else if (have_long && have_signed && fits_in_type (1, n, long_bits, true))
806 putithere->typed_val_int.type = parse_type (par_state)->builtin_long;
807 else if (have_long && fits_in_type (1, n, long_bits, false))
808 putithere->typed_val_int.type
809 = parse_type (par_state)->builtin_unsigned_long;
810 else if (have_signed && fits_in_type (1, n, long_long_bits, true))
811 putithere->typed_val_int.type
812 = parse_type (par_state)->builtin_long_long;
813 else if (fits_in_type (1, n, long_long_bits, false))
814 putithere->typed_val_int.type
815 = parse_type (par_state)->builtin_unsigned_long_long;
816 else
817 error (_("Numeric constant too large."));
818 putithere->typed_val_int.val = n;
820 return INT;
823 /* Temporary obstack used for holding strings. */
824 static struct obstack tempbuf;
825 static int tempbuf_init;
827 /* Parse a string or character literal from TOKPTR. The string or
828 character may be wide or unicode. *OUTPTR is set to just after the
829 end of the literal in the input string. The resulting token is
830 stored in VALUE. This returns a token value, either STRING or
831 CHAR, depending on what was parsed. *HOST_CHARS is set to the
832 number of host characters in the literal. */
834 static int
835 parse_string_or_char (const char *tokptr, const char **outptr,
836 struct typed_stoken *value, int *host_chars)
838 int quote;
840 /* Build the gdb internal form of the input string in tempbuf. Note
841 that the buffer is null byte terminated *only* for the
842 convenience of debugging gdb itself and printing the buffer
843 contents when the buffer contains no embedded nulls. Gdb does
844 not depend upon the buffer being null byte terminated, it uses
845 the length string instead. This allows gdb to handle C strings
846 (as well as strings in other languages) with embedded null
847 bytes */
849 if (!tempbuf_init)
850 tempbuf_init = 1;
851 else
852 obstack_free (&tempbuf, NULL);
853 obstack_init (&tempbuf);
855 /* Skip the quote. */
856 quote = *tokptr;
857 ++tokptr;
859 *host_chars = 0;
861 while (*tokptr)
863 char c = *tokptr;
864 if (c == '\\')
866 ++tokptr;
867 *host_chars += c_parse_escape (&tokptr, &tempbuf);
869 else if (c == quote)
870 break;
871 else
873 obstack_1grow (&tempbuf, c);
874 ++tokptr;
875 /* FIXME: this does the wrong thing with multi-byte host
876 characters. We could use mbrlen here, but that would
877 make "set host-charset" a bit less useful. */
878 ++*host_chars;
882 if (*tokptr != quote)
884 if (quote == '"')
885 error (_("Unterminated string in expression."));
886 else
887 error (_("Unmatched single quote."));
889 ++tokptr;
891 value->type = (int) C_STRING | (quote == '\'' ? C_CHAR : 0); /*FIXME*/
892 value->ptr = (char *) obstack_base (&tempbuf);
893 value->length = obstack_object_size (&tempbuf);
895 *outptr = tokptr;
897 return quote == '\'' ? CHAR : STRING;
900 struct go_token
902 const char *oper;
903 int token;
904 enum exp_opcode opcode;
907 static const struct go_token tokentab3[] =
909 {">>=", ASSIGN_MODIFY, BINOP_RSH},
910 {"<<=", ASSIGN_MODIFY, BINOP_LSH},
911 /*{"&^=", ASSIGN_MODIFY, BINOP_BITWISE_ANDNOT}, TODO */
912 {"...", DOTDOTDOT, OP_NULL},
915 static const struct go_token tokentab2[] =
917 {"+=", ASSIGN_MODIFY, BINOP_ADD},
918 {"-=", ASSIGN_MODIFY, BINOP_SUB},
919 {"*=", ASSIGN_MODIFY, BINOP_MUL},
920 {"/=", ASSIGN_MODIFY, BINOP_DIV},
921 {"%=", ASSIGN_MODIFY, BINOP_REM},
922 {"|=", ASSIGN_MODIFY, BINOP_BITWISE_IOR},
923 {"&=", ASSIGN_MODIFY, BINOP_BITWISE_AND},
924 {"^=", ASSIGN_MODIFY, BINOP_BITWISE_XOR},
925 {"++", INCREMENT, OP_NULL},
926 {"--", DECREMENT, OP_NULL},
927 /*{"->", RIGHT_ARROW, OP_NULL}, Doesn't exist in Go. */
928 {"<-", LEFT_ARROW, OP_NULL},
929 {"&&", ANDAND, OP_NULL},
930 {"||", OROR, OP_NULL},
931 {"<<", LSH, OP_NULL},
932 {">>", RSH, OP_NULL},
933 {"==", EQUAL, OP_NULL},
934 {"!=", NOTEQUAL, OP_NULL},
935 {"<=", LEQ, OP_NULL},
936 {">=", GEQ, OP_NULL},
937 /*{"&^", ANDNOT, OP_NULL}, TODO */
940 /* Identifier-like tokens. */
941 static const struct go_token ident_tokens[] =
943 {"true", TRUE_KEYWORD, OP_NULL},
944 {"false", FALSE_KEYWORD, OP_NULL},
945 {"nil", NIL_KEYWORD, OP_NULL},
946 {"const", CONST_KEYWORD, OP_NULL},
947 {"struct", STRUCT_KEYWORD, OP_NULL},
948 {"type", TYPE_KEYWORD, OP_NULL},
949 {"interface", INTERFACE_KEYWORD, OP_NULL},
950 {"chan", CHAN_KEYWORD, OP_NULL},
951 {"byte", BYTE_KEYWORD, OP_NULL}, /* An alias of uint8. */
952 {"len", LEN_KEYWORD, OP_NULL},
953 {"cap", CAP_KEYWORD, OP_NULL},
954 {"new", NEW_KEYWORD, OP_NULL},
955 {"iota", IOTA_KEYWORD, OP_NULL},
958 /* This is set if a NAME token appeared at the very end of the input
959 string, with no whitespace separating the name from the EOF. This
960 is used only when parsing to do field name completion. */
961 static int saw_name_at_eof;
963 /* This is set if the previously-returned token was a structure
964 operator -- either '.' or ARROW. This is used only when parsing to
965 do field name completion. */
966 static int last_was_structop;
968 /* Depth of parentheses. */
969 static int paren_depth;
971 /* Read one token, getting characters through lexptr. */
973 static int
974 lex_one_token (struct parser_state *par_state)
976 int c;
977 int namelen;
978 const char *tokstart;
979 int saw_structop = last_was_structop;
981 last_was_structop = 0;
983 retry:
985 par_state->prev_lexptr = par_state->lexptr;
987 tokstart = par_state->lexptr;
988 /* See if it is a special token of length 3. */
989 for (const auto &token : tokentab3)
990 if (strncmp (tokstart, token.oper, 3) == 0)
992 par_state->lexptr += 3;
993 yylval.opcode = token.opcode;
994 return token.token;
997 /* See if it is a special token of length 2. */
998 for (const auto &token : tokentab2)
999 if (strncmp (tokstart, token.oper, 2) == 0)
1001 par_state->lexptr += 2;
1002 yylval.opcode = token.opcode;
1003 /* NOTE: -> doesn't exist in Go, so we don't need to watch for
1004 setting last_was_structop here. */
1005 return token.token;
1008 switch (c = *tokstart)
1010 case 0:
1011 if (saw_name_at_eof)
1013 saw_name_at_eof = 0;
1014 return COMPLETE;
1016 else if (saw_structop)
1017 return COMPLETE;
1018 else
1019 return 0;
1021 case ' ':
1022 case '\t':
1023 case '\n':
1024 par_state->lexptr++;
1025 goto retry;
1027 case '[':
1028 case '(':
1029 paren_depth++;
1030 par_state->lexptr++;
1031 return c;
1033 case ']':
1034 case ')':
1035 if (paren_depth == 0)
1036 return 0;
1037 paren_depth--;
1038 par_state->lexptr++;
1039 return c;
1041 case ',':
1042 if (pstate->comma_terminates
1043 && paren_depth == 0)
1044 return 0;
1045 par_state->lexptr++;
1046 return c;
1048 case '.':
1049 /* Might be a floating point number. */
1050 if (par_state->lexptr[1] < '0' || par_state->lexptr[1] > '9')
1052 if (pstate->parse_completion)
1053 last_was_structop = 1;
1054 goto symbol; /* Nope, must be a symbol. */
1056 [[fallthrough]];
1058 case '0':
1059 case '1':
1060 case '2':
1061 case '3':
1062 case '4':
1063 case '5':
1064 case '6':
1065 case '7':
1066 case '8':
1067 case '9':
1069 /* It's a number. */
1070 int got_dot = 0, got_e = 0, toktype;
1071 const char *p = tokstart;
1072 int hex = input_radix > 10;
1074 if (c == '0' && (p[1] == 'x' || p[1] == 'X'))
1076 p += 2;
1077 hex = 1;
1080 for (;; ++p)
1082 /* This test includes !hex because 'e' is a valid hex digit
1083 and thus does not indicate a floating point number when
1084 the radix is hex. */
1085 if (!hex && !got_e && (*p == 'e' || *p == 'E'))
1086 got_dot = got_e = 1;
1087 /* This test does not include !hex, because a '.' always indicates
1088 a decimal floating point number regardless of the radix. */
1089 else if (!got_dot && *p == '.')
1090 got_dot = 1;
1091 else if (got_e && (p[-1] == 'e' || p[-1] == 'E')
1092 && (*p == '-' || *p == '+'))
1093 /* This is the sign of the exponent, not the end of the
1094 number. */
1095 continue;
1096 /* We will take any letters or digits. parse_number will
1097 complain if past the radix, or if L or U are not final. */
1098 else if ((*p < '0' || *p > '9')
1099 && ((*p < 'a' || *p > 'z')
1100 && (*p < 'A' || *p > 'Z')))
1101 break;
1103 toktype = parse_number (par_state, tokstart, p - tokstart,
1104 got_dot|got_e, &yylval);
1105 if (toktype == ERROR)
1106 error (_("Invalid number \"%.*s\"."), (int) (p - tokstart),
1107 tokstart);
1108 par_state->lexptr = p;
1109 return toktype;
1112 case '@':
1114 const char *p = &tokstart[1];
1115 size_t len = strlen ("entry");
1117 while (isspace (*p))
1118 p++;
1119 if (strncmp (p, "entry", len) == 0 && !isalnum (p[len])
1120 && p[len] != '_')
1122 par_state->lexptr = &p[len];
1123 return ENTRY;
1126 [[fallthrough]];
1127 case '+':
1128 case '-':
1129 case '*':
1130 case '/':
1131 case '%':
1132 case '|':
1133 case '&':
1134 case '^':
1135 case '~':
1136 case '!':
1137 case '<':
1138 case '>':
1139 case '?':
1140 case ':':
1141 case '=':
1142 case '{':
1143 case '}':
1144 symbol:
1145 par_state->lexptr++;
1146 return c;
1148 case '\'':
1149 case '"':
1150 case '`':
1152 int host_len;
1153 int result = parse_string_or_char (tokstart, &par_state->lexptr,
1154 &yylval.tsval, &host_len);
1155 if (result == CHAR)
1157 if (host_len == 0)
1158 error (_("Empty character constant."));
1159 else if (host_len > 2 && c == '\'')
1161 ++tokstart;
1162 namelen = par_state->lexptr - tokstart - 1;
1163 goto tryname;
1165 else if (host_len > 1)
1166 error (_("Invalid character constant."));
1168 return result;
1172 if (!(c == '_' || c == '$'
1173 || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')))
1174 /* We must have come across a bad character (e.g. ';'). */
1175 error (_("Invalid character '%c' in expression."), c);
1177 /* It's a name. See how long it is. */
1178 namelen = 0;
1179 for (c = tokstart[namelen];
1180 (c == '_' || c == '$' || (c >= '0' && c <= '9')
1181 || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'));)
1183 c = tokstart[++namelen];
1186 /* The token "if" terminates the expression and is NOT removed from
1187 the input stream. It doesn't count if it appears in the
1188 expansion of a macro. */
1189 if (namelen == 2
1190 && tokstart[0] == 'i'
1191 && tokstart[1] == 'f')
1193 return 0;
1196 /* For the same reason (breakpoint conditions), "thread N"
1197 terminates the expression. "thread" could be an identifier, but
1198 an identifier is never followed by a number without intervening
1199 punctuation.
1200 Handle abbreviations of these, similarly to
1201 breakpoint.c:find_condition_and_thread.
1202 TODO: Watch for "goroutine" here? */
1203 if (namelen >= 1
1204 && strncmp (tokstart, "thread", namelen) == 0
1205 && (tokstart[namelen] == ' ' || tokstart[namelen] == '\t'))
1207 const char *p = tokstart + namelen + 1;
1209 while (*p == ' ' || *p == '\t')
1210 p++;
1211 if (*p >= '0' && *p <= '9')
1212 return 0;
1215 par_state->lexptr += namelen;
1217 tryname:
1219 yylval.sval.ptr = tokstart;
1220 yylval.sval.length = namelen;
1222 /* Catch specific keywords. */
1223 std::string copy = copy_name (yylval.sval);
1224 for (const auto &token : ident_tokens)
1225 if (copy == token.oper)
1227 /* It is ok to always set this, even though we don't always
1228 strictly need to. */
1229 yylval.opcode = token.opcode;
1230 return token.token;
1233 if (*tokstart == '$')
1234 return DOLLAR_VARIABLE;
1236 if (pstate->parse_completion && *par_state->lexptr == '\0')
1237 saw_name_at_eof = 1;
1238 return NAME;
1241 /* An object of this type is pushed on a FIFO by the "outer" lexer. */
1242 struct go_token_and_value
1244 int token;
1245 YYSTYPE value;
1248 /* A FIFO of tokens that have been read but not yet returned to the
1249 parser. */
1250 static std::vector<go_token_and_value> token_fifo;
1252 /* Non-zero if the lexer should return tokens from the FIFO. */
1253 static int popping;
1255 /* Temporary storage for yylex; this holds symbol names as they are
1256 built up. */
1257 static auto_obstack name_obstack;
1259 /* Build "package.name" in name_obstack.
1260 For convenience of the caller, the name is NUL-terminated,
1261 but the NUL is not included in the recorded length. */
1263 static struct stoken
1264 build_packaged_name (const char *package, int package_len,
1265 const char *name, int name_len)
1267 struct stoken result;
1269 name_obstack.clear ();
1270 obstack_grow (&name_obstack, package, package_len);
1271 obstack_grow_str (&name_obstack, ".");
1272 obstack_grow (&name_obstack, name, name_len);
1273 obstack_grow (&name_obstack, "", 1);
1274 result.ptr = (char *) obstack_base (&name_obstack);
1275 result.length = obstack_object_size (&name_obstack) - 1;
1277 return result;
1280 /* Return non-zero if NAME is a package name.
1281 BLOCK is the scope in which to interpret NAME; this can be NULL
1282 to mean the global scope. */
1284 static int
1285 package_name_p (const char *name, const struct block *block)
1287 struct symbol *sym;
1288 struct field_of_this_result is_a_field_of_this;
1290 sym = lookup_symbol (name, block, SEARCH_TYPE_DOMAIN,
1291 &is_a_field_of_this).symbol;
1293 if (sym
1294 && sym->aclass () == LOC_TYPEDEF
1295 && sym->type ()->code () == TYPE_CODE_MODULE)
1296 return 1;
1298 return 0;
1301 /* Classify a (potential) function in the "unsafe" package.
1302 We fold these into "keywords" to keep things simple, at least until
1303 something more complex is warranted. */
1305 static int
1306 classify_unsafe_function (struct stoken function_name)
1308 std::string copy = copy_name (function_name);
1310 if (copy == "Sizeof")
1312 yylval.sval = function_name;
1313 return SIZEOF_KEYWORD;
1316 error (_("Unknown function in `unsafe' package: %s"), copy.c_str ());
1319 /* Classify token(s) "name1.name2" where name1 is known to be a package.
1320 The contents of the token are in `yylval'.
1321 Updates yylval and returns the new token type.
1323 The result is one of NAME, NAME_OR_INT, or TYPENAME. */
1325 static int
1326 classify_packaged_name (const struct block *block)
1328 struct block_symbol sym;
1329 struct field_of_this_result is_a_field_of_this;
1331 std::string copy = copy_name (yylval.sval);
1333 sym = lookup_symbol (copy.c_str (), block, SEARCH_VFT, &is_a_field_of_this);
1335 if (sym.symbol)
1337 yylval.ssym.sym = sym;
1338 yylval.ssym.is_a_field_of_this = is_a_field_of_this.type != NULL;
1341 return NAME;
1344 /* Classify a NAME token.
1345 The contents of the token are in `yylval'.
1346 Updates yylval and returns the new token type.
1347 BLOCK is the block in which lookups start; this can be NULL
1348 to mean the global scope.
1350 The result is one of NAME, NAME_OR_INT, or TYPENAME. */
1352 static int
1353 classify_name (struct parser_state *par_state, const struct block *block)
1355 struct type *type;
1356 struct block_symbol sym;
1357 struct field_of_this_result is_a_field_of_this;
1359 std::string copy = copy_name (yylval.sval);
1361 /* Try primitive types first so they win over bad/weird debug info. */
1362 type = language_lookup_primitive_type (par_state->language (),
1363 par_state->gdbarch (),
1364 copy.c_str ());
1365 if (type != NULL)
1367 /* NOTE: We take advantage of the fact that yylval coming in was a
1368 NAME, and that struct ttype is a compatible extension of struct
1369 stoken, so yylval.tsym.stoken is already filled in. */
1370 yylval.tsym.type = type;
1371 return TYPENAME;
1374 /* TODO: What about other types? */
1376 sym = lookup_symbol (copy.c_str (), block, SEARCH_VFT, &is_a_field_of_this);
1378 if (sym.symbol)
1380 yylval.ssym.sym = sym;
1381 yylval.ssym.is_a_field_of_this = is_a_field_of_this.type != NULL;
1382 return NAME;
1385 /* If we didn't find a symbol, look again in the current package.
1386 This is to, e.g., make "p global_var" work without having to specify
1387 the package name. We intentionally only looks for objects in the
1388 current package. */
1391 gdb::unique_xmalloc_ptr<char> current_package_name
1392 = go_block_package_name (block);
1394 if (current_package_name != NULL)
1396 struct stoken sval =
1397 build_packaged_name (current_package_name.get (),
1398 strlen (current_package_name.get ()),
1399 copy.c_str (), copy.size ());
1401 sym = lookup_symbol (sval.ptr, block, SEARCH_VFT,
1402 &is_a_field_of_this);
1403 if (sym.symbol)
1405 yylval.ssym.stoken = sval;
1406 yylval.ssym.sym = sym;
1407 yylval.ssym.is_a_field_of_this = is_a_field_of_this.type != NULL;
1408 return NAME;
1413 /* Input names that aren't symbols but ARE valid hex numbers, when
1414 the input radix permits them, can be names or numbers depending
1415 on the parse. Note we support radixes > 16 here. */
1416 if ((copy[0] >= 'a' && copy[0] < 'a' + input_radix - 10)
1417 || (copy[0] >= 'A' && copy[0] < 'A' + input_radix - 10))
1419 YYSTYPE newlval; /* Its value is ignored. */
1420 int hextype = parse_number (par_state, copy.c_str (),
1421 yylval.sval.length, 0, &newlval);
1422 if (hextype == INT)
1424 yylval.ssym.sym.symbol = NULL;
1425 yylval.ssym.sym.block = NULL;
1426 yylval.ssym.is_a_field_of_this = 0;
1427 return NAME_OR_INT;
1431 yylval.ssym.sym.symbol = NULL;
1432 yylval.ssym.sym.block = NULL;
1433 yylval.ssym.is_a_field_of_this = 0;
1434 return NAME;
1437 /* This is taken from c-exp.y mostly to get something working.
1438 The basic structure has been kept because we may yet need some of it. */
1440 static int
1441 yylex (void)
1443 go_token_and_value current, next;
1445 if (popping && !token_fifo.empty ())
1447 go_token_and_value tv = token_fifo[0];
1448 token_fifo.erase (token_fifo.begin ());
1449 yylval = tv.value;
1450 /* There's no need to fall through to handle package.name
1451 as that can never happen here. In theory. */
1452 return tv.token;
1454 popping = 0;
1456 current.token = lex_one_token (pstate);
1458 /* TODO: Need a way to force specifying name1 as a package.
1459 .name1.name2 ? */
1461 if (current.token != NAME)
1462 return current.token;
1464 /* See if we have "name1 . name2". */
1466 current.value = yylval;
1467 next.token = lex_one_token (pstate);
1468 next.value = yylval;
1470 if (next.token == '.')
1472 go_token_and_value name2;
1474 name2.token = lex_one_token (pstate);
1475 name2.value = yylval;
1477 if (name2.token == NAME)
1479 /* Ok, we have "name1 . name2". */
1480 std::string copy = copy_name (current.value.sval);
1482 if (copy == "unsafe")
1484 popping = 1;
1485 return classify_unsafe_function (name2.value.sval);
1488 if (package_name_p (copy.c_str (), pstate->expression_context_block))
1490 popping = 1;
1491 yylval.sval = build_packaged_name (current.value.sval.ptr,
1492 current.value.sval.length,
1493 name2.value.sval.ptr,
1494 name2.value.sval.length);
1495 return classify_packaged_name (pstate->expression_context_block);
1499 token_fifo.push_back (next);
1500 token_fifo.push_back (name2);
1502 else
1503 token_fifo.push_back (next);
1505 /* If we arrive here we don't have a package-qualified name. */
1507 popping = 1;
1508 yylval = current.value;
1509 return classify_name (pstate, pstate->expression_context_block);
1512 /* See language.h. */
1515 go_language::parser (struct parser_state *par_state) const
1517 /* Setting up the parser state. */
1518 scoped_restore pstate_restore = make_scoped_restore (&pstate);
1519 gdb_assert (par_state != NULL);
1520 pstate = par_state;
1522 scoped_restore restore_yydebug = make_scoped_restore (&yydebug,
1523 par_state->debug);
1525 /* Initialize some state used by the lexer. */
1526 last_was_structop = 0;
1527 saw_name_at_eof = 0;
1528 paren_depth = 0;
1530 token_fifo.clear ();
1531 popping = 0;
1532 name_obstack.clear ();
1534 int result = yyparse ();
1535 if (!result)
1536 pstate->set_operation (pstate->pop ());
1537 return result;
1540 static void
1541 yyerror (const char *msg)
1543 pstate->parse_error (msg);