[gdb/symtab] Revert "Change handling of DW_TAG_enumeration_type in DWARF scanner"
[binutils-gdb.git] / gdb / go-exp.y
blob8fd673750ea8e06ef258ccae87379e4fcee9be03
1 /* YACC parser for Go expressions, for GDB.
3 Copyright (C) 2012-2024 Free Software Foundation, Inc.
5 This file is part of GDB.
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program. If not, see <http://www.gnu.org/licenses/>. */
20 /* This file is derived from c-exp.y, p-exp.y. */
22 /* Parse a Go expression from text in a string,
23 and return the result as a struct expression pointer.
24 That structure contains arithmetic operations in reverse polish,
25 with constants represented by operations that are followed by special data.
26 See expression.h for the details of the format.
27 What is important here is that it can be built up sequentially
28 during the process of parsing; the lower levels of the tree always
29 come first in the result.
31 Note that malloc's and realloc's in this file are transformed to
32 xmalloc and xrealloc respectively by the same sed command in the
33 makefile that remaps any other malloc/realloc inserted by the parser
34 generator. Doing this with #defines and trying to control the interaction
35 with include files (<malloc.h> and <stdlib.h> for example) just became
36 too messy, particularly when such includes can be inserted at random
37 times by the parser generator. */
39 /* Known bugs or limitations:
41 - Unicode
42 - &^
43 - '_' (blank identifier)
44 - automatic deref of pointers
45 - method expressions
46 - interfaces, channels, etc.
48 And lots of other things.
49 I'm sure there's some cleanup to do.
54 #include <ctype.h>
55 #include "expression.h"
56 #include "value.h"
57 #include "parser-defs.h"
58 #include "language.h"
59 #include "c-lang.h"
60 #include "go-lang.h"
61 #include "charset.h"
62 #include "block.h"
63 #include "expop.h"
65 #define parse_type(ps) builtin_type (ps->gdbarch ())
67 /* Remap normal yacc parser interface names (yyparse, yylex, yyerror,
68 etc). */
69 #define GDB_YY_REMAP_PREFIX go_
70 #include "yy-remap.h"
72 /* The state of the parser, used internally when we are parsing the
73 expression. */
75 static struct parser_state *pstate = NULL;
77 int yyparse (void);
79 static int yylex (void);
81 static void yyerror (const char *);
85 /* Although the yacc "value" of an expression is not used,
86 since the result is stored in the structure being created,
87 other node types do have values. */
89 %union
91 LONGEST lval;
92 struct {
93 LONGEST val;
94 struct type *type;
95 } typed_val_int;
96 struct {
97 gdb_byte val[16];
98 struct type *type;
99 } typed_val_float;
100 struct stoken sval;
101 struct symtoken ssym;
102 struct type *tval;
103 struct typed_stoken tsval;
104 struct ttype tsym;
105 int voidval;
106 enum exp_opcode opcode;
107 struct internalvar *ivar;
108 struct stoken_vector svec;
112 /* YYSTYPE gets defined by %union. */
113 static int parse_number (struct parser_state *,
114 const char *, int, int, YYSTYPE *);
116 using namespace expr;
119 %type <voidval> exp exp1 type_exp start variable lcurly
120 %type <lval> rcurly
121 %type <tval> type
123 %token <typed_val_int> INT
124 %token <typed_val_float> FLOAT
126 /* Both NAME and TYPENAME tokens represent symbols in the input,
127 and both convey their data as strings.
128 But a TYPENAME is a string that happens to be defined as a type
129 or builtin type name (such as int or char)
130 and a NAME is any other symbol.
131 Contexts where this distinction is not important can use the
132 nonterminal "name", which matches either NAME or TYPENAME. */
134 %token <tsval> RAW_STRING
135 %token <tsval> STRING
136 %token <tsval> CHAR
137 %token <ssym> NAME
138 %token <tsym> TYPENAME /* Not TYPE_NAME cus already taken. */
139 %token <voidval> COMPLETE
140 /*%type <sval> name*/
141 %type <svec> string_exp
142 %type <ssym> name_not_typename
144 /* A NAME_OR_INT is a symbol which is not known in the symbol table,
145 but which would parse as a valid number in the current input radix.
146 E.g. "c" when input_radix==16. Depending on the parse, it will be
147 turned into a name or into a number. */
148 %token <ssym> NAME_OR_INT
150 %token <lval> TRUE_KEYWORD FALSE_KEYWORD
151 %token STRUCT_KEYWORD INTERFACE_KEYWORD TYPE_KEYWORD CHAN_KEYWORD
152 %token SIZEOF_KEYWORD
153 %token LEN_KEYWORD CAP_KEYWORD
154 %token NEW_KEYWORD
155 %token IOTA_KEYWORD NIL_KEYWORD
156 %token CONST_KEYWORD
157 %token DOTDOTDOT
158 %token ENTRY
159 %token ERROR
161 /* Special type cases. */
162 %token BYTE_KEYWORD /* An alias of uint8. */
164 %token <sval> DOLLAR_VARIABLE
166 %token <opcode> ASSIGN_MODIFY
168 %left ','
169 %left ABOVE_COMMA
170 %right '=' ASSIGN_MODIFY
171 %right '?'
172 %left OROR
173 %left ANDAND
174 %left '|'
175 %left '^'
176 %left '&'
177 %left ANDNOT
178 %left EQUAL NOTEQUAL
179 %left '<' '>' LEQ GEQ
180 %left LSH RSH
181 %left '@'
182 %left '+' '-'
183 %left '*' '/' '%'
184 %right UNARY INCREMENT DECREMENT
185 %right LEFT_ARROW '.' '[' '('
190 start : exp1
191 | type_exp
194 type_exp: type
195 { pstate->push_new<type_operation> ($1); }
198 /* Expressions, including the comma operator. */
199 exp1 : exp
200 | exp1 ',' exp
201 { pstate->wrap2<comma_operation> (); }
204 /* Expressions, not including the comma operator. */
205 exp : '*' exp %prec UNARY
206 { pstate->wrap<unop_ind_operation> (); }
209 exp : '&' exp %prec UNARY
210 { pstate->wrap<unop_addr_operation> (); }
213 exp : '-' exp %prec UNARY
214 { pstate->wrap<unary_neg_operation> (); }
217 exp : '+' exp %prec UNARY
218 { pstate->wrap<unary_plus_operation> (); }
221 exp : '!' exp %prec UNARY
222 { pstate->wrap<unary_logical_not_operation> (); }
225 exp : '^' exp %prec UNARY
226 { pstate->wrap<unary_complement_operation> (); }
229 exp : exp INCREMENT %prec UNARY
230 { pstate->wrap<postinc_operation> (); }
233 exp : exp DECREMENT %prec UNARY
234 { pstate->wrap<postdec_operation> (); }
237 /* foo->bar is not in Go. May want as a gdb extension. Later. */
239 exp : exp '.' name_not_typename
241 pstate->push_new<structop_operation>
242 (pstate->pop (), copy_name ($3.stoken));
246 exp : exp '.' name_not_typename COMPLETE
248 structop_base_operation *op
249 = new structop_operation (pstate->pop (),
250 copy_name ($3.stoken));
251 pstate->mark_struct_expression (op);
252 pstate->push (operation_up (op));
256 exp : exp '.' COMPLETE
258 structop_base_operation *op
259 = new structop_operation (pstate->pop (), "");
260 pstate->mark_struct_expression (op);
261 pstate->push (operation_up (op));
265 exp : exp '[' exp1 ']'
266 { pstate->wrap2<subscript_operation> (); }
269 exp : exp '('
270 /* This is to save the value of arglist_len
271 being accumulated by an outer function call. */
272 { pstate->start_arglist (); }
273 arglist ')' %prec LEFT_ARROW
275 std::vector<operation_up> args
276 = pstate->pop_vector (pstate->end_arglist ());
277 pstate->push_new<funcall_operation>
278 (pstate->pop (), std::move (args));
282 lcurly : '{'
283 { pstate->start_arglist (); }
286 arglist :
289 arglist : exp
290 { pstate->arglist_len = 1; }
293 arglist : arglist ',' exp %prec ABOVE_COMMA
294 { pstate->arglist_len++; }
297 rcurly : '}'
298 { $$ = pstate->end_arglist () - 1; }
301 exp : lcurly type rcurly exp %prec UNARY
303 pstate->push_new<unop_memval_operation>
304 (pstate->pop (), $2);
308 exp : type '(' exp ')' %prec UNARY
310 pstate->push_new<unop_cast_operation>
311 (pstate->pop (), $1);
315 exp : '(' exp1 ')'
319 /* Binary operators in order of decreasing precedence. */
321 exp : exp '@' exp
322 { pstate->wrap2<repeat_operation> (); }
325 exp : exp '*' exp
326 { pstate->wrap2<mul_operation> (); }
329 exp : exp '/' exp
330 { pstate->wrap2<div_operation> (); }
333 exp : exp '%' exp
334 { pstate->wrap2<rem_operation> (); }
337 exp : exp '+' exp
338 { pstate->wrap2<add_operation> (); }
341 exp : exp '-' exp
342 { pstate->wrap2<sub_operation> (); }
345 exp : exp LSH exp
346 { pstate->wrap2<lsh_operation> (); }
349 exp : exp RSH exp
350 { pstate->wrap2<rsh_operation> (); }
353 exp : exp EQUAL exp
354 { pstate->wrap2<equal_operation> (); }
357 exp : exp NOTEQUAL exp
358 { pstate->wrap2<notequal_operation> (); }
361 exp : exp LEQ exp
362 { pstate->wrap2<leq_operation> (); }
365 exp : exp GEQ exp
366 { pstate->wrap2<geq_operation> (); }
369 exp : exp '<' exp
370 { pstate->wrap2<less_operation> (); }
373 exp : exp '>' exp
374 { pstate->wrap2<gtr_operation> (); }
377 exp : exp '&' exp
378 { pstate->wrap2<bitwise_and_operation> (); }
381 exp : exp '^' exp
382 { pstate->wrap2<bitwise_xor_operation> (); }
385 exp : exp '|' exp
386 { pstate->wrap2<bitwise_ior_operation> (); }
389 exp : exp ANDAND exp
390 { pstate->wrap2<logical_and_operation> (); }
393 exp : exp OROR exp
394 { pstate->wrap2<logical_or_operation> (); }
397 exp : exp '?' exp ':' exp %prec '?'
399 operation_up last = pstate->pop ();
400 operation_up mid = pstate->pop ();
401 operation_up first = pstate->pop ();
402 pstate->push_new<ternop_cond_operation>
403 (std::move (first), std::move (mid),
404 std::move (last));
408 exp : exp '=' exp
409 { pstate->wrap2<assign_operation> (); }
412 exp : exp ASSIGN_MODIFY exp
414 operation_up rhs = pstate->pop ();
415 operation_up lhs = pstate->pop ();
416 pstate->push_new<assign_modify_operation>
417 ($2, std::move (lhs), std::move (rhs));
421 exp : INT
423 pstate->push_new<long_const_operation>
424 ($1.type, $1.val);
428 exp : CHAR
430 struct stoken_vector vec;
431 vec.len = 1;
432 vec.tokens = &$1;
433 pstate->push_c_string ($1.type, &vec);
437 exp : NAME_OR_INT
438 { YYSTYPE val;
439 parse_number (pstate, $1.stoken.ptr,
440 $1.stoken.length, 0, &val);
441 pstate->push_new<long_const_operation>
442 (val.typed_val_int.type,
443 val.typed_val_int.val);
448 exp : FLOAT
450 float_data data;
451 std::copy (std::begin ($1.val), std::end ($1.val),
452 std::begin (data));
453 pstate->push_new<float_const_operation> ($1.type, data);
457 exp : variable
460 exp : DOLLAR_VARIABLE
462 pstate->push_dollar ($1);
466 exp : SIZEOF_KEYWORD '(' type ')' %prec UNARY
468 /* TODO(dje): Go objects in structs. */
469 /* TODO(dje): What's the right type here? */
470 struct type *size_type
471 = parse_type (pstate)->builtin_unsigned_int;
472 $3 = check_typedef ($3);
473 pstate->push_new<long_const_operation>
474 (size_type, (LONGEST) $3->length ());
478 exp : SIZEOF_KEYWORD '(' exp ')' %prec UNARY
480 /* TODO(dje): Go objects in structs. */
481 pstate->wrap<unop_sizeof_operation> ();
484 string_exp:
485 STRING
487 /* We copy the string here, and not in the
488 lexer, to guarantee that we do not leak a
489 string. */
490 /* Note that we NUL-terminate here, but just
491 for convenience. */
492 struct typed_stoken *vec = XNEW (struct typed_stoken);
493 $$.len = 1;
494 $$.tokens = vec;
496 vec->type = $1.type;
497 vec->length = $1.length;
498 vec->ptr = (char *) malloc ($1.length + 1);
499 memcpy (vec->ptr, $1.ptr, $1.length + 1);
502 | string_exp '+' STRING
504 /* Note that we NUL-terminate here, but just
505 for convenience. */
506 char *p;
507 ++$$.len;
508 $$.tokens = XRESIZEVEC (struct typed_stoken,
509 $$.tokens, $$.len);
511 p = (char *) malloc ($3.length + 1);
512 memcpy (p, $3.ptr, $3.length + 1);
514 $$.tokens[$$.len - 1].type = $3.type;
515 $$.tokens[$$.len - 1].length = $3.length;
516 $$.tokens[$$.len - 1].ptr = p;
520 exp : string_exp %prec ABOVE_COMMA
522 int i;
524 /* Always utf8. */
525 pstate->push_c_string (0, &$1);
526 for (i = 0; i < $1.len; ++i)
527 free ($1.tokens[i].ptr);
528 free ($1.tokens);
532 exp : TRUE_KEYWORD
533 { pstate->push_new<bool_operation> ($1); }
536 exp : FALSE_KEYWORD
537 { pstate->push_new<bool_operation> ($1); }
540 variable: name_not_typename ENTRY
541 { struct symbol *sym = $1.sym.symbol;
543 if (sym == NULL
544 || !sym->is_argument ()
545 || !symbol_read_needs_frame (sym))
546 error (_("@entry can be used only for function "
547 "parameters, not for \"%s\""),
548 copy_name ($1.stoken).c_str ());
550 pstate->push_new<var_entry_value_operation> (sym);
554 variable: name_not_typename
555 { struct block_symbol sym = $1.sym;
557 if (sym.symbol)
559 if (symbol_read_needs_frame (sym.symbol))
560 pstate->block_tracker->update (sym);
562 pstate->push_new<var_value_operation> (sym);
564 else if ($1.is_a_field_of_this)
566 /* TODO(dje): Can we get here?
567 E.g., via a mix of c++ and go? */
568 gdb_assert_not_reached ("go with `this' field");
570 else
572 std::string arg = copy_name ($1.stoken);
574 bound_minimal_symbol msymbol =
575 lookup_minimal_symbol (current_program_space, arg.c_str ());
576 if (msymbol.minsym != NULL)
577 pstate->push_new<var_msym_value_operation>
578 (msymbol);
579 else if (!have_full_symbols (current_program_space)
580 && !have_partial_symbols (current_program_space))
581 error (_("No symbol table is loaded. "
582 "Use the \"file\" command."));
583 else
584 error (_("No symbol \"%s\" in current context."),
585 arg.c_str ());
590 /* TODO
591 method_exp: PACKAGENAME '.' name '.' name
597 type /* Implements (approximately): [*] type-specifier */
598 : '*' type
599 { $$ = lookup_pointer_type ($2); }
600 | TYPENAME
601 { $$ = $1.type; }
603 | STRUCT_KEYWORD name
604 { $$ = lookup_struct (copy_name ($2),
605 expression_context_block); }
607 | BYTE_KEYWORD
608 { $$ = builtin_go_type (pstate->gdbarch ())
609 ->builtin_uint8; }
612 /* TODO
613 name : NAME { $$ = $1.stoken; }
614 | TYPENAME { $$ = $1.stoken; }
615 | NAME_OR_INT { $$ = $1.stoken; }
619 name_not_typename
620 : NAME
621 /* These would be useful if name_not_typename was useful, but it is just
622 a fake for "variable", so these cause reduce/reduce conflicts because
623 the parser can't tell whether NAME_OR_INT is a name_not_typename (=variable,
624 =exp) or just an exp. If name_not_typename was ever used in an lvalue
625 context where only a name could occur, this might be useful.
626 | NAME_OR_INT
632 /* Take care of parsing a number (anything that starts with a digit).
633 Set yylval and return the token type; update lexptr.
634 LEN is the number of characters in it. */
636 /* FIXME: Needs some error checking for the float case. */
637 /* FIXME(dje): IWBN to use c-exp.y's parse_number if we could.
638 That will require moving the guts into a function that we both call
639 as our YYSTYPE is different than c-exp.y's */
641 static int
642 parse_number (struct parser_state *par_state,
643 const char *p, int len, int parsed_float, YYSTYPE *putithere)
645 ULONGEST n = 0;
646 ULONGEST prevn = 0;
648 int i = 0;
649 int c;
650 int base = input_radix;
651 int unsigned_p = 0;
653 /* Number of "L" suffixes encountered. */
654 int long_p = 0;
656 /* We have found a "L" or "U" suffix. */
657 int found_suffix = 0;
659 if (parsed_float)
661 const struct builtin_go_type *builtin_go_types
662 = builtin_go_type (par_state->gdbarch ());
664 /* Handle suffixes: 'f' for float32, 'l' for long double.
665 FIXME: This appears to be an extension -- do we want this? */
666 if (len >= 1 && tolower (p[len - 1]) == 'f')
668 putithere->typed_val_float.type
669 = builtin_go_types->builtin_float32;
670 len--;
672 else if (len >= 1 && tolower (p[len - 1]) == 'l')
674 putithere->typed_val_float.type
675 = parse_type (par_state)->builtin_long_double;
676 len--;
678 /* Default type for floating-point literals is float64. */
679 else
681 putithere->typed_val_float.type
682 = builtin_go_types->builtin_float64;
685 if (!parse_float (p, len,
686 putithere->typed_val_float.type,
687 putithere->typed_val_float.val))
688 return ERROR;
689 return FLOAT;
692 /* Handle base-switching prefixes 0x, 0t, 0d, 0. */
693 if (p[0] == '0' && len > 1)
694 switch (p[1])
696 case 'x':
697 case 'X':
698 if (len >= 3)
700 p += 2;
701 base = 16;
702 len -= 2;
704 break;
706 case 'b':
707 case 'B':
708 if (len >= 3)
710 p += 2;
711 base = 2;
712 len -= 2;
714 break;
716 case 't':
717 case 'T':
718 case 'd':
719 case 'D':
720 if (len >= 3)
722 p += 2;
723 base = 10;
724 len -= 2;
726 break;
728 default:
729 base = 8;
730 break;
733 while (len-- > 0)
735 c = *p++;
736 if (c >= 'A' && c <= 'Z')
737 c += 'a' - 'A';
738 if (c != 'l' && c != 'u')
739 n *= base;
740 if (c >= '0' && c <= '9')
742 if (found_suffix)
743 return ERROR;
744 n += i = c - '0';
746 else
748 if (base > 10 && c >= 'a' && c <= 'f')
750 if (found_suffix)
751 return ERROR;
752 n += i = c - 'a' + 10;
754 else if (c == 'l')
756 ++long_p;
757 found_suffix = 1;
759 else if (c == 'u')
761 unsigned_p = 1;
762 found_suffix = 1;
764 else
765 return ERROR; /* Char not a digit */
767 if (i >= base)
768 return ERROR; /* Invalid digit in this base. */
770 if (c != 'l' && c != 'u')
772 /* Test for overflow. */
773 if (n == 0 && prevn == 0)
775 else if (prevn >= n)
776 error (_("Numeric constant too large."));
778 prevn = n;
781 /* An integer constant is an int, a long, or a long long. An L
782 suffix forces it to be long; an LL suffix forces it to be long
783 long. If not forced to a larger size, it gets the first type of
784 the above that it fits in. To figure out whether it fits, we
785 shift it right and see whether anything remains. Note that we
786 can't shift sizeof (LONGEST) * HOST_CHAR_BIT bits or more in one
787 operation, because many compilers will warn about such a shift
788 (which always produces a zero result). Sometimes gdbarch_int_bit
789 or gdbarch_long_bit will be that big, sometimes not. To deal with
790 the case where it is we just always shift the value more than
791 once, with fewer bits each time. */
793 int int_bits = gdbarch_int_bit (par_state->gdbarch ());
794 int long_bits = gdbarch_long_bit (par_state->gdbarch ());
795 int long_long_bits = gdbarch_long_long_bit (par_state->gdbarch ());
796 bool have_signed = !unsigned_p;
797 bool have_int = long_p == 0;
798 bool have_long = long_p <= 1;
799 if (have_int && have_signed && fits_in_type (1, n, int_bits, true))
800 putithere->typed_val_int.type = parse_type (par_state)->builtin_int;
801 else if (have_int && fits_in_type (1, n, int_bits, false))
802 putithere->typed_val_int.type
803 = parse_type (par_state)->builtin_unsigned_int;
804 else if (have_long && have_signed && fits_in_type (1, n, long_bits, true))
805 putithere->typed_val_int.type = parse_type (par_state)->builtin_long;
806 else if (have_long && fits_in_type (1, n, long_bits, false))
807 putithere->typed_val_int.type
808 = parse_type (par_state)->builtin_unsigned_long;
809 else if (have_signed && fits_in_type (1, n, long_long_bits, true))
810 putithere->typed_val_int.type
811 = parse_type (par_state)->builtin_long_long;
812 else if (fits_in_type (1, n, long_long_bits, false))
813 putithere->typed_val_int.type
814 = parse_type (par_state)->builtin_unsigned_long_long;
815 else
816 error (_("Numeric constant too large."));
817 putithere->typed_val_int.val = n;
819 return INT;
822 /* Temporary obstack used for holding strings. */
823 static struct obstack tempbuf;
824 static int tempbuf_init;
826 /* Parse a string or character literal from TOKPTR. The string or
827 character may be wide or unicode. *OUTPTR is set to just after the
828 end of the literal in the input string. The resulting token is
829 stored in VALUE. This returns a token value, either STRING or
830 CHAR, depending on what was parsed. *HOST_CHARS is set to the
831 number of host characters in the literal. */
833 static int
834 parse_string_or_char (const char *tokptr, const char **outptr,
835 struct typed_stoken *value, int *host_chars)
837 int quote;
839 /* Build the gdb internal form of the input string in tempbuf. Note
840 that the buffer is null byte terminated *only* for the
841 convenience of debugging gdb itself and printing the buffer
842 contents when the buffer contains no embedded nulls. Gdb does
843 not depend upon the buffer being null byte terminated, it uses
844 the length string instead. This allows gdb to handle C strings
845 (as well as strings in other languages) with embedded null
846 bytes */
848 if (!tempbuf_init)
849 tempbuf_init = 1;
850 else
851 obstack_free (&tempbuf, NULL);
852 obstack_init (&tempbuf);
854 /* Skip the quote. */
855 quote = *tokptr;
856 ++tokptr;
858 *host_chars = 0;
860 while (*tokptr)
862 char c = *tokptr;
863 if (c == '\\')
865 ++tokptr;
866 *host_chars += c_parse_escape (&tokptr, &tempbuf);
868 else if (c == quote)
869 break;
870 else
872 obstack_1grow (&tempbuf, c);
873 ++tokptr;
874 /* FIXME: this does the wrong thing with multi-byte host
875 characters. We could use mbrlen here, but that would
876 make "set host-charset" a bit less useful. */
877 ++*host_chars;
881 if (*tokptr != quote)
883 if (quote == '"')
884 error (_("Unterminated string in expression."));
885 else
886 error (_("Unmatched single quote."));
888 ++tokptr;
890 value->type = (int) C_STRING | (quote == '\'' ? C_CHAR : 0); /*FIXME*/
891 value->ptr = (char *) obstack_base (&tempbuf);
892 value->length = obstack_object_size (&tempbuf);
894 *outptr = tokptr;
896 return quote == '\'' ? CHAR : STRING;
899 struct go_token
901 const char *oper;
902 int token;
903 enum exp_opcode opcode;
906 static const struct go_token tokentab3[] =
908 {">>=", ASSIGN_MODIFY, BINOP_RSH},
909 {"<<=", ASSIGN_MODIFY, BINOP_LSH},
910 /*{"&^=", ASSIGN_MODIFY, BINOP_BITWISE_ANDNOT}, TODO */
911 {"...", DOTDOTDOT, OP_NULL},
914 static const struct go_token tokentab2[] =
916 {"+=", ASSIGN_MODIFY, BINOP_ADD},
917 {"-=", ASSIGN_MODIFY, BINOP_SUB},
918 {"*=", ASSIGN_MODIFY, BINOP_MUL},
919 {"/=", ASSIGN_MODIFY, BINOP_DIV},
920 {"%=", ASSIGN_MODIFY, BINOP_REM},
921 {"|=", ASSIGN_MODIFY, BINOP_BITWISE_IOR},
922 {"&=", ASSIGN_MODIFY, BINOP_BITWISE_AND},
923 {"^=", ASSIGN_MODIFY, BINOP_BITWISE_XOR},
924 {"++", INCREMENT, OP_NULL},
925 {"--", DECREMENT, OP_NULL},
926 /*{"->", RIGHT_ARROW, OP_NULL}, Doesn't exist in Go. */
927 {"<-", LEFT_ARROW, OP_NULL},
928 {"&&", ANDAND, OP_NULL},
929 {"||", OROR, OP_NULL},
930 {"<<", LSH, OP_NULL},
931 {">>", RSH, OP_NULL},
932 {"==", EQUAL, OP_NULL},
933 {"!=", NOTEQUAL, OP_NULL},
934 {"<=", LEQ, OP_NULL},
935 {">=", GEQ, OP_NULL},
936 /*{"&^", ANDNOT, OP_NULL}, TODO */
939 /* Identifier-like tokens. */
940 static const struct go_token ident_tokens[] =
942 {"true", TRUE_KEYWORD, OP_NULL},
943 {"false", FALSE_KEYWORD, OP_NULL},
944 {"nil", NIL_KEYWORD, OP_NULL},
945 {"const", CONST_KEYWORD, OP_NULL},
946 {"struct", STRUCT_KEYWORD, OP_NULL},
947 {"type", TYPE_KEYWORD, OP_NULL},
948 {"interface", INTERFACE_KEYWORD, OP_NULL},
949 {"chan", CHAN_KEYWORD, OP_NULL},
950 {"byte", BYTE_KEYWORD, OP_NULL}, /* An alias of uint8. */
951 {"len", LEN_KEYWORD, OP_NULL},
952 {"cap", CAP_KEYWORD, OP_NULL},
953 {"new", NEW_KEYWORD, OP_NULL},
954 {"iota", IOTA_KEYWORD, OP_NULL},
957 /* This is set if a NAME token appeared at the very end of the input
958 string, with no whitespace separating the name from the EOF. This
959 is used only when parsing to do field name completion. */
960 static int saw_name_at_eof;
962 /* This is set if the previously-returned token was a structure
963 operator -- either '.' or ARROW. This is used only when parsing to
964 do field name completion. */
965 static int last_was_structop;
967 /* Depth of parentheses. */
968 static int paren_depth;
970 /* Read one token, getting characters through lexptr. */
972 static int
973 lex_one_token (struct parser_state *par_state)
975 int c;
976 int namelen;
977 const char *tokstart;
978 int saw_structop = last_was_structop;
980 last_was_structop = 0;
982 retry:
984 par_state->prev_lexptr = par_state->lexptr;
986 tokstart = par_state->lexptr;
987 /* See if it is a special token of length 3. */
988 for (const auto &token : tokentab3)
989 if (strncmp (tokstart, token.oper, 3) == 0)
991 par_state->lexptr += 3;
992 yylval.opcode = token.opcode;
993 return token.token;
996 /* See if it is a special token of length 2. */
997 for (const auto &token : tokentab2)
998 if (strncmp (tokstart, token.oper, 2) == 0)
1000 par_state->lexptr += 2;
1001 yylval.opcode = token.opcode;
1002 /* NOTE: -> doesn't exist in Go, so we don't need to watch for
1003 setting last_was_structop here. */
1004 return token.token;
1007 switch (c = *tokstart)
1009 case 0:
1010 if (saw_name_at_eof)
1012 saw_name_at_eof = 0;
1013 return COMPLETE;
1015 else if (saw_structop)
1016 return COMPLETE;
1017 else
1018 return 0;
1020 case ' ':
1021 case '\t':
1022 case '\n':
1023 par_state->lexptr++;
1024 goto retry;
1026 case '[':
1027 case '(':
1028 paren_depth++;
1029 par_state->lexptr++;
1030 return c;
1032 case ']':
1033 case ')':
1034 if (paren_depth == 0)
1035 return 0;
1036 paren_depth--;
1037 par_state->lexptr++;
1038 return c;
1040 case ',':
1041 if (pstate->comma_terminates
1042 && paren_depth == 0)
1043 return 0;
1044 par_state->lexptr++;
1045 return c;
1047 case '.':
1048 /* Might be a floating point number. */
1049 if (par_state->lexptr[1] < '0' || par_state->lexptr[1] > '9')
1051 if (pstate->parse_completion)
1052 last_was_structop = 1;
1053 goto symbol; /* Nope, must be a symbol. */
1055 [[fallthrough]];
1057 case '0':
1058 case '1':
1059 case '2':
1060 case '3':
1061 case '4':
1062 case '5':
1063 case '6':
1064 case '7':
1065 case '8':
1066 case '9':
1068 /* It's a number. */
1069 int got_dot = 0, got_e = 0, toktype;
1070 const char *p = tokstart;
1071 int hex = input_radix > 10;
1073 if (c == '0' && (p[1] == 'x' || p[1] == 'X'))
1075 p += 2;
1076 hex = 1;
1079 for (;; ++p)
1081 /* This test includes !hex because 'e' is a valid hex digit
1082 and thus does not indicate a floating point number when
1083 the radix is hex. */
1084 if (!hex && !got_e && (*p == 'e' || *p == 'E'))
1085 got_dot = got_e = 1;
1086 /* This test does not include !hex, because a '.' always indicates
1087 a decimal floating point number regardless of the radix. */
1088 else if (!got_dot && *p == '.')
1089 got_dot = 1;
1090 else if (got_e && (p[-1] == 'e' || p[-1] == 'E')
1091 && (*p == '-' || *p == '+'))
1092 /* This is the sign of the exponent, not the end of the
1093 number. */
1094 continue;
1095 /* We will take any letters or digits. parse_number will
1096 complain if past the radix, or if L or U are not final. */
1097 else if ((*p < '0' || *p > '9')
1098 && ((*p < 'a' || *p > 'z')
1099 && (*p < 'A' || *p > 'Z')))
1100 break;
1102 toktype = parse_number (par_state, tokstart, p - tokstart,
1103 got_dot|got_e, &yylval);
1104 if (toktype == ERROR)
1105 error (_("Invalid number \"%.*s\"."), (int) (p - tokstart),
1106 tokstart);
1107 par_state->lexptr = p;
1108 return toktype;
1111 case '@':
1113 const char *p = &tokstart[1];
1114 size_t len = strlen ("entry");
1116 while (isspace (*p))
1117 p++;
1118 if (strncmp (p, "entry", len) == 0 && !isalnum (p[len])
1119 && p[len] != '_')
1121 par_state->lexptr = &p[len];
1122 return ENTRY;
1125 [[fallthrough]];
1126 case '+':
1127 case '-':
1128 case '*':
1129 case '/':
1130 case '%':
1131 case '|':
1132 case '&':
1133 case '^':
1134 case '~':
1135 case '!':
1136 case '<':
1137 case '>':
1138 case '?':
1139 case ':':
1140 case '=':
1141 case '{':
1142 case '}':
1143 symbol:
1144 par_state->lexptr++;
1145 return c;
1147 case '\'':
1148 case '"':
1149 case '`':
1151 int host_len;
1152 int result = parse_string_or_char (tokstart, &par_state->lexptr,
1153 &yylval.tsval, &host_len);
1154 if (result == CHAR)
1156 if (host_len == 0)
1157 error (_("Empty character constant."));
1158 else if (host_len > 2 && c == '\'')
1160 ++tokstart;
1161 namelen = par_state->lexptr - tokstart - 1;
1162 goto tryname;
1164 else if (host_len > 1)
1165 error (_("Invalid character constant."));
1167 return result;
1171 if (!(c == '_' || c == '$'
1172 || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')))
1173 /* We must have come across a bad character (e.g. ';'). */
1174 error (_("Invalid character '%c' in expression."), c);
1176 /* It's a name. See how long it is. */
1177 namelen = 0;
1178 for (c = tokstart[namelen];
1179 (c == '_' || c == '$' || (c >= '0' && c <= '9')
1180 || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'));)
1182 c = tokstart[++namelen];
1185 /* The token "if" terminates the expression and is NOT removed from
1186 the input stream. It doesn't count if it appears in the
1187 expansion of a macro. */
1188 if (namelen == 2
1189 && tokstart[0] == 'i'
1190 && tokstart[1] == 'f')
1192 return 0;
1195 /* For the same reason (breakpoint conditions), "thread N"
1196 terminates the expression. "thread" could be an identifier, but
1197 an identifier is never followed by a number without intervening
1198 punctuation.
1199 Handle abbreviations of these, similarly to
1200 breakpoint.c:find_condition_and_thread.
1201 TODO: Watch for "goroutine" here? */
1202 if (namelen >= 1
1203 && strncmp (tokstart, "thread", namelen) == 0
1204 && (tokstart[namelen] == ' ' || tokstart[namelen] == '\t'))
1206 const char *p = tokstart + namelen + 1;
1208 while (*p == ' ' || *p == '\t')
1209 p++;
1210 if (*p >= '0' && *p <= '9')
1211 return 0;
1214 par_state->lexptr += namelen;
1216 tryname:
1218 yylval.sval.ptr = tokstart;
1219 yylval.sval.length = namelen;
1221 /* Catch specific keywords. */
1222 std::string copy = copy_name (yylval.sval);
1223 for (const auto &token : ident_tokens)
1224 if (copy == token.oper)
1226 /* It is ok to always set this, even though we don't always
1227 strictly need to. */
1228 yylval.opcode = token.opcode;
1229 return token.token;
1232 if (*tokstart == '$')
1233 return DOLLAR_VARIABLE;
1235 if (pstate->parse_completion && *par_state->lexptr == '\0')
1236 saw_name_at_eof = 1;
1237 return NAME;
1240 /* An object of this type is pushed on a FIFO by the "outer" lexer. */
1241 struct go_token_and_value
1243 int token;
1244 YYSTYPE value;
1247 /* A FIFO of tokens that have been read but not yet returned to the
1248 parser. */
1249 static std::vector<go_token_and_value> token_fifo;
1251 /* Non-zero if the lexer should return tokens from the FIFO. */
1252 static int popping;
1254 /* Temporary storage for yylex; this holds symbol names as they are
1255 built up. */
1256 static auto_obstack name_obstack;
1258 /* Build "package.name" in name_obstack.
1259 For convenience of the caller, the name is NUL-terminated,
1260 but the NUL is not included in the recorded length. */
1262 static struct stoken
1263 build_packaged_name (const char *package, int package_len,
1264 const char *name, int name_len)
1266 struct stoken result;
1268 name_obstack.clear ();
1269 obstack_grow (&name_obstack, package, package_len);
1270 obstack_grow_str (&name_obstack, ".");
1271 obstack_grow (&name_obstack, name, name_len);
1272 obstack_grow (&name_obstack, "", 1);
1273 result.ptr = (char *) obstack_base (&name_obstack);
1274 result.length = obstack_object_size (&name_obstack) - 1;
1276 return result;
1279 /* Return non-zero if NAME is a package name.
1280 BLOCK is the scope in which to interpret NAME; this can be NULL
1281 to mean the global scope. */
1283 static int
1284 package_name_p (const char *name, const struct block *block)
1286 struct symbol *sym;
1287 struct field_of_this_result is_a_field_of_this;
1289 sym = lookup_symbol (name, block, SEARCH_TYPE_DOMAIN,
1290 &is_a_field_of_this).symbol;
1292 if (sym
1293 && sym->aclass () == LOC_TYPEDEF
1294 && sym->type ()->code () == TYPE_CODE_MODULE)
1295 return 1;
1297 return 0;
1300 /* Classify a (potential) function in the "unsafe" package.
1301 We fold these into "keywords" to keep things simple, at least until
1302 something more complex is warranted. */
1304 static int
1305 classify_unsafe_function (struct stoken function_name)
1307 std::string copy = copy_name (function_name);
1309 if (copy == "Sizeof")
1311 yylval.sval = function_name;
1312 return SIZEOF_KEYWORD;
1315 error (_("Unknown function in `unsafe' package: %s"), copy.c_str ());
1318 /* Classify token(s) "name1.name2" where name1 is known to be a package.
1319 The contents of the token are in `yylval'.
1320 Updates yylval and returns the new token type.
1322 The result is one of NAME, NAME_OR_INT, or TYPENAME. */
1324 static int
1325 classify_packaged_name (const struct block *block)
1327 struct block_symbol sym;
1328 struct field_of_this_result is_a_field_of_this;
1330 std::string copy = copy_name (yylval.sval);
1332 sym = lookup_symbol (copy.c_str (), block, SEARCH_VFT, &is_a_field_of_this);
1334 if (sym.symbol)
1336 yylval.ssym.sym = sym;
1337 yylval.ssym.is_a_field_of_this = is_a_field_of_this.type != NULL;
1340 return NAME;
1343 /* Classify a NAME token.
1344 The contents of the token are in `yylval'.
1345 Updates yylval and returns the new token type.
1346 BLOCK is the block in which lookups start; this can be NULL
1347 to mean the global scope.
1349 The result is one of NAME, NAME_OR_INT, or TYPENAME. */
1351 static int
1352 classify_name (struct parser_state *par_state, const struct block *block)
1354 struct type *type;
1355 struct block_symbol sym;
1356 struct field_of_this_result is_a_field_of_this;
1358 std::string copy = copy_name (yylval.sval);
1360 /* Try primitive types first so they win over bad/weird debug info. */
1361 type = language_lookup_primitive_type (par_state->language (),
1362 par_state->gdbarch (),
1363 copy.c_str ());
1364 if (type != NULL)
1366 /* NOTE: We take advantage of the fact that yylval coming in was a
1367 NAME, and that struct ttype is a compatible extension of struct
1368 stoken, so yylval.tsym.stoken is already filled in. */
1369 yylval.tsym.type = type;
1370 return TYPENAME;
1373 /* TODO: What about other types? */
1375 sym = lookup_symbol (copy.c_str (), block, SEARCH_VFT, &is_a_field_of_this);
1377 if (sym.symbol)
1379 yylval.ssym.sym = sym;
1380 yylval.ssym.is_a_field_of_this = is_a_field_of_this.type != NULL;
1381 return NAME;
1384 /* If we didn't find a symbol, look again in the current package.
1385 This is to, e.g., make "p global_var" work without having to specify
1386 the package name. We intentionally only looks for objects in the
1387 current package. */
1390 gdb::unique_xmalloc_ptr<char> current_package_name
1391 = go_block_package_name (block);
1393 if (current_package_name != NULL)
1395 struct stoken sval =
1396 build_packaged_name (current_package_name.get (),
1397 strlen (current_package_name.get ()),
1398 copy.c_str (), copy.size ());
1400 sym = lookup_symbol (sval.ptr, block, SEARCH_VFT,
1401 &is_a_field_of_this);
1402 if (sym.symbol)
1404 yylval.ssym.stoken = sval;
1405 yylval.ssym.sym = sym;
1406 yylval.ssym.is_a_field_of_this = is_a_field_of_this.type != NULL;
1407 return NAME;
1412 /* Input names that aren't symbols but ARE valid hex numbers, when
1413 the input radix permits them, can be names or numbers depending
1414 on the parse. Note we support radixes > 16 here. */
1415 if ((copy[0] >= 'a' && copy[0] < 'a' + input_radix - 10)
1416 || (copy[0] >= 'A' && copy[0] < 'A' + input_radix - 10))
1418 YYSTYPE newlval; /* Its value is ignored. */
1419 int hextype = parse_number (par_state, copy.c_str (),
1420 yylval.sval.length, 0, &newlval);
1421 if (hextype == INT)
1423 yylval.ssym.sym.symbol = NULL;
1424 yylval.ssym.sym.block = NULL;
1425 yylval.ssym.is_a_field_of_this = 0;
1426 return NAME_OR_INT;
1430 yylval.ssym.sym.symbol = NULL;
1431 yylval.ssym.sym.block = NULL;
1432 yylval.ssym.is_a_field_of_this = 0;
1433 return NAME;
1436 /* This is taken from c-exp.y mostly to get something working.
1437 The basic structure has been kept because we may yet need some of it. */
1439 static int
1440 yylex (void)
1442 go_token_and_value current, next;
1444 if (popping && !token_fifo.empty ())
1446 go_token_and_value tv = token_fifo[0];
1447 token_fifo.erase (token_fifo.begin ());
1448 yylval = tv.value;
1449 /* There's no need to fall through to handle package.name
1450 as that can never happen here. In theory. */
1451 return tv.token;
1453 popping = 0;
1455 current.token = lex_one_token (pstate);
1457 /* TODO: Need a way to force specifying name1 as a package.
1458 .name1.name2 ? */
1460 if (current.token != NAME)
1461 return current.token;
1463 /* See if we have "name1 . name2". */
1465 current.value = yylval;
1466 next.token = lex_one_token (pstate);
1467 next.value = yylval;
1469 if (next.token == '.')
1471 go_token_and_value name2;
1473 name2.token = lex_one_token (pstate);
1474 name2.value = yylval;
1476 if (name2.token == NAME)
1478 /* Ok, we have "name1 . name2". */
1479 std::string copy = copy_name (current.value.sval);
1481 if (copy == "unsafe")
1483 popping = 1;
1484 return classify_unsafe_function (name2.value.sval);
1487 if (package_name_p (copy.c_str (), pstate->expression_context_block))
1489 popping = 1;
1490 yylval.sval = build_packaged_name (current.value.sval.ptr,
1491 current.value.sval.length,
1492 name2.value.sval.ptr,
1493 name2.value.sval.length);
1494 return classify_packaged_name (pstate->expression_context_block);
1498 token_fifo.push_back (next);
1499 token_fifo.push_back (name2);
1501 else
1502 token_fifo.push_back (next);
1504 /* If we arrive here we don't have a package-qualified name. */
1506 popping = 1;
1507 yylval = current.value;
1508 return classify_name (pstate, pstate->expression_context_block);
1511 /* See language.h. */
1514 go_language::parser (struct parser_state *par_state) const
1516 /* Setting up the parser state. */
1517 scoped_restore pstate_restore = make_scoped_restore (&pstate);
1518 gdb_assert (par_state != NULL);
1519 pstate = par_state;
1521 scoped_restore restore_yydebug = make_scoped_restore (&yydebug,
1522 par_state->debug);
1524 /* Initialize some state used by the lexer. */
1525 last_was_structop = 0;
1526 saw_name_at_eof = 0;
1527 paren_depth = 0;
1529 token_fifo.clear ();
1530 popping = 0;
1531 name_obstack.clear ();
1533 int result = yyparse ();
1534 if (!result)
1535 pstate->set_operation (pstate->pop ());
1536 return result;
1539 static void
1540 yyerror (const char *msg)
1542 pstate->parse_error (msg);