Automatic date update in version.in
[binutils-gdb.git] / gdb / go-exp.y
blob20ab8ff76cf8ea9652350784c1c0c7933d7f07c3
1 /* YACC parser for Go expressions, for GDB.
3 Copyright (C) 2012-2024 Free Software Foundation, Inc.
5 This file is part of GDB.
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program. If not, see <http://www.gnu.org/licenses/>. */
20 /* This file is derived from c-exp.y, p-exp.y. */
22 /* Parse a Go expression from text in a string,
23 and return the result as a struct expression pointer.
24 That structure contains arithmetic operations in reverse polish,
25 with constants represented by operations that are followed by special data.
26 See expression.h for the details of the format.
27 What is important here is that it can be built up sequentially
28 during the process of parsing; the lower levels of the tree always
29 come first in the result.
31 Note that malloc's and realloc's in this file are transformed to
32 xmalloc and xrealloc respectively by the same sed command in the
33 makefile that remaps any other malloc/realloc inserted by the parser
34 generator. Doing this with #defines and trying to control the interaction
35 with include files (<malloc.h> and <stdlib.h> for example) just became
36 too messy, particularly when such includes can be inserted at random
37 times by the parser generator. */
39 /* Known bugs or limitations:
41 - Unicode
42 - &^
43 - '_' (blank identifier)
44 - automatic deref of pointers
45 - method expressions
46 - interfaces, channels, etc.
48 And lots of other things.
49 I'm sure there's some cleanup to do.
54 #include <ctype.h>
55 #include "expression.h"
56 #include "value.h"
57 #include "parser-defs.h"
58 #include "language.h"
59 #include "c-lang.h"
60 #include "go-lang.h"
61 #include "charset.h"
62 #include "block.h"
63 #include "expop.h"
65 #define parse_type(ps) builtin_type (ps->gdbarch ())
67 /* Remap normal yacc parser interface names (yyparse, yylex, yyerror,
68 etc). */
69 #define GDB_YY_REMAP_PREFIX go_
70 #include "yy-remap.h"
72 /* The state of the parser, used internally when we are parsing the
73 expression. */
75 static struct parser_state *pstate = NULL;
77 int yyparse (void);
79 static int yylex (void);
81 static void yyerror (const char *);
85 /* Although the yacc "value" of an expression is not used,
86 since the result is stored in the structure being created,
87 other node types do have values. */
89 %union
91 LONGEST lval;
92 struct {
93 LONGEST val;
94 struct type *type;
95 } typed_val_int;
96 struct {
97 gdb_byte val[16];
98 struct type *type;
99 } typed_val_float;
100 struct stoken sval;
101 struct symtoken ssym;
102 struct type *tval;
103 struct typed_stoken tsval;
104 struct ttype tsym;
105 int voidval;
106 enum exp_opcode opcode;
107 struct internalvar *ivar;
108 struct stoken_vector svec;
112 /* YYSTYPE gets defined by %union. */
113 static int parse_number (struct parser_state *,
114 const char *, int, int, YYSTYPE *);
116 using namespace expr;
119 %type <voidval> exp exp1 type_exp start variable lcurly
120 %type <lval> rcurly
121 %type <tval> type
123 %token <typed_val_int> INT
124 %token <typed_val_float> FLOAT
126 /* Both NAME and TYPENAME tokens represent symbols in the input,
127 and both convey their data as strings.
128 But a TYPENAME is a string that happens to be defined as a type
129 or builtin type name (such as int or char)
130 and a NAME is any other symbol.
131 Contexts where this distinction is not important can use the
132 nonterminal "name", which matches either NAME or TYPENAME. */
134 %token <tsval> RAW_STRING
135 %token <tsval> STRING
136 %token <tsval> CHAR
137 %token <ssym> NAME
138 %token <tsym> TYPENAME /* Not TYPE_NAME cus already taken. */
139 %token <voidval> COMPLETE
140 /*%type <sval> name*/
141 %type <svec> string_exp
142 %type <ssym> name_not_typename
144 /* A NAME_OR_INT is a symbol which is not known in the symbol table,
145 but which would parse as a valid number in the current input radix.
146 E.g. "c" when input_radix==16. Depending on the parse, it will be
147 turned into a name or into a number. */
148 %token <ssym> NAME_OR_INT
150 %token <lval> TRUE_KEYWORD FALSE_KEYWORD
151 %token STRUCT_KEYWORD INTERFACE_KEYWORD TYPE_KEYWORD CHAN_KEYWORD
152 %token SIZEOF_KEYWORD
153 %token LEN_KEYWORD CAP_KEYWORD
154 %token NEW_KEYWORD
155 %token IOTA_KEYWORD NIL_KEYWORD
156 %token CONST_KEYWORD
157 %token DOTDOTDOT
158 %token ENTRY
159 %token ERROR
161 /* Special type cases. */
162 %token BYTE_KEYWORD /* An alias of uint8. */
164 %token <sval> DOLLAR_VARIABLE
166 %token <opcode> ASSIGN_MODIFY
168 %left ','
169 %left ABOVE_COMMA
170 %right '=' ASSIGN_MODIFY
171 %right '?'
172 %left OROR
173 %left ANDAND
174 %left '|'
175 %left '^'
176 %left '&'
177 %left ANDNOT
178 %left EQUAL NOTEQUAL
179 %left '<' '>' LEQ GEQ
180 %left LSH RSH
181 %left '@'
182 %left '+' '-'
183 %left '*' '/' '%'
184 %right UNARY INCREMENT DECREMENT
185 %right LEFT_ARROW '.' '[' '('
190 start : exp1
191 | type_exp
194 type_exp: type
195 { pstate->push_new<type_operation> ($1); }
198 /* Expressions, including the comma operator. */
199 exp1 : exp
200 | exp1 ',' exp
201 { pstate->wrap2<comma_operation> (); }
204 /* Expressions, not including the comma operator. */
205 exp : '*' exp %prec UNARY
206 { pstate->wrap<unop_ind_operation> (); }
209 exp : '&' exp %prec UNARY
210 { pstate->wrap<unop_addr_operation> (); }
213 exp : '-' exp %prec UNARY
214 { pstate->wrap<unary_neg_operation> (); }
217 exp : '+' exp %prec UNARY
218 { pstate->wrap<unary_plus_operation> (); }
221 exp : '!' exp %prec UNARY
222 { pstate->wrap<unary_logical_not_operation> (); }
225 exp : '^' exp %prec UNARY
226 { pstate->wrap<unary_complement_operation> (); }
229 exp : exp INCREMENT %prec UNARY
230 { pstate->wrap<postinc_operation> (); }
233 exp : exp DECREMENT %prec UNARY
234 { pstate->wrap<postdec_operation> (); }
237 /* foo->bar is not in Go. May want as a gdb extension. Later. */
239 exp : exp '.' name_not_typename
241 pstate->push_new<structop_operation>
242 (pstate->pop (), copy_name ($3.stoken));
246 exp : exp '.' name_not_typename COMPLETE
248 structop_base_operation *op
249 = new structop_operation (pstate->pop (),
250 copy_name ($3.stoken));
251 pstate->mark_struct_expression (op);
252 pstate->push (operation_up (op));
256 exp : exp '.' COMPLETE
258 structop_base_operation *op
259 = new structop_operation (pstate->pop (), "");
260 pstate->mark_struct_expression (op);
261 pstate->push (operation_up (op));
265 exp : exp '[' exp1 ']'
266 { pstate->wrap2<subscript_operation> (); }
269 exp : exp '('
270 /* This is to save the value of arglist_len
271 being accumulated by an outer function call. */
272 { pstate->start_arglist (); }
273 arglist ')' %prec LEFT_ARROW
275 std::vector<operation_up> args
276 = pstate->pop_vector (pstate->end_arglist ());
277 pstate->push_new<funcall_operation>
278 (pstate->pop (), std::move (args));
282 lcurly : '{'
283 { pstate->start_arglist (); }
286 arglist :
289 arglist : exp
290 { pstate->arglist_len = 1; }
293 arglist : arglist ',' exp %prec ABOVE_COMMA
294 { pstate->arglist_len++; }
297 rcurly : '}'
298 { $$ = pstate->end_arglist () - 1; }
301 exp : lcurly type rcurly exp %prec UNARY
303 pstate->push_new<unop_memval_operation>
304 (pstate->pop (), $2);
308 exp : type '(' exp ')' %prec UNARY
310 pstate->push_new<unop_cast_operation>
311 (pstate->pop (), $1);
315 exp : '(' exp1 ')'
319 /* Binary operators in order of decreasing precedence. */
321 exp : exp '@' exp
322 { pstate->wrap2<repeat_operation> (); }
325 exp : exp '*' exp
326 { pstate->wrap2<mul_operation> (); }
329 exp : exp '/' exp
330 { pstate->wrap2<div_operation> (); }
333 exp : exp '%' exp
334 { pstate->wrap2<rem_operation> (); }
337 exp : exp '+' exp
338 { pstate->wrap2<add_operation> (); }
341 exp : exp '-' exp
342 { pstate->wrap2<sub_operation> (); }
345 exp : exp LSH exp
346 { pstate->wrap2<lsh_operation> (); }
349 exp : exp RSH exp
350 { pstate->wrap2<rsh_operation> (); }
353 exp : exp EQUAL exp
354 { pstate->wrap2<equal_operation> (); }
357 exp : exp NOTEQUAL exp
358 { pstate->wrap2<notequal_operation> (); }
361 exp : exp LEQ exp
362 { pstate->wrap2<leq_operation> (); }
365 exp : exp GEQ exp
366 { pstate->wrap2<geq_operation> (); }
369 exp : exp '<' exp
370 { pstate->wrap2<less_operation> (); }
373 exp : exp '>' exp
374 { pstate->wrap2<gtr_operation> (); }
377 exp : exp '&' exp
378 { pstate->wrap2<bitwise_and_operation> (); }
381 exp : exp '^' exp
382 { pstate->wrap2<bitwise_xor_operation> (); }
385 exp : exp '|' exp
386 { pstate->wrap2<bitwise_ior_operation> (); }
389 exp : exp ANDAND exp
390 { pstate->wrap2<logical_and_operation> (); }
393 exp : exp OROR exp
394 { pstate->wrap2<logical_or_operation> (); }
397 exp : exp '?' exp ':' exp %prec '?'
399 operation_up last = pstate->pop ();
400 operation_up mid = pstate->pop ();
401 operation_up first = pstate->pop ();
402 pstate->push_new<ternop_cond_operation>
403 (std::move (first), std::move (mid),
404 std::move (last));
408 exp : exp '=' exp
409 { pstate->wrap2<assign_operation> (); }
412 exp : exp ASSIGN_MODIFY exp
414 operation_up rhs = pstate->pop ();
415 operation_up lhs = pstate->pop ();
416 pstate->push_new<assign_modify_operation>
417 ($2, std::move (lhs), std::move (rhs));
421 exp : INT
423 pstate->push_new<long_const_operation>
424 ($1.type, $1.val);
428 exp : CHAR
430 struct stoken_vector vec;
431 vec.len = 1;
432 vec.tokens = &$1;
433 pstate->push_c_string ($1.type, &vec);
437 exp : NAME_OR_INT
438 { YYSTYPE val;
439 parse_number (pstate, $1.stoken.ptr,
440 $1.stoken.length, 0, &val);
441 pstate->push_new<long_const_operation>
442 (val.typed_val_int.type,
443 val.typed_val_int.val);
448 exp : FLOAT
450 float_data data;
451 std::copy (std::begin ($1.val), std::end ($1.val),
452 std::begin (data));
453 pstate->push_new<float_const_operation> ($1.type, data);
457 exp : variable
460 exp : DOLLAR_VARIABLE
462 pstate->push_dollar ($1);
466 exp : SIZEOF_KEYWORD '(' type ')' %prec UNARY
468 /* TODO(dje): Go objects in structs. */
469 /* TODO(dje): What's the right type here? */
470 struct type *size_type
471 = parse_type (pstate)->builtin_unsigned_int;
472 $3 = check_typedef ($3);
473 pstate->push_new<long_const_operation>
474 (size_type, (LONGEST) $3->length ());
478 exp : SIZEOF_KEYWORD '(' exp ')' %prec UNARY
480 /* TODO(dje): Go objects in structs. */
481 pstate->wrap<unop_sizeof_operation> ();
484 string_exp:
485 STRING
487 /* We copy the string here, and not in the
488 lexer, to guarantee that we do not leak a
489 string. */
490 /* Note that we NUL-terminate here, but just
491 for convenience. */
492 struct typed_stoken *vec = XNEW (struct typed_stoken);
493 $$.len = 1;
494 $$.tokens = vec;
496 vec->type = $1.type;
497 vec->length = $1.length;
498 vec->ptr = (char *) malloc ($1.length + 1);
499 memcpy (vec->ptr, $1.ptr, $1.length + 1);
502 | string_exp '+' STRING
504 /* Note that we NUL-terminate here, but just
505 for convenience. */
506 char *p;
507 ++$$.len;
508 $$.tokens = XRESIZEVEC (struct typed_stoken,
509 $$.tokens, $$.len);
511 p = (char *) malloc ($3.length + 1);
512 memcpy (p, $3.ptr, $3.length + 1);
514 $$.tokens[$$.len - 1].type = $3.type;
515 $$.tokens[$$.len - 1].length = $3.length;
516 $$.tokens[$$.len - 1].ptr = p;
520 exp : string_exp %prec ABOVE_COMMA
522 int i;
524 /* Always utf8. */
525 pstate->push_c_string (0, &$1);
526 for (i = 0; i < $1.len; ++i)
527 free ($1.tokens[i].ptr);
528 free ($1.tokens);
532 exp : TRUE_KEYWORD
533 { pstate->push_new<bool_operation> ($1); }
536 exp : FALSE_KEYWORD
537 { pstate->push_new<bool_operation> ($1); }
540 variable: name_not_typename ENTRY
541 { struct symbol *sym = $1.sym.symbol;
543 if (sym == NULL
544 || !sym->is_argument ()
545 || !symbol_read_needs_frame (sym))
546 error (_("@entry can be used only for function "
547 "parameters, not for \"%s\""),
548 copy_name ($1.stoken).c_str ());
550 pstate->push_new<var_entry_value_operation> (sym);
554 variable: name_not_typename
555 { struct block_symbol sym = $1.sym;
557 if (sym.symbol)
559 if (symbol_read_needs_frame (sym.symbol))
560 pstate->block_tracker->update (sym);
562 pstate->push_new<var_value_operation> (sym);
564 else if ($1.is_a_field_of_this)
566 /* TODO(dje): Can we get here?
567 E.g., via a mix of c++ and go? */
568 gdb_assert_not_reached ("go with `this' field");
570 else
572 struct bound_minimal_symbol msymbol;
573 std::string arg = copy_name ($1.stoken);
575 msymbol =
576 lookup_bound_minimal_symbol (arg.c_str ());
577 if (msymbol.minsym != NULL)
578 pstate->push_new<var_msym_value_operation>
579 (msymbol);
580 else if (!have_full_symbols ()
581 && !have_partial_symbols ())
582 error (_("No symbol table is loaded. "
583 "Use the \"file\" command."));
584 else
585 error (_("No symbol \"%s\" in current context."),
586 arg.c_str ());
591 /* TODO
592 method_exp: PACKAGENAME '.' name '.' name
598 type /* Implements (approximately): [*] type-specifier */
599 : '*' type
600 { $$ = lookup_pointer_type ($2); }
601 | TYPENAME
602 { $$ = $1.type; }
604 | STRUCT_KEYWORD name
605 { $$ = lookup_struct (copy_name ($2),
606 expression_context_block); }
608 | BYTE_KEYWORD
609 { $$ = builtin_go_type (pstate->gdbarch ())
610 ->builtin_uint8; }
613 /* TODO
614 name : NAME { $$ = $1.stoken; }
615 | TYPENAME { $$ = $1.stoken; }
616 | NAME_OR_INT { $$ = $1.stoken; }
620 name_not_typename
621 : NAME
622 /* These would be useful if name_not_typename was useful, but it is just
623 a fake for "variable", so these cause reduce/reduce conflicts because
624 the parser can't tell whether NAME_OR_INT is a name_not_typename (=variable,
625 =exp) or just an exp. If name_not_typename was ever used in an lvalue
626 context where only a name could occur, this might be useful.
627 | NAME_OR_INT
633 /* Take care of parsing a number (anything that starts with a digit).
634 Set yylval and return the token type; update lexptr.
635 LEN is the number of characters in it. */
637 /* FIXME: Needs some error checking for the float case. */
638 /* FIXME(dje): IWBN to use c-exp.y's parse_number if we could.
639 That will require moving the guts into a function that we both call
640 as our YYSTYPE is different than c-exp.y's */
642 static int
643 parse_number (struct parser_state *par_state,
644 const char *p, int len, int parsed_float, YYSTYPE *putithere)
646 ULONGEST n = 0;
647 ULONGEST prevn = 0;
649 int i = 0;
650 int c;
651 int base = input_radix;
652 int unsigned_p = 0;
654 /* Number of "L" suffixes encountered. */
655 int long_p = 0;
657 /* We have found a "L" or "U" suffix. */
658 int found_suffix = 0;
660 if (parsed_float)
662 const struct builtin_go_type *builtin_go_types
663 = builtin_go_type (par_state->gdbarch ());
665 /* Handle suffixes: 'f' for float32, 'l' for long double.
666 FIXME: This appears to be an extension -- do we want this? */
667 if (len >= 1 && tolower (p[len - 1]) == 'f')
669 putithere->typed_val_float.type
670 = builtin_go_types->builtin_float32;
671 len--;
673 else if (len >= 1 && tolower (p[len - 1]) == 'l')
675 putithere->typed_val_float.type
676 = parse_type (par_state)->builtin_long_double;
677 len--;
679 /* Default type for floating-point literals is float64. */
680 else
682 putithere->typed_val_float.type
683 = builtin_go_types->builtin_float64;
686 if (!parse_float (p, len,
687 putithere->typed_val_float.type,
688 putithere->typed_val_float.val))
689 return ERROR;
690 return FLOAT;
693 /* Handle base-switching prefixes 0x, 0t, 0d, 0. */
694 if (p[0] == '0' && len > 1)
695 switch (p[1])
697 case 'x':
698 case 'X':
699 if (len >= 3)
701 p += 2;
702 base = 16;
703 len -= 2;
705 break;
707 case 'b':
708 case 'B':
709 if (len >= 3)
711 p += 2;
712 base = 2;
713 len -= 2;
715 break;
717 case 't':
718 case 'T':
719 case 'd':
720 case 'D':
721 if (len >= 3)
723 p += 2;
724 base = 10;
725 len -= 2;
727 break;
729 default:
730 base = 8;
731 break;
734 while (len-- > 0)
736 c = *p++;
737 if (c >= 'A' && c <= 'Z')
738 c += 'a' - 'A';
739 if (c != 'l' && c != 'u')
740 n *= base;
741 if (c >= '0' && c <= '9')
743 if (found_suffix)
744 return ERROR;
745 n += i = c - '0';
747 else
749 if (base > 10 && c >= 'a' && c <= 'f')
751 if (found_suffix)
752 return ERROR;
753 n += i = c - 'a' + 10;
755 else if (c == 'l')
757 ++long_p;
758 found_suffix = 1;
760 else if (c == 'u')
762 unsigned_p = 1;
763 found_suffix = 1;
765 else
766 return ERROR; /* Char not a digit */
768 if (i >= base)
769 return ERROR; /* Invalid digit in this base. */
771 if (c != 'l' && c != 'u')
773 /* Test for overflow. */
774 if (n == 0 && prevn == 0)
776 else if (prevn >= n)
777 error (_("Numeric constant too large."));
779 prevn = n;
782 /* An integer constant is an int, a long, or a long long. An L
783 suffix forces it to be long; an LL suffix forces it to be long
784 long. If not forced to a larger size, it gets the first type of
785 the above that it fits in. To figure out whether it fits, we
786 shift it right and see whether anything remains. Note that we
787 can't shift sizeof (LONGEST) * HOST_CHAR_BIT bits or more in one
788 operation, because many compilers will warn about such a shift
789 (which always produces a zero result). Sometimes gdbarch_int_bit
790 or gdbarch_long_bit will be that big, sometimes not. To deal with
791 the case where it is we just always shift the value more than
792 once, with fewer bits each time. */
794 int int_bits = gdbarch_int_bit (par_state->gdbarch ());
795 int long_bits = gdbarch_long_bit (par_state->gdbarch ());
796 int long_long_bits = gdbarch_long_long_bit (par_state->gdbarch ());
797 bool have_signed = !unsigned_p;
798 bool have_int = long_p == 0;
799 bool have_long = long_p <= 1;
800 if (have_int && have_signed && fits_in_type (1, n, int_bits, true))
801 putithere->typed_val_int.type = parse_type (par_state)->builtin_int;
802 else if (have_int && fits_in_type (1, n, int_bits, false))
803 putithere->typed_val_int.type
804 = parse_type (par_state)->builtin_unsigned_int;
805 else if (have_long && have_signed && fits_in_type (1, n, long_bits, true))
806 putithere->typed_val_int.type = parse_type (par_state)->builtin_long;
807 else if (have_long && fits_in_type (1, n, long_bits, false))
808 putithere->typed_val_int.type
809 = parse_type (par_state)->builtin_unsigned_long;
810 else if (have_signed && fits_in_type (1, n, long_long_bits, true))
811 putithere->typed_val_int.type
812 = parse_type (par_state)->builtin_long_long;
813 else if (fits_in_type (1, n, long_long_bits, false))
814 putithere->typed_val_int.type
815 = parse_type (par_state)->builtin_unsigned_long_long;
816 else
817 error (_("Numeric constant too large."));
818 putithere->typed_val_int.val = n;
820 return INT;
823 /* Temporary obstack used for holding strings. */
824 static struct obstack tempbuf;
825 static int tempbuf_init;
827 /* Parse a string or character literal from TOKPTR. The string or
828 character may be wide or unicode. *OUTPTR is set to just after the
829 end of the literal in the input string. The resulting token is
830 stored in VALUE. This returns a token value, either STRING or
831 CHAR, depending on what was parsed. *HOST_CHARS is set to the
832 number of host characters in the literal. */
834 static int
835 parse_string_or_char (const char *tokptr, const char **outptr,
836 struct typed_stoken *value, int *host_chars)
838 int quote;
840 /* Build the gdb internal form of the input string in tempbuf. Note
841 that the buffer is null byte terminated *only* for the
842 convenience of debugging gdb itself and printing the buffer
843 contents when the buffer contains no embedded nulls. Gdb does
844 not depend upon the buffer being null byte terminated, it uses
845 the length string instead. This allows gdb to handle C strings
846 (as well as strings in other languages) with embedded null
847 bytes */
849 if (!tempbuf_init)
850 tempbuf_init = 1;
851 else
852 obstack_free (&tempbuf, NULL);
853 obstack_init (&tempbuf);
855 /* Skip the quote. */
856 quote = *tokptr;
857 ++tokptr;
859 *host_chars = 0;
861 while (*tokptr)
863 char c = *tokptr;
864 if (c == '\\')
866 ++tokptr;
867 *host_chars += c_parse_escape (&tokptr, &tempbuf);
869 else if (c == quote)
870 break;
871 else
873 obstack_1grow (&tempbuf, c);
874 ++tokptr;
875 /* FIXME: this does the wrong thing with multi-byte host
876 characters. We could use mbrlen here, but that would
877 make "set host-charset" a bit less useful. */
878 ++*host_chars;
882 if (*tokptr != quote)
884 if (quote == '"')
885 error (_("Unterminated string in expression."));
886 else
887 error (_("Unmatched single quote."));
889 ++tokptr;
891 value->type = (int) C_STRING | (quote == '\'' ? C_CHAR : 0); /*FIXME*/
892 value->ptr = (char *) obstack_base (&tempbuf);
893 value->length = obstack_object_size (&tempbuf);
895 *outptr = tokptr;
897 return quote == '\'' ? CHAR : STRING;
900 struct go_token
902 const char *oper;
903 int token;
904 enum exp_opcode opcode;
907 static const struct go_token tokentab3[] =
909 {">>=", ASSIGN_MODIFY, BINOP_RSH},
910 {"<<=", ASSIGN_MODIFY, BINOP_LSH},
911 /*{"&^=", ASSIGN_MODIFY, BINOP_BITWISE_ANDNOT}, TODO */
912 {"...", DOTDOTDOT, OP_NULL},
915 static const struct go_token tokentab2[] =
917 {"+=", ASSIGN_MODIFY, BINOP_ADD},
918 {"-=", ASSIGN_MODIFY, BINOP_SUB},
919 {"*=", ASSIGN_MODIFY, BINOP_MUL},
920 {"/=", ASSIGN_MODIFY, BINOP_DIV},
921 {"%=", ASSIGN_MODIFY, BINOP_REM},
922 {"|=", ASSIGN_MODIFY, BINOP_BITWISE_IOR},
923 {"&=", ASSIGN_MODIFY, BINOP_BITWISE_AND},
924 {"^=", ASSIGN_MODIFY, BINOP_BITWISE_XOR},
925 {"++", INCREMENT, OP_NULL},
926 {"--", DECREMENT, OP_NULL},
927 /*{"->", RIGHT_ARROW, OP_NULL}, Doesn't exist in Go. */
928 {"<-", LEFT_ARROW, OP_NULL},
929 {"&&", ANDAND, OP_NULL},
930 {"||", OROR, OP_NULL},
931 {"<<", LSH, OP_NULL},
932 {">>", RSH, OP_NULL},
933 {"==", EQUAL, OP_NULL},
934 {"!=", NOTEQUAL, OP_NULL},
935 {"<=", LEQ, OP_NULL},
936 {">=", GEQ, OP_NULL},
937 /*{"&^", ANDNOT, OP_NULL}, TODO */
940 /* Identifier-like tokens. */
941 static const struct go_token ident_tokens[] =
943 {"true", TRUE_KEYWORD, OP_NULL},
944 {"false", FALSE_KEYWORD, OP_NULL},
945 {"nil", NIL_KEYWORD, OP_NULL},
946 {"const", CONST_KEYWORD, OP_NULL},
947 {"struct", STRUCT_KEYWORD, OP_NULL},
948 {"type", TYPE_KEYWORD, OP_NULL},
949 {"interface", INTERFACE_KEYWORD, OP_NULL},
950 {"chan", CHAN_KEYWORD, OP_NULL},
951 {"byte", BYTE_KEYWORD, OP_NULL}, /* An alias of uint8. */
952 {"len", LEN_KEYWORD, OP_NULL},
953 {"cap", CAP_KEYWORD, OP_NULL},
954 {"new", NEW_KEYWORD, OP_NULL},
955 {"iota", IOTA_KEYWORD, OP_NULL},
958 /* This is set if a NAME token appeared at the very end of the input
959 string, with no whitespace separating the name from the EOF. This
960 is used only when parsing to do field name completion. */
961 static int saw_name_at_eof;
963 /* This is set if the previously-returned token was a structure
964 operator -- either '.' or ARROW. This is used only when parsing to
965 do field name completion. */
966 static int last_was_structop;
968 /* Depth of parentheses. */
969 static int paren_depth;
971 /* Read one token, getting characters through lexptr. */
973 static int
974 lex_one_token (struct parser_state *par_state)
976 int c;
977 int namelen;
978 const char *tokstart;
979 int saw_structop = last_was_structop;
981 last_was_structop = 0;
983 retry:
985 par_state->prev_lexptr = par_state->lexptr;
987 tokstart = par_state->lexptr;
988 /* See if it is a special token of length 3. */
989 for (const auto &token : tokentab3)
990 if (strncmp (tokstart, token.oper, 3) == 0)
992 par_state->lexptr += 3;
993 yylval.opcode = token.opcode;
994 return token.token;
997 /* See if it is a special token of length 2. */
998 for (const auto &token : tokentab2)
999 if (strncmp (tokstart, token.oper, 2) == 0)
1001 par_state->lexptr += 2;
1002 yylval.opcode = token.opcode;
1003 /* NOTE: -> doesn't exist in Go, so we don't need to watch for
1004 setting last_was_structop here. */
1005 return token.token;
1008 switch (c = *tokstart)
1010 case 0:
1011 if (saw_name_at_eof)
1013 saw_name_at_eof = 0;
1014 return COMPLETE;
1016 else if (saw_structop)
1017 return COMPLETE;
1018 else
1019 return 0;
1021 case ' ':
1022 case '\t':
1023 case '\n':
1024 par_state->lexptr++;
1025 goto retry;
1027 case '[':
1028 case '(':
1029 paren_depth++;
1030 par_state->lexptr++;
1031 return c;
1033 case ']':
1034 case ')':
1035 if (paren_depth == 0)
1036 return 0;
1037 paren_depth--;
1038 par_state->lexptr++;
1039 return c;
1041 case ',':
1042 if (pstate->comma_terminates
1043 && paren_depth == 0)
1044 return 0;
1045 par_state->lexptr++;
1046 return c;
1048 case '.':
1049 /* Might be a floating point number. */
1050 if (par_state->lexptr[1] < '0' || par_state->lexptr[1] > '9')
1052 if (pstate->parse_completion)
1053 last_was_structop = 1;
1054 goto symbol; /* Nope, must be a symbol. */
1056 [[fallthrough]];
1058 case '0':
1059 case '1':
1060 case '2':
1061 case '3':
1062 case '4':
1063 case '5':
1064 case '6':
1065 case '7':
1066 case '8':
1067 case '9':
1069 /* It's a number. */
1070 int got_dot = 0, got_e = 0, toktype;
1071 const char *p = tokstart;
1072 int hex = input_radix > 10;
1074 if (c == '0' && (p[1] == 'x' || p[1] == 'X'))
1076 p += 2;
1077 hex = 1;
1080 for (;; ++p)
1082 /* This test includes !hex because 'e' is a valid hex digit
1083 and thus does not indicate a floating point number when
1084 the radix is hex. */
1085 if (!hex && !got_e && (*p == 'e' || *p == 'E'))
1086 got_dot = got_e = 1;
1087 /* This test does not include !hex, because a '.' always indicates
1088 a decimal floating point number regardless of the radix. */
1089 else if (!got_dot && *p == '.')
1090 got_dot = 1;
1091 else if (got_e && (p[-1] == 'e' || p[-1] == 'E')
1092 && (*p == '-' || *p == '+'))
1093 /* This is the sign of the exponent, not the end of the
1094 number. */
1095 continue;
1096 /* We will take any letters or digits. parse_number will
1097 complain if past the radix, or if L or U are not final. */
1098 else if ((*p < '0' || *p > '9')
1099 && ((*p < 'a' || *p > 'z')
1100 && (*p < 'A' || *p > 'Z')))
1101 break;
1103 toktype = parse_number (par_state, tokstart, p - tokstart,
1104 got_dot|got_e, &yylval);
1105 if (toktype == ERROR)
1107 char *err_copy = (char *) alloca (p - tokstart + 1);
1109 memcpy (err_copy, tokstart, p - tokstart);
1110 err_copy[p - tokstart] = 0;
1111 error (_("Invalid number \"%s\"."), err_copy);
1113 par_state->lexptr = p;
1114 return toktype;
1117 case '@':
1119 const char *p = &tokstart[1];
1120 size_t len = strlen ("entry");
1122 while (isspace (*p))
1123 p++;
1124 if (strncmp (p, "entry", len) == 0 && !isalnum (p[len])
1125 && p[len] != '_')
1127 par_state->lexptr = &p[len];
1128 return ENTRY;
1131 [[fallthrough]];
1132 case '+':
1133 case '-':
1134 case '*':
1135 case '/':
1136 case '%':
1137 case '|':
1138 case '&':
1139 case '^':
1140 case '~':
1141 case '!':
1142 case '<':
1143 case '>':
1144 case '?':
1145 case ':':
1146 case '=':
1147 case '{':
1148 case '}':
1149 symbol:
1150 par_state->lexptr++;
1151 return c;
1153 case '\'':
1154 case '"':
1155 case '`':
1157 int host_len;
1158 int result = parse_string_or_char (tokstart, &par_state->lexptr,
1159 &yylval.tsval, &host_len);
1160 if (result == CHAR)
1162 if (host_len == 0)
1163 error (_("Empty character constant."));
1164 else if (host_len > 2 && c == '\'')
1166 ++tokstart;
1167 namelen = par_state->lexptr - tokstart - 1;
1168 goto tryname;
1170 else if (host_len > 1)
1171 error (_("Invalid character constant."));
1173 return result;
1177 if (!(c == '_' || c == '$'
1178 || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')))
1179 /* We must have come across a bad character (e.g. ';'). */
1180 error (_("Invalid character '%c' in expression."), c);
1182 /* It's a name. See how long it is. */
1183 namelen = 0;
1184 for (c = tokstart[namelen];
1185 (c == '_' || c == '$' || (c >= '0' && c <= '9')
1186 || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'));)
1188 c = tokstart[++namelen];
1191 /* The token "if" terminates the expression and is NOT removed from
1192 the input stream. It doesn't count if it appears in the
1193 expansion of a macro. */
1194 if (namelen == 2
1195 && tokstart[0] == 'i'
1196 && tokstart[1] == 'f')
1198 return 0;
1201 /* For the same reason (breakpoint conditions), "thread N"
1202 terminates the expression. "thread" could be an identifier, but
1203 an identifier is never followed by a number without intervening
1204 punctuation.
1205 Handle abbreviations of these, similarly to
1206 breakpoint.c:find_condition_and_thread.
1207 TODO: Watch for "goroutine" here? */
1208 if (namelen >= 1
1209 && strncmp (tokstart, "thread", namelen) == 0
1210 && (tokstart[namelen] == ' ' || tokstart[namelen] == '\t'))
1212 const char *p = tokstart + namelen + 1;
1214 while (*p == ' ' || *p == '\t')
1215 p++;
1216 if (*p >= '0' && *p <= '9')
1217 return 0;
1220 par_state->lexptr += namelen;
1222 tryname:
1224 yylval.sval.ptr = tokstart;
1225 yylval.sval.length = namelen;
1227 /* Catch specific keywords. */
1228 std::string copy = copy_name (yylval.sval);
1229 for (const auto &token : ident_tokens)
1230 if (copy == token.oper)
1232 /* It is ok to always set this, even though we don't always
1233 strictly need to. */
1234 yylval.opcode = token.opcode;
1235 return token.token;
1238 if (*tokstart == '$')
1239 return DOLLAR_VARIABLE;
1241 if (pstate->parse_completion && *par_state->lexptr == '\0')
1242 saw_name_at_eof = 1;
1243 return NAME;
1246 /* An object of this type is pushed on a FIFO by the "outer" lexer. */
1247 struct go_token_and_value
1249 int token;
1250 YYSTYPE value;
1253 /* A FIFO of tokens that have been read but not yet returned to the
1254 parser. */
1255 static std::vector<go_token_and_value> token_fifo;
1257 /* Non-zero if the lexer should return tokens from the FIFO. */
1258 static int popping;
1260 /* Temporary storage for yylex; this holds symbol names as they are
1261 built up. */
1262 static auto_obstack name_obstack;
1264 /* Build "package.name" in name_obstack.
1265 For convenience of the caller, the name is NUL-terminated,
1266 but the NUL is not included in the recorded length. */
1268 static struct stoken
1269 build_packaged_name (const char *package, int package_len,
1270 const char *name, int name_len)
1272 struct stoken result;
1274 name_obstack.clear ();
1275 obstack_grow (&name_obstack, package, package_len);
1276 obstack_grow_str (&name_obstack, ".");
1277 obstack_grow (&name_obstack, name, name_len);
1278 obstack_grow (&name_obstack, "", 1);
1279 result.ptr = (char *) obstack_base (&name_obstack);
1280 result.length = obstack_object_size (&name_obstack) - 1;
1282 return result;
1285 /* Return non-zero if NAME is a package name.
1286 BLOCK is the scope in which to interpret NAME; this can be NULL
1287 to mean the global scope. */
1289 static int
1290 package_name_p (const char *name, const struct block *block)
1292 struct symbol *sym;
1293 struct field_of_this_result is_a_field_of_this;
1295 sym = lookup_symbol (name, block, SEARCH_TYPE_DOMAIN,
1296 &is_a_field_of_this).symbol;
1298 if (sym
1299 && sym->aclass () == LOC_TYPEDEF
1300 && sym->type ()->code () == TYPE_CODE_MODULE)
1301 return 1;
1303 return 0;
1306 /* Classify a (potential) function in the "unsafe" package.
1307 We fold these into "keywords" to keep things simple, at least until
1308 something more complex is warranted. */
1310 static int
1311 classify_unsafe_function (struct stoken function_name)
1313 std::string copy = copy_name (function_name);
1315 if (copy == "Sizeof")
1317 yylval.sval = function_name;
1318 return SIZEOF_KEYWORD;
1321 error (_("Unknown function in `unsafe' package: %s"), copy.c_str ());
1324 /* Classify token(s) "name1.name2" where name1 is known to be a package.
1325 The contents of the token are in `yylval'.
1326 Updates yylval and returns the new token type.
1328 The result is one of NAME, NAME_OR_INT, or TYPENAME. */
1330 static int
1331 classify_packaged_name (const struct block *block)
1333 struct block_symbol sym;
1334 struct field_of_this_result is_a_field_of_this;
1336 std::string copy = copy_name (yylval.sval);
1338 sym = lookup_symbol (copy.c_str (), block, SEARCH_VFT, &is_a_field_of_this);
1340 if (sym.symbol)
1342 yylval.ssym.sym = sym;
1343 yylval.ssym.is_a_field_of_this = is_a_field_of_this.type != NULL;
1346 return NAME;
1349 /* Classify a NAME token.
1350 The contents of the token are in `yylval'.
1351 Updates yylval and returns the new token type.
1352 BLOCK is the block in which lookups start; this can be NULL
1353 to mean the global scope.
1355 The result is one of NAME, NAME_OR_INT, or TYPENAME. */
1357 static int
1358 classify_name (struct parser_state *par_state, const struct block *block)
1360 struct type *type;
1361 struct block_symbol sym;
1362 struct field_of_this_result is_a_field_of_this;
1364 std::string copy = copy_name (yylval.sval);
1366 /* Try primitive types first so they win over bad/weird debug info. */
1367 type = language_lookup_primitive_type (par_state->language (),
1368 par_state->gdbarch (),
1369 copy.c_str ());
1370 if (type != NULL)
1372 /* NOTE: We take advantage of the fact that yylval coming in was a
1373 NAME, and that struct ttype is a compatible extension of struct
1374 stoken, so yylval.tsym.stoken is already filled in. */
1375 yylval.tsym.type = type;
1376 return TYPENAME;
1379 /* TODO: What about other types? */
1381 sym = lookup_symbol (copy.c_str (), block, SEARCH_VFT, &is_a_field_of_this);
1383 if (sym.symbol)
1385 yylval.ssym.sym = sym;
1386 yylval.ssym.is_a_field_of_this = is_a_field_of_this.type != NULL;
1387 return NAME;
1390 /* If we didn't find a symbol, look again in the current package.
1391 This is to, e.g., make "p global_var" work without having to specify
1392 the package name. We intentionally only looks for objects in the
1393 current package. */
1396 gdb::unique_xmalloc_ptr<char> current_package_name
1397 = go_block_package_name (block);
1399 if (current_package_name != NULL)
1401 struct stoken sval =
1402 build_packaged_name (current_package_name.get (),
1403 strlen (current_package_name.get ()),
1404 copy.c_str (), copy.size ());
1406 sym = lookup_symbol (sval.ptr, block, SEARCH_VFT,
1407 &is_a_field_of_this);
1408 if (sym.symbol)
1410 yylval.ssym.stoken = sval;
1411 yylval.ssym.sym = sym;
1412 yylval.ssym.is_a_field_of_this = is_a_field_of_this.type != NULL;
1413 return NAME;
1418 /* Input names that aren't symbols but ARE valid hex numbers, when
1419 the input radix permits them, can be names or numbers depending
1420 on the parse. Note we support radixes > 16 here. */
1421 if ((copy[0] >= 'a' && copy[0] < 'a' + input_radix - 10)
1422 || (copy[0] >= 'A' && copy[0] < 'A' + input_radix - 10))
1424 YYSTYPE newlval; /* Its value is ignored. */
1425 int hextype = parse_number (par_state, copy.c_str (),
1426 yylval.sval.length, 0, &newlval);
1427 if (hextype == INT)
1429 yylval.ssym.sym.symbol = NULL;
1430 yylval.ssym.sym.block = NULL;
1431 yylval.ssym.is_a_field_of_this = 0;
1432 return NAME_OR_INT;
1436 yylval.ssym.sym.symbol = NULL;
1437 yylval.ssym.sym.block = NULL;
1438 yylval.ssym.is_a_field_of_this = 0;
1439 return NAME;
1442 /* This is taken from c-exp.y mostly to get something working.
1443 The basic structure has been kept because we may yet need some of it. */
1445 static int
1446 yylex (void)
1448 go_token_and_value current, next;
1450 if (popping && !token_fifo.empty ())
1452 go_token_and_value tv = token_fifo[0];
1453 token_fifo.erase (token_fifo.begin ());
1454 yylval = tv.value;
1455 /* There's no need to fall through to handle package.name
1456 as that can never happen here. In theory. */
1457 return tv.token;
1459 popping = 0;
1461 current.token = lex_one_token (pstate);
1463 /* TODO: Need a way to force specifying name1 as a package.
1464 .name1.name2 ? */
1466 if (current.token != NAME)
1467 return current.token;
1469 /* See if we have "name1 . name2". */
1471 current.value = yylval;
1472 next.token = lex_one_token (pstate);
1473 next.value = yylval;
1475 if (next.token == '.')
1477 go_token_and_value name2;
1479 name2.token = lex_one_token (pstate);
1480 name2.value = yylval;
1482 if (name2.token == NAME)
1484 /* Ok, we have "name1 . name2". */
1485 std::string copy = copy_name (current.value.sval);
1487 if (copy == "unsafe")
1489 popping = 1;
1490 return classify_unsafe_function (name2.value.sval);
1493 if (package_name_p (copy.c_str (), pstate->expression_context_block))
1495 popping = 1;
1496 yylval.sval = build_packaged_name (current.value.sval.ptr,
1497 current.value.sval.length,
1498 name2.value.sval.ptr,
1499 name2.value.sval.length);
1500 return classify_packaged_name (pstate->expression_context_block);
1504 token_fifo.push_back (next);
1505 token_fifo.push_back (name2);
1507 else
1508 token_fifo.push_back (next);
1510 /* If we arrive here we don't have a package-qualified name. */
1512 popping = 1;
1513 yylval = current.value;
1514 return classify_name (pstate, pstate->expression_context_block);
1517 /* See language.h. */
1520 go_language::parser (struct parser_state *par_state) const
1522 /* Setting up the parser state. */
1523 scoped_restore pstate_restore = make_scoped_restore (&pstate);
1524 gdb_assert (par_state != NULL);
1525 pstate = par_state;
1527 scoped_restore restore_yydebug = make_scoped_restore (&yydebug,
1528 par_state->debug);
1530 /* Initialize some state used by the lexer. */
1531 last_was_structop = 0;
1532 saw_name_at_eof = 0;
1533 paren_depth = 0;
1535 token_fifo.clear ();
1536 popping = 0;
1537 name_obstack.clear ();
1539 int result = yyparse ();
1540 if (!result)
1541 pstate->set_operation (pstate->pop ());
1542 return result;
1545 static void
1546 yyerror (const char *msg)
1548 pstate->parse_error (msg);