Automatic date update in version.in
[binutils-gdb.git] / gdb / go-exp.y
blob2e56e4bff6105f613504a9ef58742637d0b644cb
1 /* YACC parser for Go expressions, for GDB.
3 Copyright (C) 2012-2024 Free Software Foundation, Inc.
5 This file is part of GDB.
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program. If not, see <http://www.gnu.org/licenses/>. */
20 /* This file is derived from c-exp.y, p-exp.y. */
22 /* Parse a Go expression from text in a string,
23 and return the result as a struct expression pointer.
24 That structure contains arithmetic operations in reverse polish,
25 with constants represented by operations that are followed by special data.
26 See expression.h for the details of the format.
27 What is important here is that it can be built up sequentially
28 during the process of parsing; the lower levels of the tree always
29 come first in the result.
31 Note that malloc's and realloc's in this file are transformed to
32 xmalloc and xrealloc respectively by the same sed command in the
33 makefile that remaps any other malloc/realloc inserted by the parser
34 generator. Doing this with #defines and trying to control the interaction
35 with include files (<malloc.h> and <stdlib.h> for example) just became
36 too messy, particularly when such includes can be inserted at random
37 times by the parser generator. */
39 /* Known bugs or limitations:
41 - Unicode
42 - &^
43 - '_' (blank identifier)
44 - automatic deref of pointers
45 - method expressions
46 - interfaces, channels, etc.
48 And lots of other things.
49 I'm sure there's some cleanup to do.
54 #include "defs.h"
55 #include <ctype.h>
56 #include "expression.h"
57 #include "value.h"
58 #include "parser-defs.h"
59 #include "language.h"
60 #include "c-lang.h"
61 #include "go-lang.h"
62 #include "charset.h"
63 #include "block.h"
64 #include "expop.h"
66 #define parse_type(ps) builtin_type (ps->gdbarch ())
68 /* Remap normal yacc parser interface names (yyparse, yylex, yyerror,
69 etc). */
70 #define GDB_YY_REMAP_PREFIX go_
71 #include "yy-remap.h"
73 /* The state of the parser, used internally when we are parsing the
74 expression. */
76 static struct parser_state *pstate = NULL;
78 int yyparse (void);
80 static int yylex (void);
82 static void yyerror (const char *);
86 /* Although the yacc "value" of an expression is not used,
87 since the result is stored in the structure being created,
88 other node types do have values. */
90 %union
92 LONGEST lval;
93 struct {
94 LONGEST val;
95 struct type *type;
96 } typed_val_int;
97 struct {
98 gdb_byte val[16];
99 struct type *type;
100 } typed_val_float;
101 struct stoken sval;
102 struct symtoken ssym;
103 struct type *tval;
104 struct typed_stoken tsval;
105 struct ttype tsym;
106 int voidval;
107 enum exp_opcode opcode;
108 struct internalvar *ivar;
109 struct stoken_vector svec;
113 /* YYSTYPE gets defined by %union. */
114 static int parse_number (struct parser_state *,
115 const char *, int, int, YYSTYPE *);
117 using namespace expr;
120 %type <voidval> exp exp1 type_exp start variable lcurly
121 %type <lval> rcurly
122 %type <tval> type
124 %token <typed_val_int> INT
125 %token <typed_val_float> FLOAT
127 /* Both NAME and TYPENAME tokens represent symbols in the input,
128 and both convey their data as strings.
129 But a TYPENAME is a string that happens to be defined as a type
130 or builtin type name (such as int or char)
131 and a NAME is any other symbol.
132 Contexts where this distinction is not important can use the
133 nonterminal "name", which matches either NAME or TYPENAME. */
135 %token <tsval> RAW_STRING
136 %token <tsval> STRING
137 %token <tsval> CHAR
138 %token <ssym> NAME
139 %token <tsym> TYPENAME /* Not TYPE_NAME cus already taken. */
140 %token <voidval> COMPLETE
141 /*%type <sval> name*/
142 %type <svec> string_exp
143 %type <ssym> name_not_typename
145 /* A NAME_OR_INT is a symbol which is not known in the symbol table,
146 but which would parse as a valid number in the current input radix.
147 E.g. "c" when input_radix==16. Depending on the parse, it will be
148 turned into a name or into a number. */
149 %token <ssym> NAME_OR_INT
151 %token <lval> TRUE_KEYWORD FALSE_KEYWORD
152 %token STRUCT_KEYWORD INTERFACE_KEYWORD TYPE_KEYWORD CHAN_KEYWORD
153 %token SIZEOF_KEYWORD
154 %token LEN_KEYWORD CAP_KEYWORD
155 %token NEW_KEYWORD
156 %token IOTA_KEYWORD NIL_KEYWORD
157 %token CONST_KEYWORD
158 %token DOTDOTDOT
159 %token ENTRY
160 %token ERROR
162 /* Special type cases. */
163 %token BYTE_KEYWORD /* An alias of uint8. */
165 %token <sval> DOLLAR_VARIABLE
167 %token <opcode> ASSIGN_MODIFY
169 %left ','
170 %left ABOVE_COMMA
171 %right '=' ASSIGN_MODIFY
172 %right '?'
173 %left OROR
174 %left ANDAND
175 %left '|'
176 %left '^'
177 %left '&'
178 %left ANDNOT
179 %left EQUAL NOTEQUAL
180 %left '<' '>' LEQ GEQ
181 %left LSH RSH
182 %left '@'
183 %left '+' '-'
184 %left '*' '/' '%'
185 %right UNARY INCREMENT DECREMENT
186 %right LEFT_ARROW '.' '[' '('
191 start : exp1
192 | type_exp
195 type_exp: type
196 { pstate->push_new<type_operation> ($1); }
199 /* Expressions, including the comma operator. */
200 exp1 : exp
201 | exp1 ',' exp
202 { pstate->wrap2<comma_operation> (); }
205 /* Expressions, not including the comma operator. */
206 exp : '*' exp %prec UNARY
207 { pstate->wrap<unop_ind_operation> (); }
210 exp : '&' exp %prec UNARY
211 { pstate->wrap<unop_addr_operation> (); }
214 exp : '-' exp %prec UNARY
215 { pstate->wrap<unary_neg_operation> (); }
218 exp : '+' exp %prec UNARY
219 { pstate->wrap<unary_plus_operation> (); }
222 exp : '!' exp %prec UNARY
223 { pstate->wrap<unary_logical_not_operation> (); }
226 exp : '^' exp %prec UNARY
227 { pstate->wrap<unary_complement_operation> (); }
230 exp : exp INCREMENT %prec UNARY
231 { pstate->wrap<postinc_operation> (); }
234 exp : exp DECREMENT %prec UNARY
235 { pstate->wrap<postdec_operation> (); }
238 /* foo->bar is not in Go. May want as a gdb extension. Later. */
240 exp : exp '.' name_not_typename
242 pstate->push_new<structop_operation>
243 (pstate->pop (), copy_name ($3.stoken));
247 exp : exp '.' name_not_typename COMPLETE
249 structop_base_operation *op
250 = new structop_operation (pstate->pop (),
251 copy_name ($3.stoken));
252 pstate->mark_struct_expression (op);
253 pstate->push (operation_up (op));
257 exp : exp '.' COMPLETE
259 structop_base_operation *op
260 = new structop_operation (pstate->pop (), "");
261 pstate->mark_struct_expression (op);
262 pstate->push (operation_up (op));
266 exp : exp '[' exp1 ']'
267 { pstate->wrap2<subscript_operation> (); }
270 exp : exp '('
271 /* This is to save the value of arglist_len
272 being accumulated by an outer function call. */
273 { pstate->start_arglist (); }
274 arglist ')' %prec LEFT_ARROW
276 std::vector<operation_up> args
277 = pstate->pop_vector (pstate->end_arglist ());
278 pstate->push_new<funcall_operation>
279 (pstate->pop (), std::move (args));
283 lcurly : '{'
284 { pstate->start_arglist (); }
287 arglist :
290 arglist : exp
291 { pstate->arglist_len = 1; }
294 arglist : arglist ',' exp %prec ABOVE_COMMA
295 { pstate->arglist_len++; }
298 rcurly : '}'
299 { $$ = pstate->end_arglist () - 1; }
302 exp : lcurly type rcurly exp %prec UNARY
304 pstate->push_new<unop_memval_operation>
305 (pstate->pop (), $2);
309 exp : type '(' exp ')' %prec UNARY
311 pstate->push_new<unop_cast_operation>
312 (pstate->pop (), $1);
316 exp : '(' exp1 ')'
320 /* Binary operators in order of decreasing precedence. */
322 exp : exp '@' exp
323 { pstate->wrap2<repeat_operation> (); }
326 exp : exp '*' exp
327 { pstate->wrap2<mul_operation> (); }
330 exp : exp '/' exp
331 { pstate->wrap2<div_operation> (); }
334 exp : exp '%' exp
335 { pstate->wrap2<rem_operation> (); }
338 exp : exp '+' exp
339 { pstate->wrap2<add_operation> (); }
342 exp : exp '-' exp
343 { pstate->wrap2<sub_operation> (); }
346 exp : exp LSH exp
347 { pstate->wrap2<lsh_operation> (); }
350 exp : exp RSH exp
351 { pstate->wrap2<rsh_operation> (); }
354 exp : exp EQUAL exp
355 { pstate->wrap2<equal_operation> (); }
358 exp : exp NOTEQUAL exp
359 { pstate->wrap2<notequal_operation> (); }
362 exp : exp LEQ exp
363 { pstate->wrap2<leq_operation> (); }
366 exp : exp GEQ exp
367 { pstate->wrap2<geq_operation> (); }
370 exp : exp '<' exp
371 { pstate->wrap2<less_operation> (); }
374 exp : exp '>' exp
375 { pstate->wrap2<gtr_operation> (); }
378 exp : exp '&' exp
379 { pstate->wrap2<bitwise_and_operation> (); }
382 exp : exp '^' exp
383 { pstate->wrap2<bitwise_xor_operation> (); }
386 exp : exp '|' exp
387 { pstate->wrap2<bitwise_ior_operation> (); }
390 exp : exp ANDAND exp
391 { pstate->wrap2<logical_and_operation> (); }
394 exp : exp OROR exp
395 { pstate->wrap2<logical_or_operation> (); }
398 exp : exp '?' exp ':' exp %prec '?'
400 operation_up last = pstate->pop ();
401 operation_up mid = pstate->pop ();
402 operation_up first = pstate->pop ();
403 pstate->push_new<ternop_cond_operation>
404 (std::move (first), std::move (mid),
405 std::move (last));
409 exp : exp '=' exp
410 { pstate->wrap2<assign_operation> (); }
413 exp : exp ASSIGN_MODIFY exp
415 operation_up rhs = pstate->pop ();
416 operation_up lhs = pstate->pop ();
417 pstate->push_new<assign_modify_operation>
418 ($2, std::move (lhs), std::move (rhs));
422 exp : INT
424 pstate->push_new<long_const_operation>
425 ($1.type, $1.val);
429 exp : CHAR
431 struct stoken_vector vec;
432 vec.len = 1;
433 vec.tokens = &$1;
434 pstate->push_c_string ($1.type, &vec);
438 exp : NAME_OR_INT
439 { YYSTYPE val;
440 parse_number (pstate, $1.stoken.ptr,
441 $1.stoken.length, 0, &val);
442 pstate->push_new<long_const_operation>
443 (val.typed_val_int.type,
444 val.typed_val_int.val);
449 exp : FLOAT
451 float_data data;
452 std::copy (std::begin ($1.val), std::end ($1.val),
453 std::begin (data));
454 pstate->push_new<float_const_operation> ($1.type, data);
458 exp : variable
461 exp : DOLLAR_VARIABLE
463 pstate->push_dollar ($1);
467 exp : SIZEOF_KEYWORD '(' type ')' %prec UNARY
469 /* TODO(dje): Go objects in structs. */
470 /* TODO(dje): What's the right type here? */
471 struct type *size_type
472 = parse_type (pstate)->builtin_unsigned_int;
473 $3 = check_typedef ($3);
474 pstate->push_new<long_const_operation>
475 (size_type, (LONGEST) $3->length ());
479 exp : SIZEOF_KEYWORD '(' exp ')' %prec UNARY
481 /* TODO(dje): Go objects in structs. */
482 pstate->wrap<unop_sizeof_operation> ();
485 string_exp:
486 STRING
488 /* We copy the string here, and not in the
489 lexer, to guarantee that we do not leak a
490 string. */
491 /* Note that we NUL-terminate here, but just
492 for convenience. */
493 struct typed_stoken *vec = XNEW (struct typed_stoken);
494 $$.len = 1;
495 $$.tokens = vec;
497 vec->type = $1.type;
498 vec->length = $1.length;
499 vec->ptr = (char *) malloc ($1.length + 1);
500 memcpy (vec->ptr, $1.ptr, $1.length + 1);
503 | string_exp '+' STRING
505 /* Note that we NUL-terminate here, but just
506 for convenience. */
507 char *p;
508 ++$$.len;
509 $$.tokens = XRESIZEVEC (struct typed_stoken,
510 $$.tokens, $$.len);
512 p = (char *) malloc ($3.length + 1);
513 memcpy (p, $3.ptr, $3.length + 1);
515 $$.tokens[$$.len - 1].type = $3.type;
516 $$.tokens[$$.len - 1].length = $3.length;
517 $$.tokens[$$.len - 1].ptr = p;
521 exp : string_exp %prec ABOVE_COMMA
523 int i;
525 /* Always utf8. */
526 pstate->push_c_string (0, &$1);
527 for (i = 0; i < $1.len; ++i)
528 free ($1.tokens[i].ptr);
529 free ($1.tokens);
533 exp : TRUE_KEYWORD
534 { pstate->push_new<bool_operation> ($1); }
537 exp : FALSE_KEYWORD
538 { pstate->push_new<bool_operation> ($1); }
541 variable: name_not_typename ENTRY
542 { struct symbol *sym = $1.sym.symbol;
544 if (sym == NULL
545 || !sym->is_argument ()
546 || !symbol_read_needs_frame (sym))
547 error (_("@entry can be used only for function "
548 "parameters, not for \"%s\""),
549 copy_name ($1.stoken).c_str ());
551 pstate->push_new<var_entry_value_operation> (sym);
555 variable: name_not_typename
556 { struct block_symbol sym = $1.sym;
558 if (sym.symbol)
560 if (symbol_read_needs_frame (sym.symbol))
561 pstate->block_tracker->update (sym);
563 pstate->push_new<var_value_operation> (sym);
565 else if ($1.is_a_field_of_this)
567 /* TODO(dje): Can we get here?
568 E.g., via a mix of c++ and go? */
569 gdb_assert_not_reached ("go with `this' field");
571 else
573 struct bound_minimal_symbol msymbol;
574 std::string arg = copy_name ($1.stoken);
576 msymbol =
577 lookup_bound_minimal_symbol (arg.c_str ());
578 if (msymbol.minsym != NULL)
579 pstate->push_new<var_msym_value_operation>
580 (msymbol);
581 else if (!have_full_symbols ()
582 && !have_partial_symbols ())
583 error (_("No symbol table is loaded. "
584 "Use the \"file\" command."));
585 else
586 error (_("No symbol \"%s\" in current context."),
587 arg.c_str ());
592 /* TODO
593 method_exp: PACKAGENAME '.' name '.' name
599 type /* Implements (approximately): [*] type-specifier */
600 : '*' type
601 { $$ = lookup_pointer_type ($2); }
602 | TYPENAME
603 { $$ = $1.type; }
605 | STRUCT_KEYWORD name
606 { $$ = lookup_struct (copy_name ($2),
607 expression_context_block); }
609 | BYTE_KEYWORD
610 { $$ = builtin_go_type (pstate->gdbarch ())
611 ->builtin_uint8; }
614 /* TODO
615 name : NAME { $$ = $1.stoken; }
616 | TYPENAME { $$ = $1.stoken; }
617 | NAME_OR_INT { $$ = $1.stoken; }
621 name_not_typename
622 : NAME
623 /* These would be useful if name_not_typename was useful, but it is just
624 a fake for "variable", so these cause reduce/reduce conflicts because
625 the parser can't tell whether NAME_OR_INT is a name_not_typename (=variable,
626 =exp) or just an exp. If name_not_typename was ever used in an lvalue
627 context where only a name could occur, this might be useful.
628 | NAME_OR_INT
634 /* Take care of parsing a number (anything that starts with a digit).
635 Set yylval and return the token type; update lexptr.
636 LEN is the number of characters in it. */
638 /* FIXME: Needs some error checking for the float case. */
639 /* FIXME(dje): IWBN to use c-exp.y's parse_number if we could.
640 That will require moving the guts into a function that we both call
641 as our YYSTYPE is different than c-exp.y's */
643 static int
644 parse_number (struct parser_state *par_state,
645 const char *p, int len, int parsed_float, YYSTYPE *putithere)
647 ULONGEST n = 0;
648 ULONGEST prevn = 0;
650 int i = 0;
651 int c;
652 int base = input_radix;
653 int unsigned_p = 0;
655 /* Number of "L" suffixes encountered. */
656 int long_p = 0;
658 /* We have found a "L" or "U" suffix. */
659 int found_suffix = 0;
661 if (parsed_float)
663 const struct builtin_go_type *builtin_go_types
664 = builtin_go_type (par_state->gdbarch ());
666 /* Handle suffixes: 'f' for float32, 'l' for long double.
667 FIXME: This appears to be an extension -- do we want this? */
668 if (len >= 1 && tolower (p[len - 1]) == 'f')
670 putithere->typed_val_float.type
671 = builtin_go_types->builtin_float32;
672 len--;
674 else if (len >= 1 && tolower (p[len - 1]) == 'l')
676 putithere->typed_val_float.type
677 = parse_type (par_state)->builtin_long_double;
678 len--;
680 /* Default type for floating-point literals is float64. */
681 else
683 putithere->typed_val_float.type
684 = builtin_go_types->builtin_float64;
687 if (!parse_float (p, len,
688 putithere->typed_val_float.type,
689 putithere->typed_val_float.val))
690 return ERROR;
691 return FLOAT;
694 /* Handle base-switching prefixes 0x, 0t, 0d, 0. */
695 if (p[0] == '0' && len > 1)
696 switch (p[1])
698 case 'x':
699 case 'X':
700 if (len >= 3)
702 p += 2;
703 base = 16;
704 len -= 2;
706 break;
708 case 'b':
709 case 'B':
710 if (len >= 3)
712 p += 2;
713 base = 2;
714 len -= 2;
716 break;
718 case 't':
719 case 'T':
720 case 'd':
721 case 'D':
722 if (len >= 3)
724 p += 2;
725 base = 10;
726 len -= 2;
728 break;
730 default:
731 base = 8;
732 break;
735 while (len-- > 0)
737 c = *p++;
738 if (c >= 'A' && c <= 'Z')
739 c += 'a' - 'A';
740 if (c != 'l' && c != 'u')
741 n *= base;
742 if (c >= '0' && c <= '9')
744 if (found_suffix)
745 return ERROR;
746 n += i = c - '0';
748 else
750 if (base > 10 && c >= 'a' && c <= 'f')
752 if (found_suffix)
753 return ERROR;
754 n += i = c - 'a' + 10;
756 else if (c == 'l')
758 ++long_p;
759 found_suffix = 1;
761 else if (c == 'u')
763 unsigned_p = 1;
764 found_suffix = 1;
766 else
767 return ERROR; /* Char not a digit */
769 if (i >= base)
770 return ERROR; /* Invalid digit in this base. */
772 if (c != 'l' && c != 'u')
774 /* Test for overflow. */
775 if (n == 0 && prevn == 0)
777 else if (prevn >= n)
778 error (_("Numeric constant too large."));
780 prevn = n;
783 /* An integer constant is an int, a long, or a long long. An L
784 suffix forces it to be long; an LL suffix forces it to be long
785 long. If not forced to a larger size, it gets the first type of
786 the above that it fits in. To figure out whether it fits, we
787 shift it right and see whether anything remains. Note that we
788 can't shift sizeof (LONGEST) * HOST_CHAR_BIT bits or more in one
789 operation, because many compilers will warn about such a shift
790 (which always produces a zero result). Sometimes gdbarch_int_bit
791 or gdbarch_long_bit will be that big, sometimes not. To deal with
792 the case where it is we just always shift the value more than
793 once, with fewer bits each time. */
795 int int_bits = gdbarch_int_bit (par_state->gdbarch ());
796 int long_bits = gdbarch_long_bit (par_state->gdbarch ());
797 int long_long_bits = gdbarch_long_long_bit (par_state->gdbarch ());
798 bool have_signed = !unsigned_p;
799 bool have_int = long_p == 0;
800 bool have_long = long_p <= 1;
801 if (have_int && have_signed && fits_in_type (1, n, int_bits, true))
802 putithere->typed_val_int.type = parse_type (par_state)->builtin_int;
803 else if (have_int && fits_in_type (1, n, int_bits, false))
804 putithere->typed_val_int.type
805 = parse_type (par_state)->builtin_unsigned_int;
806 else if (have_long && have_signed && fits_in_type (1, n, long_bits, true))
807 putithere->typed_val_int.type = parse_type (par_state)->builtin_long;
808 else if (have_long && fits_in_type (1, n, long_bits, false))
809 putithere->typed_val_int.type
810 = parse_type (par_state)->builtin_unsigned_long;
811 else if (have_signed && fits_in_type (1, n, long_long_bits, true))
812 putithere->typed_val_int.type
813 = parse_type (par_state)->builtin_long_long;
814 else if (fits_in_type (1, n, long_long_bits, false))
815 putithere->typed_val_int.type
816 = parse_type (par_state)->builtin_unsigned_long_long;
817 else
818 error (_("Numeric constant too large."));
819 putithere->typed_val_int.val = n;
821 return INT;
824 /* Temporary obstack used for holding strings. */
825 static struct obstack tempbuf;
826 static int tempbuf_init;
828 /* Parse a string or character literal from TOKPTR. The string or
829 character may be wide or unicode. *OUTPTR is set to just after the
830 end of the literal in the input string. The resulting token is
831 stored in VALUE. This returns a token value, either STRING or
832 CHAR, depending on what was parsed. *HOST_CHARS is set to the
833 number of host characters in the literal. */
835 static int
836 parse_string_or_char (const char *tokptr, const char **outptr,
837 struct typed_stoken *value, int *host_chars)
839 int quote;
841 /* Build the gdb internal form of the input string in tempbuf. Note
842 that the buffer is null byte terminated *only* for the
843 convenience of debugging gdb itself and printing the buffer
844 contents when the buffer contains no embedded nulls. Gdb does
845 not depend upon the buffer being null byte terminated, it uses
846 the length string instead. This allows gdb to handle C strings
847 (as well as strings in other languages) with embedded null
848 bytes */
850 if (!tempbuf_init)
851 tempbuf_init = 1;
852 else
853 obstack_free (&tempbuf, NULL);
854 obstack_init (&tempbuf);
856 /* Skip the quote. */
857 quote = *tokptr;
858 ++tokptr;
860 *host_chars = 0;
862 while (*tokptr)
864 char c = *tokptr;
865 if (c == '\\')
867 ++tokptr;
868 *host_chars += c_parse_escape (&tokptr, &tempbuf);
870 else if (c == quote)
871 break;
872 else
874 obstack_1grow (&tempbuf, c);
875 ++tokptr;
876 /* FIXME: this does the wrong thing with multi-byte host
877 characters. We could use mbrlen here, but that would
878 make "set host-charset" a bit less useful. */
879 ++*host_chars;
883 if (*tokptr != quote)
885 if (quote == '"')
886 error (_("Unterminated string in expression."));
887 else
888 error (_("Unmatched single quote."));
890 ++tokptr;
892 value->type = (int) C_STRING | (quote == '\'' ? C_CHAR : 0); /*FIXME*/
893 value->ptr = (char *) obstack_base (&tempbuf);
894 value->length = obstack_object_size (&tempbuf);
896 *outptr = tokptr;
898 return quote == '\'' ? CHAR : STRING;
901 struct go_token
903 const char *oper;
904 int token;
905 enum exp_opcode opcode;
908 static const struct go_token tokentab3[] =
910 {">>=", ASSIGN_MODIFY, BINOP_RSH},
911 {"<<=", ASSIGN_MODIFY, BINOP_LSH},
912 /*{"&^=", ASSIGN_MODIFY, BINOP_BITWISE_ANDNOT}, TODO */
913 {"...", DOTDOTDOT, OP_NULL},
916 static const struct go_token tokentab2[] =
918 {"+=", ASSIGN_MODIFY, BINOP_ADD},
919 {"-=", ASSIGN_MODIFY, BINOP_SUB},
920 {"*=", ASSIGN_MODIFY, BINOP_MUL},
921 {"/=", ASSIGN_MODIFY, BINOP_DIV},
922 {"%=", ASSIGN_MODIFY, BINOP_REM},
923 {"|=", ASSIGN_MODIFY, BINOP_BITWISE_IOR},
924 {"&=", ASSIGN_MODIFY, BINOP_BITWISE_AND},
925 {"^=", ASSIGN_MODIFY, BINOP_BITWISE_XOR},
926 {"++", INCREMENT, OP_NULL},
927 {"--", DECREMENT, OP_NULL},
928 /*{"->", RIGHT_ARROW, OP_NULL}, Doesn't exist in Go. */
929 {"<-", LEFT_ARROW, OP_NULL},
930 {"&&", ANDAND, OP_NULL},
931 {"||", OROR, OP_NULL},
932 {"<<", LSH, OP_NULL},
933 {">>", RSH, OP_NULL},
934 {"==", EQUAL, OP_NULL},
935 {"!=", NOTEQUAL, OP_NULL},
936 {"<=", LEQ, OP_NULL},
937 {">=", GEQ, OP_NULL},
938 /*{"&^", ANDNOT, OP_NULL}, TODO */
941 /* Identifier-like tokens. */
942 static const struct go_token ident_tokens[] =
944 {"true", TRUE_KEYWORD, OP_NULL},
945 {"false", FALSE_KEYWORD, OP_NULL},
946 {"nil", NIL_KEYWORD, OP_NULL},
947 {"const", CONST_KEYWORD, OP_NULL},
948 {"struct", STRUCT_KEYWORD, OP_NULL},
949 {"type", TYPE_KEYWORD, OP_NULL},
950 {"interface", INTERFACE_KEYWORD, OP_NULL},
951 {"chan", CHAN_KEYWORD, OP_NULL},
952 {"byte", BYTE_KEYWORD, OP_NULL}, /* An alias of uint8. */
953 {"len", LEN_KEYWORD, OP_NULL},
954 {"cap", CAP_KEYWORD, OP_NULL},
955 {"new", NEW_KEYWORD, OP_NULL},
956 {"iota", IOTA_KEYWORD, OP_NULL},
959 /* This is set if a NAME token appeared at the very end of the input
960 string, with no whitespace separating the name from the EOF. This
961 is used only when parsing to do field name completion. */
962 static int saw_name_at_eof;
964 /* This is set if the previously-returned token was a structure
965 operator -- either '.' or ARROW. This is used only when parsing to
966 do field name completion. */
967 static int last_was_structop;
969 /* Depth of parentheses. */
970 static int paren_depth;
972 /* Read one token, getting characters through lexptr. */
974 static int
975 lex_one_token (struct parser_state *par_state)
977 int c;
978 int namelen;
979 const char *tokstart;
980 int saw_structop = last_was_structop;
982 last_was_structop = 0;
984 retry:
986 par_state->prev_lexptr = par_state->lexptr;
988 tokstart = par_state->lexptr;
989 /* See if it is a special token of length 3. */
990 for (const auto &token : tokentab3)
991 if (strncmp (tokstart, token.oper, 3) == 0)
993 par_state->lexptr += 3;
994 yylval.opcode = token.opcode;
995 return token.token;
998 /* See if it is a special token of length 2. */
999 for (const auto &token : tokentab2)
1000 if (strncmp (tokstart, token.oper, 2) == 0)
1002 par_state->lexptr += 2;
1003 yylval.opcode = token.opcode;
1004 /* NOTE: -> doesn't exist in Go, so we don't need to watch for
1005 setting last_was_structop here. */
1006 return token.token;
1009 switch (c = *tokstart)
1011 case 0:
1012 if (saw_name_at_eof)
1014 saw_name_at_eof = 0;
1015 return COMPLETE;
1017 else if (saw_structop)
1018 return COMPLETE;
1019 else
1020 return 0;
1022 case ' ':
1023 case '\t':
1024 case '\n':
1025 par_state->lexptr++;
1026 goto retry;
1028 case '[':
1029 case '(':
1030 paren_depth++;
1031 par_state->lexptr++;
1032 return c;
1034 case ']':
1035 case ')':
1036 if (paren_depth == 0)
1037 return 0;
1038 paren_depth--;
1039 par_state->lexptr++;
1040 return c;
1042 case ',':
1043 if (pstate->comma_terminates
1044 && paren_depth == 0)
1045 return 0;
1046 par_state->lexptr++;
1047 return c;
1049 case '.':
1050 /* Might be a floating point number. */
1051 if (par_state->lexptr[1] < '0' || par_state->lexptr[1] > '9')
1053 if (pstate->parse_completion)
1054 last_was_structop = 1;
1055 goto symbol; /* Nope, must be a symbol. */
1057 [[fallthrough]];
1059 case '0':
1060 case '1':
1061 case '2':
1062 case '3':
1063 case '4':
1064 case '5':
1065 case '6':
1066 case '7':
1067 case '8':
1068 case '9':
1070 /* It's a number. */
1071 int got_dot = 0, got_e = 0, toktype;
1072 const char *p = tokstart;
1073 int hex = input_radix > 10;
1075 if (c == '0' && (p[1] == 'x' || p[1] == 'X'))
1077 p += 2;
1078 hex = 1;
1081 for (;; ++p)
1083 /* This test includes !hex because 'e' is a valid hex digit
1084 and thus does not indicate a floating point number when
1085 the radix is hex. */
1086 if (!hex && !got_e && (*p == 'e' || *p == 'E'))
1087 got_dot = got_e = 1;
1088 /* This test does not include !hex, because a '.' always indicates
1089 a decimal floating point number regardless of the radix. */
1090 else if (!got_dot && *p == '.')
1091 got_dot = 1;
1092 else if (got_e && (p[-1] == 'e' || p[-1] == 'E')
1093 && (*p == '-' || *p == '+'))
1094 /* This is the sign of the exponent, not the end of the
1095 number. */
1096 continue;
1097 /* We will take any letters or digits. parse_number will
1098 complain if past the radix, or if L or U are not final. */
1099 else if ((*p < '0' || *p > '9')
1100 && ((*p < 'a' || *p > 'z')
1101 && (*p < 'A' || *p > 'Z')))
1102 break;
1104 toktype = parse_number (par_state, tokstart, p - tokstart,
1105 got_dot|got_e, &yylval);
1106 if (toktype == ERROR)
1108 char *err_copy = (char *) alloca (p - tokstart + 1);
1110 memcpy (err_copy, tokstart, p - tokstart);
1111 err_copy[p - tokstart] = 0;
1112 error (_("Invalid number \"%s\"."), err_copy);
1114 par_state->lexptr = p;
1115 return toktype;
1118 case '@':
1120 const char *p = &tokstart[1];
1121 size_t len = strlen ("entry");
1123 while (isspace (*p))
1124 p++;
1125 if (strncmp (p, "entry", len) == 0 && !isalnum (p[len])
1126 && p[len] != '_')
1128 par_state->lexptr = &p[len];
1129 return ENTRY;
1132 [[fallthrough]];
1133 case '+':
1134 case '-':
1135 case '*':
1136 case '/':
1137 case '%':
1138 case '|':
1139 case '&':
1140 case '^':
1141 case '~':
1142 case '!':
1143 case '<':
1144 case '>':
1145 case '?':
1146 case ':':
1147 case '=':
1148 case '{':
1149 case '}':
1150 symbol:
1151 par_state->lexptr++;
1152 return c;
1154 case '\'':
1155 case '"':
1156 case '`':
1158 int host_len;
1159 int result = parse_string_or_char (tokstart, &par_state->lexptr,
1160 &yylval.tsval, &host_len);
1161 if (result == CHAR)
1163 if (host_len == 0)
1164 error (_("Empty character constant."));
1165 else if (host_len > 2 && c == '\'')
1167 ++tokstart;
1168 namelen = par_state->lexptr - tokstart - 1;
1169 goto tryname;
1171 else if (host_len > 1)
1172 error (_("Invalid character constant."));
1174 return result;
1178 if (!(c == '_' || c == '$'
1179 || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')))
1180 /* We must have come across a bad character (e.g. ';'). */
1181 error (_("Invalid character '%c' in expression."), c);
1183 /* It's a name. See how long it is. */
1184 namelen = 0;
1185 for (c = tokstart[namelen];
1186 (c == '_' || c == '$' || (c >= '0' && c <= '9')
1187 || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'));)
1189 c = tokstart[++namelen];
1192 /* The token "if" terminates the expression and is NOT removed from
1193 the input stream. It doesn't count if it appears in the
1194 expansion of a macro. */
1195 if (namelen == 2
1196 && tokstart[0] == 'i'
1197 && tokstart[1] == 'f')
1199 return 0;
1202 /* For the same reason (breakpoint conditions), "thread N"
1203 terminates the expression. "thread" could be an identifier, but
1204 an identifier is never followed by a number without intervening
1205 punctuation.
1206 Handle abbreviations of these, similarly to
1207 breakpoint.c:find_condition_and_thread.
1208 TODO: Watch for "goroutine" here? */
1209 if (namelen >= 1
1210 && strncmp (tokstart, "thread", namelen) == 0
1211 && (tokstart[namelen] == ' ' || tokstart[namelen] == '\t'))
1213 const char *p = tokstart + namelen + 1;
1215 while (*p == ' ' || *p == '\t')
1216 p++;
1217 if (*p >= '0' && *p <= '9')
1218 return 0;
1221 par_state->lexptr += namelen;
1223 tryname:
1225 yylval.sval.ptr = tokstart;
1226 yylval.sval.length = namelen;
1228 /* Catch specific keywords. */
1229 std::string copy = copy_name (yylval.sval);
1230 for (const auto &token : ident_tokens)
1231 if (copy == token.oper)
1233 /* It is ok to always set this, even though we don't always
1234 strictly need to. */
1235 yylval.opcode = token.opcode;
1236 return token.token;
1239 if (*tokstart == '$')
1240 return DOLLAR_VARIABLE;
1242 if (pstate->parse_completion && *par_state->lexptr == '\0')
1243 saw_name_at_eof = 1;
1244 return NAME;
1247 /* An object of this type is pushed on a FIFO by the "outer" lexer. */
1248 struct go_token_and_value
1250 int token;
1251 YYSTYPE value;
1254 /* A FIFO of tokens that have been read but not yet returned to the
1255 parser. */
1256 static std::vector<go_token_and_value> token_fifo;
1258 /* Non-zero if the lexer should return tokens from the FIFO. */
1259 static int popping;
1261 /* Temporary storage for yylex; this holds symbol names as they are
1262 built up. */
1263 static auto_obstack name_obstack;
1265 /* Build "package.name" in name_obstack.
1266 For convenience of the caller, the name is NUL-terminated,
1267 but the NUL is not included in the recorded length. */
1269 static struct stoken
1270 build_packaged_name (const char *package, int package_len,
1271 const char *name, int name_len)
1273 struct stoken result;
1275 name_obstack.clear ();
1276 obstack_grow (&name_obstack, package, package_len);
1277 obstack_grow_str (&name_obstack, ".");
1278 obstack_grow (&name_obstack, name, name_len);
1279 obstack_grow (&name_obstack, "", 1);
1280 result.ptr = (char *) obstack_base (&name_obstack);
1281 result.length = obstack_object_size (&name_obstack) - 1;
1283 return result;
1286 /* Return non-zero if NAME is a package name.
1287 BLOCK is the scope in which to interpret NAME; this can be NULL
1288 to mean the global scope. */
1290 static int
1291 package_name_p (const char *name, const struct block *block)
1293 struct symbol *sym;
1294 struct field_of_this_result is_a_field_of_this;
1296 sym = lookup_symbol (name, block, SEARCH_TYPE_DOMAIN,
1297 &is_a_field_of_this).symbol;
1299 if (sym
1300 && sym->aclass () == LOC_TYPEDEF
1301 && sym->type ()->code () == TYPE_CODE_MODULE)
1302 return 1;
1304 return 0;
1307 /* Classify a (potential) function in the "unsafe" package.
1308 We fold these into "keywords" to keep things simple, at least until
1309 something more complex is warranted. */
1311 static int
1312 classify_unsafe_function (struct stoken function_name)
1314 std::string copy = copy_name (function_name);
1316 if (copy == "Sizeof")
1318 yylval.sval = function_name;
1319 return SIZEOF_KEYWORD;
1322 error (_("Unknown function in `unsafe' package: %s"), copy.c_str ());
1325 /* Classify token(s) "name1.name2" where name1 is known to be a package.
1326 The contents of the token are in `yylval'.
1327 Updates yylval and returns the new token type.
1329 The result is one of NAME, NAME_OR_INT, or TYPENAME. */
1331 static int
1332 classify_packaged_name (const struct block *block)
1334 struct block_symbol sym;
1335 struct field_of_this_result is_a_field_of_this;
1337 std::string copy = copy_name (yylval.sval);
1339 sym = lookup_symbol (copy.c_str (), block, SEARCH_VFT, &is_a_field_of_this);
1341 if (sym.symbol)
1343 yylval.ssym.sym = sym;
1344 yylval.ssym.is_a_field_of_this = is_a_field_of_this.type != NULL;
1347 return NAME;
1350 /* Classify a NAME token.
1351 The contents of the token are in `yylval'.
1352 Updates yylval and returns the new token type.
1353 BLOCK is the block in which lookups start; this can be NULL
1354 to mean the global scope.
1356 The result is one of NAME, NAME_OR_INT, or TYPENAME. */
1358 static int
1359 classify_name (struct parser_state *par_state, const struct block *block)
1361 struct type *type;
1362 struct block_symbol sym;
1363 struct field_of_this_result is_a_field_of_this;
1365 std::string copy = copy_name (yylval.sval);
1367 /* Try primitive types first so they win over bad/weird debug info. */
1368 type = language_lookup_primitive_type (par_state->language (),
1369 par_state->gdbarch (),
1370 copy.c_str ());
1371 if (type != NULL)
1373 /* NOTE: We take advantage of the fact that yylval coming in was a
1374 NAME, and that struct ttype is a compatible extension of struct
1375 stoken, so yylval.tsym.stoken is already filled in. */
1376 yylval.tsym.type = type;
1377 return TYPENAME;
1380 /* TODO: What about other types? */
1382 sym = lookup_symbol (copy.c_str (), block, SEARCH_VFT, &is_a_field_of_this);
1384 if (sym.symbol)
1386 yylval.ssym.sym = sym;
1387 yylval.ssym.is_a_field_of_this = is_a_field_of_this.type != NULL;
1388 return NAME;
1391 /* If we didn't find a symbol, look again in the current package.
1392 This is to, e.g., make "p global_var" work without having to specify
1393 the package name. We intentionally only looks for objects in the
1394 current package. */
1397 gdb::unique_xmalloc_ptr<char> current_package_name
1398 = go_block_package_name (block);
1400 if (current_package_name != NULL)
1402 struct stoken sval =
1403 build_packaged_name (current_package_name.get (),
1404 strlen (current_package_name.get ()),
1405 copy.c_str (), copy.size ());
1407 sym = lookup_symbol (sval.ptr, block, SEARCH_VFT,
1408 &is_a_field_of_this);
1409 if (sym.symbol)
1411 yylval.ssym.stoken = sval;
1412 yylval.ssym.sym = sym;
1413 yylval.ssym.is_a_field_of_this = is_a_field_of_this.type != NULL;
1414 return NAME;
1419 /* Input names that aren't symbols but ARE valid hex numbers, when
1420 the input radix permits them, can be names or numbers depending
1421 on the parse. Note we support radixes > 16 here. */
1422 if ((copy[0] >= 'a' && copy[0] < 'a' + input_radix - 10)
1423 || (copy[0] >= 'A' && copy[0] < 'A' + input_radix - 10))
1425 YYSTYPE newlval; /* Its value is ignored. */
1426 int hextype = parse_number (par_state, copy.c_str (),
1427 yylval.sval.length, 0, &newlval);
1428 if (hextype == INT)
1430 yylval.ssym.sym.symbol = NULL;
1431 yylval.ssym.sym.block = NULL;
1432 yylval.ssym.is_a_field_of_this = 0;
1433 return NAME_OR_INT;
1437 yylval.ssym.sym.symbol = NULL;
1438 yylval.ssym.sym.block = NULL;
1439 yylval.ssym.is_a_field_of_this = 0;
1440 return NAME;
1443 /* This is taken from c-exp.y mostly to get something working.
1444 The basic structure has been kept because we may yet need some of it. */
1446 static int
1447 yylex (void)
1449 go_token_and_value current, next;
1451 if (popping && !token_fifo.empty ())
1453 go_token_and_value tv = token_fifo[0];
1454 token_fifo.erase (token_fifo.begin ());
1455 yylval = tv.value;
1456 /* There's no need to fall through to handle package.name
1457 as that can never happen here. In theory. */
1458 return tv.token;
1460 popping = 0;
1462 current.token = lex_one_token (pstate);
1464 /* TODO: Need a way to force specifying name1 as a package.
1465 .name1.name2 ? */
1467 if (current.token != NAME)
1468 return current.token;
1470 /* See if we have "name1 . name2". */
1472 current.value = yylval;
1473 next.token = lex_one_token (pstate);
1474 next.value = yylval;
1476 if (next.token == '.')
1478 go_token_and_value name2;
1480 name2.token = lex_one_token (pstate);
1481 name2.value = yylval;
1483 if (name2.token == NAME)
1485 /* Ok, we have "name1 . name2". */
1486 std::string copy = copy_name (current.value.sval);
1488 if (copy == "unsafe")
1490 popping = 1;
1491 return classify_unsafe_function (name2.value.sval);
1494 if (package_name_p (copy.c_str (), pstate->expression_context_block))
1496 popping = 1;
1497 yylval.sval = build_packaged_name (current.value.sval.ptr,
1498 current.value.sval.length,
1499 name2.value.sval.ptr,
1500 name2.value.sval.length);
1501 return classify_packaged_name (pstate->expression_context_block);
1505 token_fifo.push_back (next);
1506 token_fifo.push_back (name2);
1508 else
1509 token_fifo.push_back (next);
1511 /* If we arrive here we don't have a package-qualified name. */
1513 popping = 1;
1514 yylval = current.value;
1515 return classify_name (pstate, pstate->expression_context_block);
1518 /* See language.h. */
1521 go_language::parser (struct parser_state *par_state) const
1523 /* Setting up the parser state. */
1524 scoped_restore pstate_restore = make_scoped_restore (&pstate);
1525 gdb_assert (par_state != NULL);
1526 pstate = par_state;
1528 scoped_restore restore_yydebug = make_scoped_restore (&yydebug,
1529 par_state->debug);
1531 /* Initialize some state used by the lexer. */
1532 last_was_structop = 0;
1533 saw_name_at_eof = 0;
1534 paren_depth = 0;
1536 token_fifo.clear ();
1537 popping = 0;
1538 name_obstack.clear ();
1540 int result = yyparse ();
1541 if (!result)
1542 pstate->set_operation (pstate->pop ());
1543 return result;
1546 static void
1547 yyerror (const char *msg)
1549 pstate->parse_error (msg);