Update
[gdb.git] / gdb / cp-name-parser.y
blob18c959e13b15b2eed909f39ee517e2759daaaf2b
1 /* YACC parser for C++ names, for GDB.
3 Copyright (C) 2003, 2004, 2005, 2007, 2008 Free Software Foundation, Inc.
5 Parts of the lexer are based on c-exp.y from GDB.
7 This file is part of GDB.
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2 of the License, or
12 (at your option) any later version.
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, Inc., 51 Franklin Street, Fifth Floor,
22 Boston, MA 02110-1301, USA. */
24 /* Note that malloc's and realloc's in this file are transformed to
25 xmalloc and xrealloc respectively by the same sed command in the
26 makefile that remaps any other malloc/realloc inserted by the parser
27 generator. Doing this with #defines and trying to control the interaction
28 with include files (<malloc.h> and <stdlib.h> for example) just became
29 too messy, particularly when such includes can be inserted at random
30 times by the parser generator. */
%{

/* Prologue: everything up to the matching %} is copied verbatim into
   the generated parser.  */

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>

#include "safe-ctype.h"
#include "libiberty.h"
#include "demangle.h"
/* Bison does not make it easy to create a parser without global
   state, unfortunately.  Here are all the global variables used
   in this parser.  */

/* LEXPTR is the current pointer into our lex buffer.  PREV_LEXPTR
   is the start of the last token lexed, only used for diagnostics.
   ERROR_LEXPTR is the first place an error occurred.  GLOBAL_ERRMSG
   is the first error message encountered.  */

static const char *lexptr, *prev_lexptr, *error_lexptr, *global_errmsg;
54 /* The components built by the parser are allocated ahead of time,
55 and cached in this structure. */
57 #define ALLOC_CHUNK 100
59 struct demangle_info {
60 int used;
61 struct demangle_info *prev, *next;
62 struct demangle_component comps[ALLOC_CHUNK];
65 static struct demangle_info *demangle_info;
67 static struct demangle_component *
68 d_grab (void)
70 struct demangle_info *more;
72 if (demangle_info->used >= ALLOC_CHUNK)
74 if (demangle_info->next == NULL)
76 more = malloc (sizeof (struct demangle_info));
77 more->prev = demangle_info;
78 more->next = NULL;
79 demangle_info->next = more;
81 else
82 more = demangle_info->next;
84 more->used = 0;
85 demangle_info = more;
87 return &demangle_info->comps[demangle_info->used++];
/* The parse tree created by the parser is stored here after a successful
   parse.  */

static struct demangle_component *global_result;
/* Prototypes for helper functions used when constructing the parse
   tree.  */

static struct demangle_component *d_qualify (struct demangle_component *, int,
                                             int);

static struct demangle_component *d_int_type (int);

static struct demangle_component *d_unary (const char *,
                                           struct demangle_component *);
static struct demangle_component *d_binary (const char *,
                                            struct demangle_component *,
                                            struct demangle_component *);
/* Flags passed to d_qualify.  These are bit flags and may be OR'd
   together.  */

#define QUAL_CONST 1
#define QUAL_RESTRICT 2
#define QUAL_VOLATILE 4

/* Flags passed to d_int_type.  These are bit flags and may be OR'd
   together.  */

#define INT_CHAR        (1 << 0)
#define INT_SHORT       (1 << 1)
#define INT_LONG        (1 << 2)
#define INT_LLONG       (1 << 3)

#define INT_SIGNED      (1 << 4)
#define INT_UNSIGNED    (1 << 5)
/* Remap normal yacc parser interface names (yyparse, yylex, yyerror, etc),
   as well as gratuitously global symbol names, so we can have multiple
   yacc generated parsers in gdb.  Note that these are only the variables
   produced by yacc.  If other parser generators (bison, byacc, etc) produce
   additional global names that conflict at link time, then those parser
   generators need to be fixed instead of adding those names to this list.  */

#define yymaxdepth cpname_maxdepth
#define yyparse cpname_parse
#define yylex   cpname_lex
#define yyerror cpname_error
#define yylval  cpname_lval
#define yychar  cpname_char
#define yydebug cpname_debug
#define yypact  cpname_pact
#define yyr1    cpname_r1
#define yyr2    cpname_r2
#define yydef   cpname_def
#define yychk   cpname_chk
#define yypgo   cpname_pgo
#define yyact   cpname_act
#define yyexca  cpname_exca
#define yyerrflag cpname_errflag
#define yynerrs cpname_nerrs
#define yyps    cpname_ps
#define yypv    cpname_pv
#define yys     cpname_s
#define yy_yys  cpname_yys
#define yystate cpname_state
#define yytmp   cpname_tmp
#define yyv     cpname_v
#define yy_yyv  cpname_yyv
#define yyval   cpname_val
#define yylloc  cpname_lloc
#define yyreds  cpname_reds             /* With YYDEBUG defined */
#define yytoks  cpname_toks             /* With YYDEBUG defined */
#define yyname  cpname_name             /* With YYDEBUG defined */
#define yyrule  cpname_rule             /* With YYDEBUG defined */
#define yylhs   cpname_yylhs
#define yylen   cpname_yylen
#define yydefred cpname_yydefred
#define yydgoto cpname_yydgoto
#define yysindex cpname_yysindex
#define yyrindex cpname_yyrindex
#define yygindex cpname_yygindex
#define yytable  cpname_yytable
#define yycheck  cpname_yycheck
/* Entry points of the generated parser and its support routines.  The
   yy* names used here are remapped to cpname_* by the defines earlier
   in this file.  */
int yyparse (void);
static int yylex (void);
static void yyerror (char *);
/* Enable yydebug for the stand-alone parser.  */
#ifdef TEST_CPNAMES
# define YYDEBUG 1
#endif
182 /* Helper functions. These wrap the demangler tree interface, handle
183 allocation from our global store, and return the allocated component. */
185 static struct demangle_component *
186 fill_comp (enum demangle_component_type d_type, struct demangle_component *lhs,
187 struct demangle_component *rhs)
189 struct demangle_component *ret = d_grab ();
190 cplus_demangle_fill_component (ret, d_type, lhs, rhs);
191 return ret;
194 static struct demangle_component *
195 make_empty (enum demangle_component_type d_type)
197 struct demangle_component *ret = d_grab ();
198 ret->type = d_type;
199 return ret;
/* Grab a component and fill it in as the operator called NAME taking
   ARGS arguments.
   NOTE(review): the status returned by cplus_demangle_fill_operator
   is not checked -- confirm every NAME/ARGS pair used by the grammar
   is one the demangler accepts.  */

static struct demangle_component *
make_operator (const char *name, int args)
{
  struct demangle_component *ret = d_grab ();

  cplus_demangle_fill_operator (ret, name, args);
  return ret;
}
210 static struct demangle_component *
211 make_dtor (enum gnu_v3_dtor_kinds kind, struct demangle_component *name)
213 struct demangle_component *ret = d_grab ();
214 cplus_demangle_fill_dtor (ret, kind, name);
215 return ret;
/* Grab a component and fill it in as the builtin type called NAME
   (e.g. "float", "bool").  The status returned by
   cplus_demangle_fill_builtin_type is not checked.  */

static struct demangle_component *
make_builtin_type (const char *name)
{
  struct demangle_component *ret = d_grab ();

  cplus_demangle_fill_builtin_type (ret, name);
  return ret;
}
/* Grab a component and fill it in as a name made of the LEN
   characters starting at NAME.  The status returned by
   cplus_demangle_fill_name is not checked.  */

static struct demangle_component *
make_name (const char *name, int len)
{
  struct demangle_component *ret = d_grab ();

  cplus_demangle_fill_name (ret, name, len);
  return ret;
}
/* Accessors for the two children of a binary component.  Both expand
   to lvalues: the grammar assigns through them and takes their
   address.  */
#define d_left(dc) ((dc)->u.s_binary.left)
#define d_right(dc) ((dc)->u.s_binary.right)

%}
/* Semantic value types used by the grammar.  */
%union
  {
    /* A single parse-tree node.  */
    struct demangle_component *comp;
    /* A partial tree COMP plus LAST, the address of the child slot
       that is still to be filled in.  */
    struct nested {
      struct demangle_component *comp;
      struct demangle_component **last;
    } nested;
    /* Like nested, but LAST is the node itself rather than the
       address of a slot; used for nested_name.  */
    struct {
      struct demangle_component *comp, *last;
    } nested1;
    /* Value of the abstract declarator rules: a partial tree, a
       pending function part FN, an optional trailing START and a
       FOLD_FLAG used by direct_abstract_declarator.  */
    struct {
      struct demangle_component *comp, **last;
      struct nested fn;
      struct demangle_component *start;
      int fold_flag;
    } abstract;
    int lval;
    struct {
      int val;
      struct demangle_component *type;
    } typed_val_int;
    const char *opname;
  }
/* Semantic value types of the nonterminals and tokens.  */

%type <comp> exp exp1 type start start_opt operator colon_name
%type <comp> unqualified_name colon_ext_name
%type <comp> template template_arg
%type <comp> builtin_type
%type <comp> typespec_2 array_indicator
%type <comp> colon_ext_only ext_only_name

%type <comp> demangler_special function conversion_op
%type <nested> conversion_op_name

%type <abstract> abstract_declarator direct_abstract_declarator
%type <abstract> abstract_declarator_fn
%type <nested> declarator direct_declarator function_arglist

%type <nested> declarator_1 direct_declarator_1

%type <nested> template_params function_args
%type <nested> ptr_operator

%type <nested1> nested_name

%type <lval> qualifier qualifiers qualifiers_opt

%type <lval> int_part int_seq

%token <comp> INT
%token <comp> FLOAT

%token <comp> NAME
%type <comp> name

%token STRUCT CLASS UNION ENUM SIZEOF UNSIGNED COLONCOLON
%token TEMPLATE
%token ERROR
%token NEW DELETE OPERATOR
%token STATIC_CAST REINTERPRET_CAST DYNAMIC_CAST

/* Special type cases, put in to allow the parser to distinguish different
   legal basetypes.  */
%token SIGNED_KEYWORD LONG SHORT INT_KEYWORD CONST_KEYWORD VOLATILE_KEYWORD DOUBLE_KEYWORD BOOL
%token ELLIPSIS RESTRICT VOID FLOAT_KEYWORD CHAR WCHAR_T

%token <opname> ASSIGN_MODIFY

/* C++ */
%token TRUEKEYWORD
%token FALSEKEYWORD

/* Non-C++ things we get from the demangler.  */
%token <lval> DEMANGLER_SPECIAL
%token CONSTRUCTION_VTABLE CONSTRUCTION_IN
%token <typed_val_int> GLOBAL
%{
/* Component type values placed beyond DEMANGLE_COMPONENT_LITERAL.
   NOTE(review): presumably these are the VAL carried by the GLOBAL
   token -- confirm against the lexer.  */
enum {
  GLOBAL_CONSTRUCTORS = DEMANGLE_COMPONENT_LITERAL + 20,
  GLOBAL_DESTRUCTORS = DEMANGLE_COMPONENT_LITERAL + 21
};
%}
/* Precedence declarations.  Later declarations bind more tightly.  */

/* Give NAME lower precedence than COLONCOLON, so that nested_name will
   associate greedily.  */
%nonassoc NAME

/* Give NEW and DELETE lower precedence than ']', because we can not
   have an array of type operator new.  This causes NEW '[' to be
   parsed as operator new[].  */
%nonassoc NEW DELETE

/* Give VOID higher precedence than NAME.  Then we can use %prec NAME
   to prefer (VOID) to (function_args).  */
%nonassoc VOID

/* Give VOID lower precedence than ')' for similar reasons.  */
%nonassoc ')'

%left ','
%right '=' ASSIGN_MODIFY
%right '?'
%left OROR
%left ANDAND
%left '|'
%left '^'
%left '&'
%left EQUAL NOTEQUAL
%left '<' '>' LEQ GEQ
%left LSH RSH
%left '@'
%left '+' '-'
%left '*' '/' '%'
%right UNARY INCREMENT DECREMENT

/* We don't need a precedence for '(' in this reduced grammar, and it
   can mask some unpleasant bugs, so disable it for now.  */

%right ARROW '.' '[' /* '(' */
%left COLONCOLON

%%
/* Top-level rule: record the finished tree in global_result.  */
result
    : start
        { global_result = $1; }
    ;
/* A complete name is a type, a demangler special, or a function.
   Each alternative uses the default action ($$ = $1).  */
start
    : type

    | demangler_special

    | function

    ;
/* An optional "::" followed by another start; NULL when absent.  */
start_opt
    : /* */
        { $$ = NULL; }
    | COLONCOLON start
        { $$ = $2; }
    ;
function
    /* Function with a return type.  declarator_1 is used to prevent
       ambiguity with the next rule.  */
    : typespec_2 declarator_1
        { $$ = $2.comp;
          *$2.last = $1;
        }

    /* Function without a return type.  We need to use typespec_2
       to prevent conflicts from qualifiers_opt - harmless.  The
       start_opt is used to handle "function-local" variables and
       types.  */
    | typespec_2 function_arglist start_opt
        { $$ = fill_comp (DEMANGLE_COMPONENT_TYPED_NAME, $1, $2.comp);
          if ($3) $$ = fill_comp (DEMANGLE_COMPONENT_LOCAL_NAME, $$, $3); }
    | colon_ext_only function_arglist start_opt
        { $$ = fill_comp (DEMANGLE_COMPONENT_TYPED_NAME, $1, $2.comp);
          if ($3) $$ = fill_comp (DEMANGLE_COMPONENT_LOCAL_NAME, $$, $3); }

    | conversion_op_name start_opt
        { $$ = $1.comp;
          if ($2) $$ = fill_comp (DEMANGLE_COMPONENT_LOCAL_NAME, $$, $2); }
    | conversion_op_name abstract_declarator_fn
        { if ($2.last)
            {
              /* First complete the abstract_declarator's type using
                 the typespec from the conversion_op_name.  */
              *$2.last = *$1.last;
              /* Then complete the conversion_op_name with the type.  */
              *$1.last = $2.comp;
            }
          /* If we have an arglist, build a function type.  */
          if ($2.fn.comp)
            $$ = fill_comp (DEMANGLE_COMPONENT_TYPED_NAME, $1.comp, $2.fn.comp);
          else
            $$ = $1.comp;
          if ($2.start) $$ = fill_comp (DEMANGLE_COMPONENT_LOCAL_NAME, $$, $2.start);
        }
    ;
/* Special, non-C++ forms.  For DEMANGLER_SPECIAL and GLOBAL the token
   value supplies the component type for the new node.  */
demangler_special
    : DEMANGLER_SPECIAL start
        { $$ = make_empty ($1);
          d_left ($$) = $2;
          d_right ($$) = NULL; }
    | CONSTRUCTION_VTABLE start CONSTRUCTION_IN start
        { $$ = fill_comp (DEMANGLE_COMPONENT_CONSTRUCTION_VTABLE, $2, $4); }
    | GLOBAL
        { $$ = make_empty ($1.val);
          d_left ($$) = $1.type;
          d_right ($$) = NULL; }
    ;
/* "operator" followed by an operator symbol.  The second argument of
   make_operator is the argument count passed to
   cplus_demangle_fill_operator.  */
operator
    : OPERATOR NEW
        { $$ = make_operator ("new", 1); }
    | OPERATOR DELETE
        { $$ = make_operator ("delete", 1); }
    | OPERATOR NEW '[' ']'
        { $$ = make_operator ("new[]", 1); }
    | OPERATOR DELETE '[' ']'
        { $$ = make_operator ("delete[]", 1); }
    | OPERATOR '+'
        { $$ = make_operator ("+", 2); }
    | OPERATOR '-'
        { $$ = make_operator ("-", 2); }
    | OPERATOR '*'
        { $$ = make_operator ("*", 2); }
    | OPERATOR '/'
        { $$ = make_operator ("/", 2); }
    | OPERATOR '%'
        { $$ = make_operator ("%", 2); }
    | OPERATOR '^'
        { $$ = make_operator ("^", 2); }
    | OPERATOR '&'
        { $$ = make_operator ("&", 2); }
    | OPERATOR '|'
        { $$ = make_operator ("|", 2); }
    | OPERATOR '~'
        { $$ = make_operator ("~", 1); }
    | OPERATOR '!'
        { $$ = make_operator ("!", 1); }
    | OPERATOR '='
        { $$ = make_operator ("=", 2); }
    | OPERATOR '<'
        { $$ = make_operator ("<", 2); }
    | OPERATOR '>'
        { $$ = make_operator (">", 2); }
    | OPERATOR ASSIGN_MODIFY
        { $$ = make_operator ($2, 2); }
    | OPERATOR LSH
        { $$ = make_operator ("<<", 2); }
    | OPERATOR RSH
        { $$ = make_operator (">>", 2); }
    | OPERATOR EQUAL
        { $$ = make_operator ("==", 2); }
    | OPERATOR NOTEQUAL
        { $$ = make_operator ("!=", 2); }
    | OPERATOR LEQ
        { $$ = make_operator ("<=", 2); }
    | OPERATOR GEQ
        { $$ = make_operator (">=", 2); }
    | OPERATOR ANDAND
        { $$ = make_operator ("&&", 2); }
    | OPERATOR OROR
        { $$ = make_operator ("||", 2); }
    | OPERATOR INCREMENT
        { $$ = make_operator ("++", 1); }
    | OPERATOR DECREMENT
        { $$ = make_operator ("--", 1); }
    | OPERATOR ','
        { $$ = make_operator (",", 2); }
    | OPERATOR ARROW '*'
        { $$ = make_operator ("->*", 2); }
    | OPERATOR ARROW
        { $$ = make_operator ("->", 2); }
    | OPERATOR '(' ')'
        { $$ = make_operator ("()", 0); }
    | OPERATOR '[' ']'
        { $$ = make_operator ("[]", 2); }
    ;
/* Conversion operators.  We don't try to handle some of
   the wackier demangler output for function pointers,
   since it's not clear that it's parseable.  */
conversion_op
    : OPERATOR typespec_2
        { $$ = fill_comp (DEMANGLE_COMPONENT_CAST, $2, NULL); }
    ;
/* A conversion operator, optionally qualified.  LAST is left pointing
   at the CAST node's left child (the target type) so that a following
   declarator can replace it.  */
conversion_op_name
    : nested_name conversion_op
        { $$.comp = $1.comp;
          d_right ($1.last) = $2;
          $$.last = &d_left ($2);
        }
    | conversion_op
        { $$.comp = $1;
          $$.last = &d_left ($1);
        }
    | COLONCOLON nested_name conversion_op
        { $$.comp = $2.comp;
          d_right ($2.last) = $3;
          $$.last = &d_left ($3);
        }
    | COLONCOLON conversion_op
        { $$.comp = $2;
          $$.last = &d_left ($2);
        }
    ;
/* DEMANGLE_COMPONENT_NAME */
/* This accepts certain invalid placements of '~'.  */
unqualified_name
    : operator
    | operator '<' template_params '>'
        { $$ = fill_comp (DEMANGLE_COMPONENT_TEMPLATE, $1, $3.comp); }
    | '~' NAME
        { $$ = make_dtor (gnu_v3_complete_object_dtor, $2); }
    ;
/* This rule is used in name and nested_name, and expanded inline there
   for efficiency.  It is therefore kept only as a comment.  */
/*
scope_id        :       NAME
                |       template
                ;
*/
/* A name with an optional leading "::", which is dropped.  */
colon_name
    : name
    | COLONCOLON name
        { $$ = $2; }
    ;
/* DEMANGLE_COMPONENT_QUAL_NAME */
/* DEMANGLE_COMPONENT_CTOR / DEMANGLE_COMPONENT_DTOR ? */
/* For the qualified forms, the final NAME or template becomes the
   right child of the innermost qualifier built by nested_name.  */
name
    : nested_name NAME %prec NAME
        { $$ = $1.comp; d_right ($1.last) = $2; }
    | NAME %prec NAME
    | nested_name template %prec NAME
        { $$ = $1.comp; d_right ($1.last) = $2; }
    | template %prec NAME
    ;
/* Either an ordinary (possibly "::"-prefixed) name or one of the
   extended names below.  */
colon_ext_name
    : colon_name
    | colon_ext_only
    ;
/* An ext_only_name with an optional leading "::", which is dropped.  */
colon_ext_only
    : ext_only_name
    | COLONCOLON ext_only_name
        { $$ = $2; }
    ;
/* An unqualified_name (operator or destructor), optionally qualified
   by a nested_name.  */
ext_only_name
    : nested_name unqualified_name
        { $$ = $1.comp; d_right ($1.last) = $2; }
    | unqualified_name
    ;
/* A chain of "scope::" qualifiers.  COMP is the outermost QUAL_NAME
   node; LAST is the innermost one, whose right child is still NULL
   and is filled in by whatever follows.  */
nested_name
    : NAME COLONCOLON
        { $$.comp = make_empty (DEMANGLE_COMPONENT_QUAL_NAME);
          d_left ($$.comp) = $1;
          d_right ($$.comp) = NULL;
          $$.last = $$.comp;
        }
    | nested_name NAME COLONCOLON
        { $$.comp = $1.comp;
          d_right ($1.last) = make_empty (DEMANGLE_COMPONENT_QUAL_NAME);
          $$.last = d_right ($1.last);
          d_left ($$.last) = $2;
          d_right ($$.last) = NULL;
        }
    | template COLONCOLON
        { $$.comp = make_empty (DEMANGLE_COMPONENT_QUAL_NAME);
          d_left ($$.comp) = $1;
          d_right ($$.comp) = NULL;
          $$.last = $$.comp;
        }
    | nested_name template COLONCOLON
        { $$.comp = $1.comp;
          d_right ($1.last) = make_empty (DEMANGLE_COMPONENT_QUAL_NAME);
          $$.last = d_right ($1.last);
          d_left ($$.last) = $2;
          d_right ($$.last) = NULL;
        }
    ;
/* DEMANGLE_COMPONENT_TEMPLATE */
/* DEMANGLE_COMPONENT_TEMPLATE_ARGLIST */
template
    : NAME '<' template_params '>'
        { $$ = fill_comp (DEMANGLE_COMPONENT_TEMPLATE, $1, $3.comp); }
    ;
/* Comma-separated template arguments, built as a right-linked chain
   of TEMPLATE_ARGLIST nodes.  LAST is the address of the final node's
   right child, where the next argument is appended.  */
template_params
    : template_arg
        { $$.comp = fill_comp (DEMANGLE_COMPONENT_TEMPLATE_ARGLIST, $1, NULL);
          $$.last = &d_right ($$.comp); }
    | template_params ',' template_arg
        { $$.comp = $1.comp;
          *$1.last = fill_comp (DEMANGLE_COMPONENT_TEMPLATE_ARGLIST, $3, NULL);
          $$.last = &d_right (*$1.last);
        }
    ;
/* "type" is inlined into template_arg and function_args.  */

/* Also an integral constant-expression of integral type, and a
   pointer to member (?) */
template_arg
    : typespec_2
    | typespec_2 abstract_declarator
        { $$ = $2.comp;
          *$2.last = $1;
        }
    | '&' start
        { $$ = fill_comp (DEMANGLE_COMPONENT_UNARY, make_operator ("&", 1), $2); }
    | '&' '(' start ')'
        { $$ = fill_comp (DEMANGLE_COMPONENT_UNARY, make_operator ("&", 1), $3); }
    | exp
    ;
/* Comma-separated function argument types, built as a right-linked
   chain of ARGLIST nodes.  LAST is the address of the final node's
   right child, where the next argument is appended.  */
function_args
    : typespec_2
        { $$.comp = fill_comp (DEMANGLE_COMPONENT_ARGLIST, $1, NULL);
          $$.last = &d_right ($$.comp);
        }
    | typespec_2 abstract_declarator
        { *$2.last = $1;
          $$.comp = fill_comp (DEMANGLE_COMPONENT_ARGLIST, $2.comp, NULL);
          $$.last = &d_right ($$.comp);
        }
    | function_args ',' typespec_2
        { *$1.last = fill_comp (DEMANGLE_COMPONENT_ARGLIST, $3, NULL);
          $$.comp = $1.comp;
          $$.last = &d_right (*$1.last);
        }
    | function_args ',' typespec_2 abstract_declarator
        { *$4.last = $3;
          *$1.last = fill_comp (DEMANGLE_COMPONENT_ARGLIST, $4.comp, NULL);
          $$.comp = $1.comp;
          $$.last = &d_right (*$1.last);
        }
    | function_args ',' ELLIPSIS
        { *$1.last
            = fill_comp (DEMANGLE_COMPONENT_ARGLIST,
                         make_builtin_type ("..."),
                         NULL);
          $$.comp = $1.comp;
          $$.last = &d_right (*$1.last);
        }
    ;
/* A parenthesized argument list followed by optional qualifiers.
   LAST is the address of the FUNCTION_TYPE node's left child and is
   taken before d_qualify (called with IS_METHOD set) wraps the
   node.  */
function_arglist
    : '(' function_args ')' qualifiers_opt %prec NAME
        { $$.comp = fill_comp (DEMANGLE_COMPONENT_FUNCTION_TYPE, NULL, $2.comp);
          $$.last = &d_left ($$.comp);
          $$.comp = d_qualify ($$.comp, $4, 1); }
    | '(' VOID ')' qualifiers_opt
        { $$.comp = fill_comp (DEMANGLE_COMPONENT_FUNCTION_TYPE, NULL, NULL);
          $$.last = &d_left ($$.comp);
          $$.comp = d_qualify ($$.comp, $4, 1); }
    | '(' ')' qualifiers_opt
        { $$.comp = fill_comp (DEMANGLE_COMPONENT_FUNCTION_TYPE, NULL, NULL);
          $$.last = &d_left ($$.comp);
          $$.comp = d_qualify ($$.comp, $3, 1); }
    ;
/* Should do something about DEMANGLE_COMPONENT_VENDOR_TYPE_QUAL */
/* The value is a mask of QUAL_* flags; 0 when no qualifier is
   present.  */
qualifiers_opt
    : /* epsilon */
        { $$ = 0; }
    | qualifiers
    ;
/* Map a single qualifier keyword to its QUAL_* flag.  */
qualifier
    : RESTRICT
        { $$ = QUAL_RESTRICT; }
    | VOLATILE_KEYWORD
        { $$ = QUAL_VOLATILE; }
    | CONST_KEYWORD
        { $$ = QUAL_CONST; }
    ;
/* One or more qualifiers, OR'd together into one QUAL_* mask.  */
qualifiers
    : qualifier
    | qualifier qualifiers
        { $$ = $1 | $2; }
    ;
/* This accepts all sorts of invalid constructions and produces
   invalid output for them - an error would be better.  */

/* Map one integer-type keyword to its INT_* flag; plain "int"
   contributes no flag.  */
int_part
    : INT_KEYWORD
        { $$ = 0; }
    | SIGNED_KEYWORD
        { $$ = INT_SIGNED; }
    | UNSIGNED
        { $$ = INT_UNSIGNED; }
    | CHAR
        { $$ = INT_CHAR; }
    | LONG
        { $$ = INT_LONG; }
    | SHORT
        { $$ = INT_SHORT; }
    ;
/* A sequence of integer-type keywords, OR'd into one INT_* mask.  A
   second LONG in the sequence sets INT_LLONG.  */
int_seq
    : int_part
    | int_seq int_part
        { $$ = $1 | $2; if ($1 & $2 & INT_LONG) $$ = $1 | INT_LLONG; }
    ;
/* Builtin types.  Integer types are assembled by d_int_type from the
   INT_* mask; the others are created directly by name.  */
builtin_type
    : int_seq
        { $$ = d_int_type ($1); }
    | FLOAT_KEYWORD
        { $$ = make_builtin_type ("float"); }
    | DOUBLE_KEYWORD
        { $$ = make_builtin_type ("double"); }
    | LONG DOUBLE_KEYWORD
        { $$ = make_builtin_type ("long double"); }
    | BOOL
        { $$ = make_builtin_type ("bool"); }
    | WCHAR_T
        { $$ = make_builtin_type ("wchar_t"); }
    | VOID
        { $$ = make_builtin_type ("void"); }
    ;
/* Pointer, reference and pointer-to-member operators.  LAST is the
   address of the slot that will receive the type being pointed or
   referred to.  */
ptr_operator
    : '*' qualifiers_opt
        { $$.comp = make_empty (DEMANGLE_COMPONENT_POINTER);
          $$.comp->u.s_binary.left = $$.comp->u.s_binary.right = NULL;
          $$.last = &d_left ($$.comp);
          $$.comp = d_qualify ($$.comp, $2, 0); }
    /* g++ seems to allow qualifiers after the reference?  */
    | '&'
        { $$.comp = make_empty (DEMANGLE_COMPONENT_REFERENCE);
          $$.comp->u.s_binary.left = $$.comp->u.s_binary.right = NULL;
          $$.last = &d_left ($$.comp); }
    | nested_name '*' qualifiers_opt
        { $$.comp = make_empty (DEMANGLE_COMPONENT_PTRMEM_TYPE);
          $$.comp->u.s_binary.left = $1.comp;
          /* Convert the innermost DEMANGLE_COMPONENT_QUAL_NAME to a DEMANGLE_COMPONENT_NAME.  */
          *$1.last = *d_left ($1.last);
          $$.comp->u.s_binary.right = NULL;
          $$.last = &d_right ($$.comp);
          $$.comp = d_qualify ($$.comp, $3, 0); }
    | COLONCOLON nested_name '*' qualifiers_opt
        { $$.comp = make_empty (DEMANGLE_COMPONENT_PTRMEM_TYPE);
          $$.comp->u.s_binary.left = $2.comp;
          /* Convert the innermost DEMANGLE_COMPONENT_QUAL_NAME to a DEMANGLE_COMPONENT_NAME.  */
          *$2.last = *d_left ($2.last);
          $$.comp->u.s_binary.right = NULL;
          $$.last = &d_right ($$.comp);
          $$.comp = d_qualify ($$.comp, $4, 0); }
    ;
/* "[]" or "[INT]".  The left child holds the bound (NULL when
   absent); the right child is not set here -- the rules that use
   array_indicator take its address as their LAST slot.  */
array_indicator
    : '[' ']'
        { $$ = make_empty (DEMANGLE_COMPONENT_ARRAY_TYPE);
          d_left ($$) = NULL;
        }
    | '[' INT ']'
        { $$ = make_empty (DEMANGLE_COMPONENT_ARRAY_TYPE);
          d_left ($$) = $2;
        }
    ;
/* Details of this approach inspired by the G++ < 3.4 parser.  */

/* This rule is only used in typespec_2, and expanded inline there for
   efficiency.  It is therefore kept only as a comment.  */
/*
typespec        :       builtin_type
                |       colon_name
                ;
*/
/* A builtin type or (possibly "::"-prefixed) name, with optional
   qualifiers before and/or after it.  Leading and trailing
   qualifiers are merged into one mask for d_qualify.  */
typespec_2
    : builtin_type qualifiers
        { $$ = d_qualify ($1, $2, 0); }
    | builtin_type
    | qualifiers builtin_type qualifiers
        { $$ = d_qualify ($2, $1 | $3, 0); }
    | qualifiers builtin_type
        { $$ = d_qualify ($2, $1, 0); }

    | name qualifiers
        { $$ = d_qualify ($1, $2, 0); }
    | name
    | qualifiers name qualifiers
        { $$ = d_qualify ($2, $1 | $3, 0); }
    | qualifiers name
        { $$ = d_qualify ($2, $1, 0); }

    | COLONCOLON name qualifiers
        { $$ = d_qualify ($2, $3, 0); }
    | COLONCOLON name
        { $$ = $2; }
    | qualifiers COLONCOLON name qualifiers
        { $$ = d_qualify ($3, $1 | $4, 0); }
    | qualifiers COLONCOLON name
        { $$ = d_qualify ($3, $1, 0); }
    ;
/* A declarator without a name.  Any pending function part (FN) of a
   sub-declarator is folded into the tree here, so the result always
   has FN cleared.  The third alternative relies on the default
   action ($$ = $1) for COMP and LAST.  */
abstract_declarator
    : ptr_operator
        { $$.comp = $1.comp; $$.last = $1.last;
          $$.fn.comp = NULL; $$.fn.last = NULL; }
    | ptr_operator abstract_declarator
        { $$ = $2; $$.fn.comp = NULL; $$.fn.last = NULL;
          if ($2.fn.comp) { $$.last = $2.fn.last; *$2.last = $2.fn.comp; }
          *$$.last = $1.comp;
          $$.last = $1.last; }
    | direct_abstract_declarator
        { $$.fn.comp = NULL; $$.fn.last = NULL;
          if ($1.fn.comp) { $$.last = $1.fn.last; *$1.last = $1.fn.comp; }
        }
    ;
/* Parenthesized, function and array forms of an abstract declarator.
   FOLD_FLAG is set only by the parenthesized form; when set, a
   following function_arglist is attached to the tree at once instead
   of being held in FN.  The function_arglist alternative relies on
   the default action ($$ = $1) for the fields it does not assign.  */
direct_abstract_declarator
    : '(' abstract_declarator ')'
        { $$ = $2; $$.fn.comp = NULL; $$.fn.last = NULL; $$.fold_flag = 1;
          if ($2.fn.comp) { $$.last = $2.fn.last; *$2.last = $2.fn.comp; }
        }
    | direct_abstract_declarator function_arglist
        { $$.fold_flag = 0;
          if ($1.fn.comp) { $$.last = $1.fn.last; *$1.last = $1.fn.comp; }
          if ($1.fold_flag)
            {
              *$$.last = $2.comp;
              $$.last = $2.last;
            }
          else
            $$.fn = $2;
        }
    | direct_abstract_declarator array_indicator
        { $$.fn.comp = NULL; $$.fn.last = NULL; $$.fold_flag = 0;
          if ($1.fn.comp) { $$.last = $1.fn.last; *$1.last = $1.fn.comp; }
          *$1.last = $2;
          $$.last = &d_right ($2);
        }
    | array_indicator
        { $$.fn.comp = NULL; $$.fn.last = NULL; $$.fold_flag = 0;
          $$.comp = $1;
          $$.last = &d_right ($1);
        }
    /* G++ has the following except for () and (type).  Then
       (type) is handled in regcast_or_absdcl and () is handled
       in fcast_or_absdcl.

       However, this is only useful for function types, and
       generates reduce/reduce conflicts with direct_declarator.
       We're interested in pointer-to-function types, and in
       functions, but not in function types - so leave this
       out.  */
    /* | function_arglist */
    ;
/* Variant of abstract_declarator used after a conversion_op_name.
   It keeps the function part in FN rather than folding it, and
   records an optional trailing "::"-introduced START.  The fourth
   alternative relies on the default action ($$ = $1) for the fields
   it does not assign.  */
abstract_declarator_fn
    : ptr_operator
        { $$.comp = $1.comp; $$.last = $1.last;
          $$.fn.comp = NULL; $$.fn.last = NULL; $$.start = NULL; }
    | ptr_operator abstract_declarator_fn
        { $$ = $2;
          if ($2.last)
            *$$.last = $1.comp;
          else
            $$.comp = $1.comp;
          $$.last = $1.last;
        }
    | direct_abstract_declarator
        { $$.comp = $1.comp; $$.last = $1.last; $$.fn = $1.fn; $$.start = NULL; }
    | direct_abstract_declarator function_arglist COLONCOLON start
        { $$.start = $4;
          if ($1.fn.comp) { $$.last = $1.fn.last; *$1.last = $1.fn.comp; }
          if ($1.fold_flag)
            {
              *$$.last = $2.comp;
              $$.last = $2.last;
            }
          else
            $$.fn = $2;
        }
    | function_arglist start_opt
        { $$.fn = $1;
          $$.start = $2;
          $$.comp = NULL; $$.last = NULL;
        }
    ;
/* A type: a typespec_2, optionally wrapped by an abstract declarator
   whose open slot receives the typespec.  */
type
    : typespec_2
    | typespec_2 abstract_declarator
        { $$ = $2.comp;
          *$2.last = $1;
        }
    ;
/* A named declarator.  A leading ptr_operator is stored into the
   inner declarator's open slot, and the ptr_operator's own slot
   becomes the new LAST.  */
declarator
    : ptr_operator declarator
        { $$.comp = $2.comp;
          $$.last = $1.last;
          *$2.last = $1.comp; }
    | direct_declarator
    ;
/* Parenthesized, function, array and plain-name declarators.  The
   plain name becomes the left child of a TYPED_NAME node whose right
   child is the open slot.  */
direct_declarator
    : '(' declarator ')'
        { $$ = $2; }
    | direct_declarator function_arglist
        { $$.comp = $1.comp;
          *$1.last = $2.comp;
          $$.last = $2.last;
        }
    | direct_declarator array_indicator
        { $$.comp = $1.comp;
          *$1.last = $2;
          $$.last = &d_right ($2);
        }
    | colon_ext_name
        { $$.comp = make_empty (DEMANGLE_COMPONENT_TYPED_NAME);
          d_left ($$.comp) = $1;
          $$.last = &d_right ($$.comp);
        }
    ;
/* These are similar to declarator and direct_declarator except that they
   do not permit ( colon_ext_name ), which is ambiguous with a function
   argument list.  They also don't permit a few other forms with redundant
   parentheses around the colon_ext_name; any colon_ext_name in parentheses
   must be followed by an argument list or an array indicator, or preceded
   by a pointer.  */
declarator_1
    : ptr_operator declarator_1
        { $$.comp = $2.comp;
          $$.last = $1.last;
          *$2.last = $1.comp; }
    | colon_ext_name
        { $$.comp = make_empty (DEMANGLE_COMPONENT_TYPED_NAME);
          d_left ($$.comp) = $1;
          $$.last = &d_right ($$.comp);
        }
    | direct_declarator_1

    /* Function local variable or type.  The typespec to
       our left is the type of the containing function.
       This should be OK, because function local types
       can not be templates, so the return types of their
       members will not be mangled.  If they are hopefully
       they'll end up to the right of the ::.  */
    | colon_ext_name function_arglist COLONCOLON start
        { $$.comp = fill_comp (DEMANGLE_COMPONENT_TYPED_NAME, $1, $2.comp);
          $$.last = $2.last;
          $$.comp = fill_comp (DEMANGLE_COMPONENT_LOCAL_NAME, $$.comp, $4);
        }
    | direct_declarator_1 function_arglist COLONCOLON start
        { $$.comp = $1.comp;
          *$1.last = $2.comp;
          $$.last = $2.last;
          $$.comp = fill_comp (DEMANGLE_COMPONENT_LOCAL_NAME, $$.comp, $4);
        }
    ;
/* Restricted counterpart of direct_declarator used by declarator_1:
   a parenthesized declarator must begin with a ptr_operator, and a
   bare colon_ext_name must be followed by an argument list or an
   array indicator.  */
direct_declarator_1
    : '(' ptr_operator declarator ')'
        { $$.comp = $3.comp;
          $$.last = $2.last;
          *$3.last = $2.comp; }
    | direct_declarator_1 function_arglist
        { $$.comp = $1.comp;
          *$1.last = $2.comp;
          $$.last = $2.last;
        }
    | direct_declarator_1 array_indicator
        { $$.comp = $1.comp;
          *$1.last = $2;
          $$.last = &d_right ($2);
        }
    | colon_ext_name function_arglist
        { $$.comp = fill_comp (DEMANGLE_COMPONENT_TYPED_NAME, $1, $2.comp);
          $$.last = $2.last;
        }
    | colon_ext_name array_indicator
        { $$.comp = fill_comp (DEMANGLE_COMPONENT_TYPED_NAME, $1, $2);
          $$.last = &d_right ($2);
        }
    ;
/* A parenthesized expression yields the inner exp1.  */
exp
    : '(' exp1 ')'
        { $$ = $2; }
    ;

/* Silly trick.  Only allow '>' when parenthesized, in order to
   handle conflict with templates.  */
exp1
    : exp
    ;

exp1
    : exp '>' exp
        { $$ = d_binary (">", $1, $3); }
    ;
/* References.  Not allowed everywhere in template parameters, only
   at the top level, but treat them as expressions in case they are wrapped
   in parentheses.  */
exp1
    : '&' start
        { $$ = fill_comp (DEMANGLE_COMPONENT_UNARY, make_operator ("&", 1), $2); }
    | '&' '(' start ')'
        { $$ = fill_comp (DEMANGLE_COMPONENT_UNARY, make_operator ("&", 1), $3); }
    ;
/* Expressions, not including the comma operator.  */
/* Prefix operators; %prec UNARY gives them the precedence declared
   for UNARY.  */
exp
    : '-' exp %prec UNARY
        { $$ = d_unary ("-", $2); }
    | '!' exp %prec UNARY
        { $$ = d_unary ("!", $2); }
    | '~' exp %prec UNARY
        { $$ = d_unary ("~", $2); }
    ;
/* Casts.  First your normal C-style cast.  If exp is a LITERAL, just change
   its type.  */

exp
    : '(' type ')' exp  %prec UNARY
        { if ($4->type == DEMANGLE_COMPONENT_LITERAL
              || $4->type == DEMANGLE_COMPONENT_LITERAL_NEG)
            {
              $$ = $4;
              d_left ($4) = $2;
            }
          else
            $$ = fill_comp (DEMANGLE_COMPONENT_UNARY,
                            fill_comp (DEMANGLE_COMPONENT_CAST, $2, NULL),
                            $4);
        }
    ;

/* Mangling does not differentiate between these, so we don't need to
   either.  */
exp
    : STATIC_CAST '<' type '>' '(' exp1 ')' %prec UNARY
        { $$ = fill_comp (DEMANGLE_COMPONENT_UNARY,
                          fill_comp (DEMANGLE_COMPONENT_CAST, $3, NULL),
                          $6);
        }
    ;

exp
    : DYNAMIC_CAST '<' type '>' '(' exp1 ')' %prec UNARY
        { $$ = fill_comp (DEMANGLE_COMPONENT_UNARY,
                          fill_comp (DEMANGLE_COMPONENT_CAST, $3, NULL),
                          $6);
        }
    ;

exp
    : REINTERPRET_CAST '<' type '>' '(' exp1 ')' %prec UNARY
        { $$ = fill_comp (DEMANGLE_COMPONENT_UNARY,
                          fill_comp (DEMANGLE_COMPONENT_CAST, $3, NULL),
                          $6);
        }
    ;
1071 /* Another form of C++-style cast is "type ( exp1 )". This creates too many
1072 conflicts to support. For a while we supported the simpler
1073 "typespec_2 ( exp1 )", but that conflicts with "& ( start )" as a
1074 reference, deep within the wilderness of abstract declarators:
1075 Qux<int(&(*))> vs Qux<int(&(var))>, a shift-reduce conflict at the
1076 innermost left parenthesis. So we do not support function-like casts.
1077 Fortunately they never appear in demangler output. */
1079 /* TO INVESTIGATE: ._0 style anonymous names; anonymous namespaces */
/* Binary operators in order of decreasing precedence.  */
/* Note that '>' is absent here; it is only accepted through exp1.  */

exp
    : exp '*' exp
        { $$ = d_binary ("*", $1, $3); }
    | exp '/' exp
        { $$ = d_binary ("/", $1, $3); }
    | exp '%' exp
        { $$ = d_binary ("%", $1, $3); }
    | exp '+' exp
        { $$ = d_binary ("+", $1, $3); }
    | exp '-' exp
        { $$ = d_binary ("-", $1, $3); }
    | exp LSH exp
        { $$ = d_binary ("<<", $1, $3); }
    | exp RSH exp
        { $$ = d_binary (">>", $1, $3); }
    | exp EQUAL exp
        { $$ = d_binary ("==", $1, $3); }
    | exp NOTEQUAL exp
        { $$ = d_binary ("!=", $1, $3); }
    | exp LEQ exp
        { $$ = d_binary ("<=", $1, $3); }
    | exp GEQ exp
        { $$ = d_binary (">=", $1, $3); }
    | exp '<' exp
        { $$ = d_binary ("<", $1, $3); }
    | exp '&' exp
        { $$ = d_binary ("&", $1, $3); }
    | exp '^' exp
        { $$ = d_binary ("^", $1, $3); }
    | exp '|' exp
        { $$ = d_binary ("|", $1, $3); }
    | exp ANDAND exp
        { $$ = d_binary ("&&", $1, $3); }
    | exp OROR exp
        { $$ = d_binary ("||", $1, $3); }
    ;

/* Not 100% sure these are necessary, but they're harmless.  */
exp
    : exp ARROW NAME
        { $$ = d_binary ("->", $1, $3); }
    | exp '.' NAME
        { $$ = d_binary (".", $1, $3); }
    ;

/* The conditional operator is built as TRINARY (op, ARG1 (cond,
   ARG2 (then, else))).  */
exp
    : exp '?' exp ':' exp %prec '?'
        { $$ = fill_comp (DEMANGLE_COMPONENT_TRINARY, make_operator ("?", 3),
                          fill_comp (DEMANGLE_COMPONENT_TRINARY_ARG1, $1,
                                     fill_comp (DEMANGLE_COMPONENT_TRINARY_ARG2, $3, $5)));
        }
    ;
1167 exp : INT
1170 /* Not generally allowed. */
1171 exp : FLOAT
1174 exp : SIZEOF '(' type ')' %prec UNARY
1175 { $$ = d_unary ("sizeof", $3); }
1178 /* C++. */
1179 exp : TRUEKEYWORD
1180 { struct demangle_component *i;
1181 i = make_name ("1", 1);
1182 $$ = fill_comp (DEMANGLE_COMPONENT_LITERAL,
1183 make_builtin_type ("bool"),
1188 exp : FALSEKEYWORD
1189 { struct demangle_component *i;
1190 i = make_name ("0", 1);
1191 $$ = fill_comp (DEMANGLE_COMPONENT_LITERAL,
1192 make_builtin_type ("bool"),
1197 /* end of C++. */
1201 /* Apply QUALIFIERS to LHS and return a qualified component. IS_METHOD
1202 is set if LHS is a method, in which case the qualifiers are logically
1203 applied to "this". We apply qualifiers in a consistent order; LHS
1204 may already be qualified; duplicate qualifiers are not created. */
1206 struct demangle_component *
1207 d_qualify (struct demangle_component *lhs, int qualifiers, int is_method)
1209 struct demangle_component **inner_p;
1210 enum demangle_component_type type;
1212 /* For now the order is CONST (innermost), VOLATILE, RESTRICT. */
1214 #define HANDLE_QUAL(TYPE, MTYPE, QUAL) \
1215 if ((qualifiers & QUAL) && (type != TYPE) && (type != MTYPE)) \
1217 *inner_p = fill_comp (is_method ? MTYPE : TYPE, \
1218 *inner_p, NULL); \
1219 inner_p = &d_left (*inner_p); \
1220 type = (*inner_p)->type; \
1222 else if (type == TYPE || type == MTYPE) \
1224 inner_p = &d_left (*inner_p); \
1225 type = (*inner_p)->type; \
1228 inner_p = &lhs;
1230 type = (*inner_p)->type;
1232 HANDLE_QUAL (DEMANGLE_COMPONENT_RESTRICT, DEMANGLE_COMPONENT_RESTRICT_THIS, QUAL_RESTRICT);
1233 HANDLE_QUAL (DEMANGLE_COMPONENT_VOLATILE, DEMANGLE_COMPONENT_VOLATILE_THIS, QUAL_VOLATILE);
1234 HANDLE_QUAL (DEMANGLE_COMPONENT_CONST, DEMANGLE_COMPONENT_CONST_THIS, QUAL_CONST);
1236 return lhs;
1239 /* Return a builtin type corresponding to FLAGS. */
1241 static struct demangle_component *
1242 d_int_type (int flags)
1244 const char *name;
1246 switch (flags)
1248 case INT_SIGNED | INT_CHAR:
1249 name = "signed char";
1250 break;
1251 case INT_CHAR:
1252 name = "char";
1253 break;
1254 case INT_UNSIGNED | INT_CHAR:
1255 name = "unsigned char";
1256 break;
1257 case 0:
1258 case INT_SIGNED:
1259 name = "int";
1260 break;
1261 case INT_UNSIGNED:
1262 name = "unsigned int";
1263 break;
1264 case INT_LONG:
1265 case INT_SIGNED | INT_LONG:
1266 name = "long";
1267 break;
1268 case INT_UNSIGNED | INT_LONG:
1269 name = "unsigned long";
1270 break;
1271 case INT_SHORT:
1272 case INT_SIGNED | INT_SHORT:
1273 name = "short";
1274 break;
1275 case INT_UNSIGNED | INT_SHORT:
1276 name = "unsigned short";
1277 break;
1278 case INT_LLONG | INT_LONG:
1279 case INT_SIGNED | INT_LLONG | INT_LONG:
1280 name = "long long";
1281 break;
1282 case INT_UNSIGNED | INT_LLONG | INT_LONG:
1283 name = "unsigned long long";
1284 break;
1285 default:
1286 return NULL;
1289 return make_builtin_type (name);
1292 /* Wrapper to create a unary operation. */
1294 static struct demangle_component *
1295 d_unary (const char *name, struct demangle_component *lhs)
1297 return fill_comp (DEMANGLE_COMPONENT_UNARY, make_operator (name, 1), lhs);
1300 /* Wrapper to create a binary operation. */
1302 static struct demangle_component *
1303 d_binary (const char *name, struct demangle_component *lhs, struct demangle_component *rhs)
1305 return fill_comp (DEMANGLE_COMPONENT_BINARY, make_operator (name, 2),
1306 fill_comp (DEMANGLE_COMPONENT_BINARY_ARGS, lhs, rhs));
/* Return a pointer to the first character at or after LEXPTR that
   cannot be part of a symbol name.  Symbol characters are letters,
   digits, '_', '$' and '.'; the scan also stops at the terminating
   NUL.  */

static const char *
symbol_end (const char *lexptr)
{
  const char *end;

  for (end = lexptr; *end != '\0'; end++)
    if (!ISALNUM (*end) && *end != '_' && *end != '$' && *end != '.')
      break;

  return end;
}
1322 /* Take care of parsing a number (anything that starts with a digit).
1323 The number starts at P and contains LEN characters. Store the result in
1324 YYLVAL. */
1326 static int
1327 parse_number (const char *p, int len, int parsed_float)
1329 int unsigned_p = 0;
1331 /* Number of "L" suffixes encountered. */
1332 int long_p = 0;
1334 struct demangle_component *signed_type;
1335 struct demangle_component *unsigned_type;
1336 struct demangle_component *type, *name;
1337 enum demangle_component_type literal_type;
1339 if (p[0] == '-')
1341 literal_type = DEMANGLE_COMPONENT_LITERAL_NEG;
1342 p++;
1343 len--;
1345 else
1346 literal_type = DEMANGLE_COMPONENT_LITERAL;
1348 if (parsed_float)
1350 /* It's a float since it contains a point or an exponent. */
1351 char c;
1353 /* The GDB lexer checks the result of scanf at this point. Not doing
1354 this leaves our error checking slightly weaker but only for invalid
1355 data. */
1357 /* See if it has `f' or `l' suffix (float or long double). */
1359 c = TOLOWER (p[len - 1]);
1361 if (c == 'f')
1363 len--;
1364 type = make_builtin_type ("float");
1366 else if (c == 'l')
1368 len--;
1369 type = make_builtin_type ("long double");
1371 else if (ISDIGIT (c) || c == '.')
1372 type = make_builtin_type ("double");
1373 else
1374 return ERROR;
1376 name = make_name (p, len);
1377 yylval.comp = fill_comp (literal_type, type, name);
1379 return FLOAT;
1382 /* This treats 0x1 and 1 as different literals. We also do not
1383 automatically generate unsigned types. */
1385 long_p = 0;
1386 unsigned_p = 0;
1387 while (len > 0)
1389 if (p[len - 1] == 'l' || p[len - 1] == 'L')
1391 len--;
1392 long_p++;
1393 continue;
1395 if (p[len - 1] == 'u' || p[len - 1] == 'U')
1397 len--;
1398 unsigned_p++;
1399 continue;
1401 break;
1404 if (long_p == 0)
1406 unsigned_type = make_builtin_type ("unsigned int");
1407 signed_type = make_builtin_type ("int");
1409 else if (long_p == 1)
1411 unsigned_type = make_builtin_type ("unsigned long");
1412 signed_type = make_builtin_type ("long");
1414 else
1416 unsigned_type = make_builtin_type ("unsigned long long");
1417 signed_type = make_builtin_type ("long long");
1420 if (unsigned_p)
1421 type = unsigned_type;
1422 else
1423 type = signed_type;
1425 name = make_name (p, len);
1426 yylval.comp = fill_comp (literal_type, type, name);
1428 return INT;
/* Simple one-letter escapes: backslashable[i] after a backslash stands
   for the character represented[i].  ESC is spelled \033 because \e is
   not a standard C escape sequence.  */
static const char backslashable[] = "abefnrtv";
static const char represented[] = "\a\b\033\f\n\r\t\v";

/* Translate the backslash the way we would in the host character set.
   If HOST_CHAR is one of the simple escape letters, store the character
   it denotes in *TARGET_CHAR and return 1.  Otherwise return 0 and
   leave *TARGET_CHAR untouched.  */
static int
c_parse_backslash (int host_char, int *target_char)
{
  const char *ix;

  /* strchr also matches the terminating NUL of the string it searches,
     so end-of-input must be rejected before the lookup; otherwise it
     would be reported as a valid escape.  */
  if (host_char == '\0')
    return 0;

  ix = strchr (backslashable, host_char);
  if (ix == NULL)
    return 0;

  *target_char = represented[ix - backslashable];
  return 1;
}

/* Parse a C escape sequence.  STRING_PTR points to a variable
   containing a pointer to the string to parse.  That pointer
   should point to the character after the \.  That pointer
   is updated past the characters we use.  The value of the
   escape sequence is returned.

   The value -2 means the sequence \ newline was seen,
   which is supposed to be equivalent to nothing at all.

   If \ (or \^) is followed by a null character, we return 0 and
   leave the string pointer pointing at the null character, so the
   caller never ends up past the end of the string.

   If \ is followed by 000, we return 0 and leave the string pointer
   after the zeros.  A value of 0 does not mean end of string.  */

static int
parse_escape (const char **string_ptr)
{
  int target_char;
  int c = *(*string_ptr)++;

  if (c_parse_backslash (c, &target_char))
    return target_char;

  switch (c)
    {
    case '\n':
      return -2;

    case 0:
      /* Back up so the caller still sees the terminator.  */
      (*string_ptr)--;
      return 0;

    case '^':
      c = *(*string_ptr)++;

      if (c == 0)
        {
          /* "\^" at end of input: do not step over the terminator.  */
          (*string_ptr)--;
          return 0;
        }

      if (c == '?')
        return 0177;
      else if (c == '\\')
        target_char = parse_escape (string_ptr);
      else
        target_char = c;

      /* Now target_char is something like `c', and we want to find
         its control-character equivalent.  */
      return target_char & 037;

    case '0':
    case '1':
    case '2':
    case '3':
    case '4':
    case '5':
    case '6':
    case '7':
      {
        /* Up to three octal digits, the first of which is C.  */
        int i = c - '0';
        int count = 0;

        while (++count < 3)
          {
            c = (**string_ptr);
            if (c < '0' || c > '7')
              break;

            (*string_ptr)++;
            i *= 8;
            i += c - '0';
          }
        return i;
      }

    default:
      /* Any other character stands for itself.  */
      return c;
    }
}
/* Lexer helpers.  Each expands to a single statement (and so must be
   followed by a semicolon) that returns from the enclosing function
   when the input matches.  They are wrapped in do/while (0) so that
   they remain one statement even next to an `if'/`else'.  */

/* If the text at TOKSTART begins with the string literal STRING, move
   LEXPTR past it, store COMP in yylval.lval and return
   DEMANGLER_SPECIAL.  */
#define HANDLE_SPECIAL(string, comp)                                    \
  do                                                                    \
    {                                                                   \
      if (strncmp (tokstart, (string), sizeof (string) - 1) == 0)       \
        {                                                               \
          lexptr = tokstart + sizeof (string) - 1;                      \
          yylval.lval = (comp);                                         \
          return DEMANGLER_SPECIAL;                                     \
        }                                                               \
    }                                                                   \
  while (0)

/* If the character after LEXPTR matches the second character of
   STRING, consume two characters, store STRING in yylval.opname and
   return TOKEN.  The first character is assumed to have been matched
   by the caller.  */
#define HANDLE_TOKEN2(string, token)                    \
  do                                                    \
    {                                                   \
      if (lexptr[1] == (string)[1])                     \
        {                                               \
          lexptr += 2;                                  \
          yylval.opname = (string);                     \
          return (token);                               \
        }                                               \
    }                                                   \
  while (0)

/* Like HANDLE_TOKEN2, but for three-character operators: the second
   and third characters of STRING must both match.  */
#define HANDLE_TOKEN3(string, token)                                    \
  do                                                                    \
    {                                                                   \
      if (lexptr[1] == (string)[1] && lexptr[2] == (string)[2])         \
        {                                                               \
          lexptr += 3;                                                  \
          yylval.opname = (string);                                     \
          return (token);                                               \
        }                                                               \
    }                                                                   \
  while (0)
1551 /* Read one token, getting characters through LEXPTR. */
1553 static int
1554 yylex (void)
1556 int c;
1557 int namelen;
1558 const char *tokstart, *tokptr;
1560 retry:
1561 prev_lexptr = lexptr;
1562 tokstart = lexptr;
1564 switch (c = *tokstart)
1566 case 0:
1567 return 0;
1569 case ' ':
1570 case '\t':
1571 case '\n':
1572 lexptr++;
1573 goto retry;
1575 case '\'':
1576 /* We either have a character constant ('0' or '\177' for example)
1577 or we have a quoted symbol reference ('foo(int,int)' in C++
1578 for example). */
1579 lexptr++;
1580 c = *lexptr++;
1581 if (c == '\\')
1582 c = parse_escape (&lexptr);
1583 else if (c == '\'')
1585 yyerror ("empty character constant");
1586 return ERROR;
1589 c = *lexptr++;
1590 if (c != '\'')
1592 yyerror ("invalid character constant");
1593 return ERROR;
1596 /* FIXME: We should refer to a canonical form of the character,
1597 presumably the same one that appears in manglings - the decimal
1598 representation. But if that isn't in our input then we have to
1599 allocate memory for it somewhere. */
1600 yylval.comp = fill_comp (DEMANGLE_COMPONENT_LITERAL,
1601 make_builtin_type ("char"),
1602 make_name (tokstart, lexptr - tokstart));
1604 return INT;
1606 case '(':
1607 if (strncmp (tokstart, "(anonymous namespace)", 21) == 0)
1609 lexptr += 21;
1610 yylval.comp = make_name ("(anonymous namespace)",
1611 sizeof "(anonymous namespace)" - 1);
1612 return NAME;
1614 /* FALL THROUGH */
1616 case ')':
1617 case ',':
1618 lexptr++;
1619 return c;
1621 case '.':
1622 if (lexptr[1] == '.' && lexptr[2] == '.')
1624 lexptr += 3;
1625 return ELLIPSIS;
1628 /* Might be a floating point number. */
1629 if (lexptr[1] < '0' || lexptr[1] > '9')
1630 goto symbol; /* Nope, must be a symbol. */
1632 goto try_number;
1634 case '-':
1635 HANDLE_TOKEN2 ("-=", ASSIGN_MODIFY);
1636 HANDLE_TOKEN2 ("--", DECREMENT);
1637 HANDLE_TOKEN2 ("->", ARROW);
1639 /* For construction vtables. This is kind of hokey. */
1640 if (strncmp (tokstart, "-in-", 4) == 0)
1642 lexptr += 4;
1643 return CONSTRUCTION_IN;
1646 if (lexptr[1] < '0' || lexptr[1] > '9')
1648 lexptr++;
1649 return '-';
1651 /* FALL THRU into number case. */
1653 try_number:
1654 case '0':
1655 case '1':
1656 case '2':
1657 case '3':
1658 case '4':
1659 case '5':
1660 case '6':
1661 case '7':
1662 case '8':
1663 case '9':
1665 /* It's a number. */
1666 int got_dot = 0, got_e = 0, toktype;
1667 const char *p = tokstart;
1668 int hex = 0;
1670 if (c == '-')
1671 p++;
1673 if (c == '0' && (p[1] == 'x' || p[1] == 'X'))
1675 p += 2;
1676 hex = 1;
1678 else if (c == '0' && (p[1]=='t' || p[1]=='T' || p[1]=='d' || p[1]=='D'))
1680 p += 2;
1681 hex = 0;
1684 for (;; ++p)
1686 /* This test includes !hex because 'e' is a valid hex digit
1687 and thus does not indicate a floating point number when
1688 the radix is hex. */
1689 if (!hex && !got_e && (*p == 'e' || *p == 'E'))
1690 got_dot = got_e = 1;
1691 /* This test does not include !hex, because a '.' always indicates
1692 a decimal floating point number regardless of the radix.
1694 NOTE drow/2005-03-09: This comment is not accurate in C99;
1695 however, it's not clear that all the floating point support
1696 in this file is doing any good here. */
1697 else if (!got_dot && *p == '.')
1698 got_dot = 1;
1699 else if (got_e && (p[-1] == 'e' || p[-1] == 'E')
1700 && (*p == '-' || *p == '+'))
1701 /* This is the sign of the exponent, not the end of the
1702 number. */
1703 continue;
1704 /* We will take any letters or digits. parse_number will
1705 complain if past the radix, or if L or U are not final. */
1706 else if (! ISALNUM (*p))
1707 break;
1709 toktype = parse_number (tokstart, p - tokstart, got_dot|got_e);
1710 if (toktype == ERROR)
1712 char *err_copy = (char *) alloca (p - tokstart + 1);
1714 memcpy (err_copy, tokstart, p - tokstart);
1715 err_copy[p - tokstart] = 0;
1716 yyerror ("invalid number");
1717 return ERROR;
1719 lexptr = p;
1720 return toktype;
1723 case '+':
1724 HANDLE_TOKEN2 ("+=", ASSIGN_MODIFY);
1725 HANDLE_TOKEN2 ("++", INCREMENT);
1726 lexptr++;
1727 return c;
1728 case '*':
1729 HANDLE_TOKEN2 ("*=", ASSIGN_MODIFY);
1730 lexptr++;
1731 return c;
1732 case '/':
1733 HANDLE_TOKEN2 ("/=", ASSIGN_MODIFY);
1734 lexptr++;
1735 return c;
1736 case '%':
1737 HANDLE_TOKEN2 ("%=", ASSIGN_MODIFY);
1738 lexptr++;
1739 return c;
1740 case '|':
1741 HANDLE_TOKEN2 ("|=", ASSIGN_MODIFY);
1742 HANDLE_TOKEN2 ("||", OROR);
1743 lexptr++;
1744 return c;
1745 case '&':
1746 HANDLE_TOKEN2 ("&=", ASSIGN_MODIFY);
1747 HANDLE_TOKEN2 ("&&", ANDAND);
1748 lexptr++;
1749 return c;
1750 case '^':
1751 HANDLE_TOKEN2 ("^=", ASSIGN_MODIFY);
1752 lexptr++;
1753 return c;
1754 case '!':
1755 HANDLE_TOKEN2 ("!=", NOTEQUAL);
1756 lexptr++;
1757 return c;
1758 case '<':
1759 HANDLE_TOKEN3 ("<<=", ASSIGN_MODIFY);
1760 HANDLE_TOKEN2 ("<=", LEQ);
1761 HANDLE_TOKEN2 ("<<", LSH);
1762 lexptr++;
1763 return c;
1764 case '>':
1765 HANDLE_TOKEN3 (">>=", ASSIGN_MODIFY);
1766 HANDLE_TOKEN2 (">=", GEQ);
1767 HANDLE_TOKEN2 (">>", RSH);
1768 lexptr++;
1769 return c;
1770 case '=':
1771 HANDLE_TOKEN2 ("==", EQUAL);
1772 lexptr++;
1773 return c;
1774 case ':':
1775 HANDLE_TOKEN2 ("::", COLONCOLON);
1776 lexptr++;
1777 return c;
1779 case '[':
1780 case ']':
1781 case '?':
1782 case '@':
1783 case '~':
1784 case '{':
1785 case '}':
1786 symbol:
1787 lexptr++;
1788 return c;
1790 case '"':
1791 /* These can't occur in C++ names. */
1792 yyerror ("unexpected string literal");
1793 return ERROR;
1796 if (!(c == '_' || c == '$' || ISALPHA (c)))
1798 /* We must have come across a bad character (e.g. ';'). */
1799 yyerror ("invalid character");
1800 return ERROR;
1803 /* It's a name. See how long it is. */
1804 namelen = 0;
1806 c = tokstart[++namelen];
1807 while (ISALNUM (c) || c == '_' || c == '$');
1809 lexptr += namelen;
1811 /* Catch specific keywords. Notice that some of the keywords contain
1812 spaces, and are sorted by the length of the first word. They must
1813 all include a trailing space in the string comparison. */
1814 switch (namelen)
1816 case 16:
1817 if (strncmp (tokstart, "reinterpret_cast", 16) == 0)
1818 return REINTERPRET_CAST;
1819 break;
1820 case 12:
1821 if (strncmp (tokstart, "construction vtable for ", 24) == 0)
1823 lexptr = tokstart + 24;
1824 return CONSTRUCTION_VTABLE;
1826 if (strncmp (tokstart, "dynamic_cast", 12) == 0)
1827 return DYNAMIC_CAST;
1828 break;
1829 case 11:
1830 if (strncmp (tokstart, "static_cast", 11) == 0)
1831 return STATIC_CAST;
1832 break;
1833 case 9:
1834 HANDLE_SPECIAL ("covariant return thunk to ", DEMANGLE_COMPONENT_COVARIANT_THUNK);
1835 HANDLE_SPECIAL ("reference temporary for ", DEMANGLE_COMPONENT_REFTEMP);
1836 break;
1837 case 8:
1838 HANDLE_SPECIAL ("typeinfo for ", DEMANGLE_COMPONENT_TYPEINFO);
1839 HANDLE_SPECIAL ("typeinfo fn for ", DEMANGLE_COMPONENT_TYPEINFO_FN);
1840 HANDLE_SPECIAL ("typeinfo name for ", DEMANGLE_COMPONENT_TYPEINFO_NAME);
1841 if (strncmp (tokstart, "operator", 8) == 0)
1842 return OPERATOR;
1843 if (strncmp (tokstart, "restrict", 8) == 0)
1844 return RESTRICT;
1845 if (strncmp (tokstart, "unsigned", 8) == 0)
1846 return UNSIGNED;
1847 if (strncmp (tokstart, "template", 8) == 0)
1848 return TEMPLATE;
1849 if (strncmp (tokstart, "volatile", 8) == 0)
1850 return VOLATILE_KEYWORD;
1851 break;
1852 case 7:
1853 HANDLE_SPECIAL ("virtual thunk to ", DEMANGLE_COMPONENT_VIRTUAL_THUNK);
1854 if (strncmp (tokstart, "wchar_t", 7) == 0)
1855 return WCHAR_T;
1856 break;
1857 case 6:
1858 if (strncmp (tokstart, "global constructors keyed to ", 29) == 0)
1860 const char *p;
1861 lexptr = tokstart + 29;
1862 yylval.typed_val_int.val = GLOBAL_CONSTRUCTORS;
1863 /* Find the end of the symbol. */
1864 p = symbol_end (lexptr);
1865 yylval.typed_val_int.type = make_name (lexptr, p - lexptr);
1866 lexptr = p;
1867 return GLOBAL;
1869 if (strncmp (tokstart, "global destructors keyed to ", 28) == 0)
1871 const char *p;
1872 lexptr = tokstart + 28;
1873 yylval.typed_val_int.val = GLOBAL_DESTRUCTORS;
1874 /* Find the end of the symbol. */
1875 p = symbol_end (lexptr);
1876 yylval.typed_val_int.type = make_name (lexptr, p - lexptr);
1877 lexptr = p;
1878 return GLOBAL;
1881 HANDLE_SPECIAL ("vtable for ", DEMANGLE_COMPONENT_VTABLE);
1882 if (strncmp (tokstart, "delete", 6) == 0)
1883 return DELETE;
1884 if (strncmp (tokstart, "struct", 6) == 0)
1885 return STRUCT;
1886 if (strncmp (tokstart, "signed", 6) == 0)
1887 return SIGNED_KEYWORD;
1888 if (strncmp (tokstart, "sizeof", 6) == 0)
1889 return SIZEOF;
1890 if (strncmp (tokstart, "double", 6) == 0)
1891 return DOUBLE_KEYWORD;
1892 break;
1893 case 5:
1894 HANDLE_SPECIAL ("guard variable for ", DEMANGLE_COMPONENT_GUARD);
1895 if (strncmp (tokstart, "false", 5) == 0)
1896 return FALSEKEYWORD;
1897 if (strncmp (tokstart, "class", 5) == 0)
1898 return CLASS;
1899 if (strncmp (tokstart, "union", 5) == 0)
1900 return UNION;
1901 if (strncmp (tokstart, "float", 5) == 0)
1902 return FLOAT_KEYWORD;
1903 if (strncmp (tokstart, "short", 5) == 0)
1904 return SHORT;
1905 if (strncmp (tokstart, "const", 5) == 0)
1906 return CONST_KEYWORD;
1907 break;
1908 case 4:
1909 if (strncmp (tokstart, "void", 4) == 0)
1910 return VOID;
1911 if (strncmp (tokstart, "bool", 4) == 0)
1912 return BOOL;
1913 if (strncmp (tokstart, "char", 4) == 0)
1914 return CHAR;
1915 if (strncmp (tokstart, "enum", 4) == 0)
1916 return ENUM;
1917 if (strncmp (tokstart, "long", 4) == 0)
1918 return LONG;
1919 if (strncmp (tokstart, "true", 4) == 0)
1920 return TRUEKEYWORD;
1921 break;
1922 case 3:
1923 HANDLE_SPECIAL ("VTT for ", DEMANGLE_COMPONENT_VTT);
1924 HANDLE_SPECIAL ("non-virtual thunk to ", DEMANGLE_COMPONENT_THUNK);
1925 if (strncmp (tokstart, "new", 3) == 0)
1926 return NEW;
1927 if (strncmp (tokstart, "int", 3) == 0)
1928 return INT_KEYWORD;
1929 break;
1930 default:
1931 break;
1934 yylval.comp = make_name (tokstart, namelen);
1935 return NAME;
1938 static void
1939 yyerror (char *msg)
1941 if (global_errmsg)
1942 return;
1944 error_lexptr = prev_lexptr;
1945 global_errmsg = msg ? msg : "parse error";
1948 /* Allocate a chunk of the components we'll need to build a tree. We
1949 generally allocate too many components, but the extra memory usage
1950 doesn't hurt because the trees are temporary and the storage is
1951 reused. More may be allocated later, by d_grab. */
1952 static void
1953 allocate_info (void)
1955 if (demangle_info == NULL)
1957 demangle_info = malloc (sizeof (struct demangle_info));
1958 demangle_info->prev = NULL;
1959 demangle_info->next = NULL;
1961 else
1962 while (demangle_info->prev)
1963 demangle_info = demangle_info->prev;
1965 demangle_info->used = 0;
1968 /* Convert RESULT to a string. The return value is allocated
1969 using xmalloc. ESTIMATED_LEN is used only as a guide to the
1970 length of the result. This functions handles a few cases that
1971 cplus_demangle_print does not, specifically the global destructor
1972 and constructor labels. */
1974 char *
1975 cp_comp_to_string (struct demangle_component *result, int estimated_len)
1977 char *str, *prefix = NULL, *buf;
1978 size_t err = 0;
1980 if (result->type == GLOBAL_DESTRUCTORS)
1982 result = d_left (result);
1983 prefix = "global destructors keyed to ";
1985 else if (result->type == GLOBAL_CONSTRUCTORS)
1987 result = d_left (result);
1988 prefix = "global constructors keyed to ";
1991 str = cplus_demangle_print (DMGL_PARAMS | DMGL_ANSI, result, estimated_len, &err);
1992 if (str == NULL)
1993 return NULL;
1995 if (prefix == NULL)
1996 return str;
1998 buf = malloc (strlen (str) + strlen (prefix) + 1);
1999 strcpy (buf, prefix);
2000 strcat (buf, str);
2001 free (str);
2002 return (buf);
2005 /* Convert a demangled name to a demangle_component tree. On success,
2006 the root of the new tree is returned; it is valid until the next
2007 call to this function and should not be freed. On error, NULL is
2008 returned, and an error message will be set in *ERRMSG (which does
2009 not need to be freed). */
2011 struct demangle_component *
2012 cp_demangled_name_to_comp (const char *demangled_name, const char **errmsg)
2014 static char errbuf[60];
2015 struct demangle_component *result;
2017 prev_lexptr = lexptr = demangled_name;
2018 error_lexptr = NULL;
2019 global_errmsg = NULL;
2021 allocate_info ();
2023 if (yyparse ())
2025 if (global_errmsg && errmsg)
2027 snprintf (errbuf, sizeof (errbuf) - 2, "%s, near `%s",
2028 global_errmsg, error_lexptr);
2029 strcat (errbuf, "'");
2030 *errmsg = errbuf;
2032 return NULL;
2035 result = global_result;
2036 global_result = NULL;
2038 return result;
2041 #ifdef TEST_CPNAMES
2043 static void
2044 cp_print (struct demangle_component *result)
2046 char *str;
2047 size_t err = 0;
2049 if (result->type == GLOBAL_DESTRUCTORS)
2051 result = d_left (result);
2052 fputs ("global destructors keyed to ", stdout);
2054 else if (result->type == GLOBAL_CONSTRUCTORS)
2056 result = d_left (result);
2057 fputs ("global constructors keyed to ", stdout);
2060 str = cplus_demangle_print (DMGL_PARAMS | DMGL_ANSI, result, 64, &err);
2061 if (str == NULL)
2062 return;
2064 fputs (str, stdout);
2066 free (str);
/* Test helper: cut the string at LEXPTR off at the end of its leading
   symbol name.  If anything follows the symbol, the first such
   character is overwritten with a NUL and returned, and *EXTRA_CHARS
   is set to the text after it.  Otherwise 0 is returned and
   *EXTRA_CHARS is left alone.  */
static char
trim_chars (char *lexptr, char **extra_chars)
{
  char *end = (char *) symbol_end (lexptr);
  char cut = *end;

  if (cut != '\0')
    {
      *end = 0;
      *extra_chars = end + 1;
    }

  return cut;
}
2086 main (int argc, char **argv)
2088 char *str2, *extra_chars = "", c;
2089 char buf[65536];
2090 int arg;
2091 const char *errmsg;
2092 struct demangle_component *result;
2094 arg = 1;
2095 if (argv[arg] && strcmp (argv[arg], "--debug") == 0)
2097 yydebug = 1;
2098 arg++;
2101 if (argv[arg] == NULL)
2102 while (fgets (buf, 65536, stdin) != NULL)
2104 int len;
2105 buf[strlen (buf) - 1] = 0;
2106 /* Use DMGL_VERBOSE to get expanded standard substitutions. */
2107 c = trim_chars (buf, &extra_chars);
2108 str2 = cplus_demangle (buf, DMGL_PARAMS | DMGL_ANSI | DMGL_VERBOSE);
2109 if (str2 == NULL)
2111 /* printf ("Demangling error\n"); */
2112 if (c)
2113 printf ("%s%c%s\n", buf, c, extra_chars);
2114 else
2115 printf ("%s\n", buf);
2116 continue;
2118 result = cp_demangled_name_to_comp (str2, &errmsg);
2119 if (result == NULL)
2121 fputs (errmsg, stderr);
2122 fputc ('\n', stderr);
2123 continue;
2126 cp_print (result);
2128 free (str2);
2129 if (c)
2131 putchar (c);
2132 fputs (extra_chars, stdout);
2134 putchar ('\n');
2136 else
2138 result = cp_demangled_name_to_comp (argv[arg], &errmsg);
2139 if (result == NULL)
2141 fputs (errmsg, stderr);
2142 fputc ('\n', stderr);
2143 return 0;
2145 cp_print (result);
2146 putchar ('\n');
2148 return 0;
2151 #endif