2 * Demangle VC++ symbols into C function prototypes
4 * Copyright 2000 Jon Griffiths
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
22 #include "wine/port.h"
26 /* Type for parsing mangled types */
27 typedef struct _compound_type
36 /* Initialise a compound type structure */
37 #define INIT_CT(ct) do { memset (&ct, 0, sizeof (ct)); } while (0)
39 /* free the memory used by a compound structure */
40 #define FREE_CT(ct) free (ct.expression)
42 /* Flags for data types */
43 #define DATA_VTABLE 0x1
45 /* Internal functions */
46 static char *demangle_datatype (char **str
, compound_type
*ct
,
49 static char *get_constraints_convention_1 (char **str
, compound_type
*ct
);
51 static char *get_constraints_convention_2 (char **str
, compound_type
*ct
);
53 static char *get_type_string (const char c
, const int constraints
);
55 static int get_type_constant (const char c
, const int constraints
);
57 static char *get_pointer_type_string (compound_type
*ct
,
58 const char *expression
);
61 /*******************************************************************
64 * Demangle a C++ linker symbol into a C prototype
66 int symbol_demangle (parsed_symbol
*sym
)
69 int is_static
= 0, is_const
= 0;
70 char *function_name
= NULL
;
71 char *class_name
= NULL
;
73 const char *const_status
;
74 static unsigned int hash
= 0; /* In case of overloaded functions */
75 unsigned int data_flags
= 0;
77 assert (globals
.do_code
);
78 assert (sym
&& sym
->symbol
);
82 /* MS mangled names always begin with '?' */
88 puts ("Attempting to demangle symbol");
90 /* Then function name or operator code */
93 /* C++ operator code (one character, or two if the first is '_') */
96 case '0': function_name
= strdup ("ctor"); break;
97 case '1': function_name
= strdup ("dtor"); break;
98 case '2': function_name
= strdup ("operator_new"); break;
99 case '3': function_name
= strdup ("operator_delete"); break;
100 case '4': function_name
= strdup ("operator_equals"); break;
101 case '5': function_name
= strdup ("operator_shiftright"); break;
102 case '6': function_name
= strdup ("operator_shiftleft"); break;
103 case '7': function_name
= strdup ("operator_not"); break;
104 case '8': function_name
= strdup ("operator_equalsequals"); break;
105 case '9': function_name
= strdup ("operator_notequals"); break;
106 case 'A': function_name
= strdup ("operator_array"); break;
107 case 'C': function_name
= strdup ("operator_dereference"); break;
108 case 'D': function_name
= strdup ("operator_multiply"); break;
109 case 'E': function_name
= strdup ("operator_plusplus"); break;
110 case 'F': function_name
= strdup ("operator_minusminus"); break;
111 case 'G': function_name
= strdup ("operator_minus"); break;
112 case 'H': function_name
= strdup ("operator_plus"); break;
113 case 'I': function_name
= strdup ("operator_address"); break;
114 case 'J': function_name
= strdup ("operator_dereferencememberptr"); break;
115 case 'K': function_name
= strdup ("operator_divide"); break;
116 case 'L': function_name
= strdup ("operator_modulo"); break;
117 case 'M': function_name
= strdup ("operator_lessthan"); break;
118 case 'N': function_name
= strdup ("operator_lessthanequal"); break;
119 case 'O': function_name
= strdup ("operator_greaterthan"); break;
120 case 'P': function_name
= strdup ("operator_greaterthanequal"); break;
121 case 'Q': function_name
= strdup ("operator_comma"); break;
122 case 'R': function_name
= strdup ("operator_functioncall"); break;
123 case 'S': function_name
= strdup ("operator_complement"); break;
124 case 'T': function_name
= strdup ("operator_xor"); break;
125 case 'U': function_name
= strdup ("operator_logicalor"); break;
126 case 'V': function_name
= strdup ("operator_logicaland"); break;
127 case 'W': function_name
= strdup ("operator_or"); break;
128 case 'X': function_name
= strdup ("operator_multiplyequals"); break;
129 case 'Y': function_name
= strdup ("operator_plusequals"); break;
130 case 'Z': function_name
= strdup ("operator_minusequals"); break;
134 case '0': function_name
= strdup ("operator_divideequals"); break;
135 case '1': function_name
= strdup ("operator_moduloequals"); break;
136 case '2': function_name
= strdup ("operator_shiftrightequals"); break;
137 case '3': function_name
= strdup ("operator_shiftleftequals"); break;
138 case '4': function_name
= strdup ("operator_andequals"); break;
139 case '5': function_name
= strdup ("operator_orequals"); break;
140 case '6': function_name
= strdup ("operator_xorequals"); break;
141 case '7': function_name
= strdup ("vftable"); data_flags
= DATA_VTABLE
; break;
142 case '8': function_name
= strdup ("vbtable"); data_flags
= DATA_VTABLE
; break;
143 case '9': function_name
= strdup ("vcall"); data_flags
= DATA_VTABLE
; break;
144 case 'A': function_name
= strdup ("typeof"); data_flags
= DATA_VTABLE
; break;
145 case 'B': function_name
= strdup ("local_static_guard"); data_flags
= DATA_VTABLE
; break;
146 case 'C': function_name
= strdup ("string"); data_flags
= DATA_VTABLE
; break;
147 case 'D': function_name
= strdup ("vbase_dtor"); data_flags
= DATA_VTABLE
; break;
148 case 'E': function_name
= strdup ("vector_dtor"); break;
149 case 'G': function_name
= strdup ("scalar_dtor"); break;
150 case 'H': function_name
= strdup ("vector_ctor_iter"); break;
151 case 'I': function_name
= strdup ("vector_dtor_iter"); break;
152 case 'J': function_name
= strdup ("vector_vbase_ctor_iter"); break;
153 case 'L': function_name
= strdup ("eh_vector_ctor_iter"); break;
154 case 'M': function_name
= strdup ("eh_vector_dtor_iter"); break;
155 case 'N': function_name
= strdup ("eh_vector_vbase_ctor_iter"); break;
156 case 'O': function_name
= strdup ("copy_ctor_closure"); break;
157 case 'S': function_name
= strdup ("local_vftable"); data_flags
= DATA_VTABLE
; break;
158 case 'T': function_name
= strdup ("local_vftable_ctor_closure"); break;
159 case 'U': function_name
= strdup ("operator_new_vector"); break;
160 case 'V': function_name
= strdup ("operator_delete_vector"); break;
161 case 'X': function_name
= strdup ("placement_new_closure"); break;
162 case 'Y': function_name
= strdup ("placement_delete_closure"); break;
168 /* FIXME: Other operators */
175 /* Type or function name terminated by '@' */
176 function_name
= name
;
177 while (*name
&& *name
++ != '@') ;
180 function_name
= str_substring (function_name
, name
- 1);
183 /* Either a class name, or '@' if the symbol is not a class member */
186 class_name
= strdup ("global"); /* Non member function (or a datatype) */
191 /* Class the function is associated with, terminated by '@@' */
193 while (*name
&& *name
++ != '@') ;
194 if (*name
++ != '@') {
195 free (function_name
);
198 class_name
= str_substring (class_name
, name
- 2);
201 /* Function/Data type and access level */
202 /* FIXME: why 2 possible letters for each option? */
207 case '0' : /* private static */
208 case '1' : /* protected static */
209 case '2' : /* public static */
212 case '3' : /* non static */
213 case '4' : /* non static */
214 /* Data members need to be implemented: report */
216 if (!demangle_datatype (&name
, &ct
, sym
))
219 printf ("/*FIXME: %s: unknown data*/\n", sym
->symbol
);
220 free (function_name
);
223 sym
->flags
|= SYM_DATA
;
225 sym
->arg_name
[0] = str_create (5, OUTPUT_UC_DLL_NAME
, "_", class_name
,
226 is_static
? "static_" : "_", function_name
);
227 sym
->arg_text
[0] = str_create (3, ct
.expression
, " ", sym
->arg_name
[0]);
229 free (function_name
);
233 case '6' : /* compiler generated static */
234 case '7' : /* compiler generated static */
235 if (data_flags
& DATA_VTABLE
)
237 sym
->flags
|= SYM_DATA
;
239 sym
->arg_name
[0] = str_create (5, OUTPUT_UC_DLL_NAME
, "_", class_name
,
241 sym
->arg_text
[0] = str_create (2, "void *", sym
->arg_name
[0]);
244 puts ("Demangled symbol OK [vtable]");
245 free (function_name
);
248 free (function_name
);
254 case 'E' : /* private virtual */
255 case 'F' : /* private virtual */
256 case 'M' : /* protected virtual */
257 case 'N' : /* protected virtual */
258 case 'U' : /* public virtual */
259 case 'V' : /* public virtual */
260 /* Virtual functions need to be added to the exported vtable: report */
262 printf ("/*FIXME %s: %s::%s is virtual-add to vftable*/\n", sym
->symbol
,
263 class_name
, function_name
);
265 case 'A' : /* private */
266 case 'B' : /* private */
267 case 'I' : /* protected */
268 case 'J' : /* protected */
269 case 'Q' : /* public */
270 case 'R' : /* public */
271 /* Implicit 'this' pointer */
272 sym
->arg_text
[sym
->argc
] = str_create (3, "struct ", class_name
, " *");
273 sym
->arg_type
[sym
->argc
] = ARG_POINTER
;
274 sym
->arg_flag
[sym
->argc
] = 0;
275 sym
->arg_name
[sym
->argc
++] = strdup ("_this");
276 /* New struct definitions can be 'grep'ed out for making a fixup header */
278 printf ("struct %s { void **vtable; /*FIXME: class definition */ };\n", class_name
);
280 case 'C' : /* private: static */
281 case 'D' : /* private: static */
282 case 'K' : /* protected: static */
283 case 'L' : /* protected: static */
284 case 'S' : /* public: static */
285 case 'T' : /* public: static */
286 is_static
= 1; /* No implicit this pointer */
291 /* FIXME: G,H / O,P / W,X are private / protected / public thunks */
293 free (function_name
);
297 /* If there is an implicit this pointer, const status follows */
302 case 'A': break; /* non-const */
303 case 'B': is_const
= CT_CONST
; break;
304 case 'C': is_const
= CT_VOLATILE
; break;
305 case 'D': is_const
= (CT_CONST
| CT_VOLATILE
); break;
307 free (function_name
);
312 /* Next is the calling convention */
315 case 'A': /* __cdecl */
316 case 'B': /* __cdecl __declspec(dllexport) */
319 sym
->flags
|= SYM_CDECL
;
322 /* Else fall through */
323 case 'C': /* __pascal */
324 case 'D': /* __pascal __declspec(dllexport) */
325 case 'E': /* __thiscall */
326 case 'F': /* __thiscall __declspec(dllexport) */
327 case 'G': /* __stdcall */
328 case 'H': /* __stdcall __declspec(dllexport) */
329 case 'I': /* __fastcall */
330 case 'J': /* __fastcall __declspec(dllexport)*/
331 case 'K': /* default (none given) */
333 sym
->flags
|= SYM_THISCALL
;
335 sym
->flags
|= SYM_STDCALL
;
338 free (function_name
);
342 /* Return type, or @ if 'void' */
345 sym
->return_text
= strdup ("void");
346 sym
->return_type
= ARG_VOID
;
352 if (!demangle_datatype (&name
, &ct
, sym
)) {
353 free (function_name
);
356 sym
->return_text
= ct
.expression
;
357 sym
->return_type
= get_type_constant(ct
.dest_type
, ct
.flags
);
358 ct
.expression
= NULL
;
362 /* Now come the function arguments */
363 while (*name
&& *name
!= 'Z')
365 /* Decode each data type and append it to the argument list */
369 if (!demangle_datatype(&name
, &ct
, sym
)) {
370 free (function_name
);
374 if (strcmp (ct
.expression
, "void"))
376 sym
->arg_text
[sym
->argc
] = ct
.expression
;
377 ct
.expression
= NULL
;
378 sym
->arg_type
[sym
->argc
] = get_type_constant (ct
.dest_type
, ct
.flags
);
379 sym
->arg_flag
[sym
->argc
] = ct
.flags
;
380 sym
->arg_name
[sym
->argc
] = str_create_num (1, sym
->argc
, "arg");
384 break; /* 'void' terminates an argument list */
394 /* Functions are always terminated by 'Z'. If we made it this far and
395 * don't find it, we have incorrectly identified a data type.
398 free (function_name
);
402 /* Note: '()' after 'Z' means 'throws', but we don't care here */
404 /* Create the function name. Include a unique number because otherwise
405 * overloaded functions could have the same c signature.
409 case (CT_CONST
| CT_VOLATILE
): const_status
= "_const_volatile"; break;
410 case CT_CONST
: const_status
= "_const"; break;
411 case CT_VOLATILE
: const_status
= "_volatile"; break;
412 default: const_status
= "_"; break;
414 sym
->function_name
= str_create_num (4, hash
, class_name
, "_",
415 function_name
, is_static
? "_static" : const_status
);
417 assert (sym
->return_text
);
419 assert (sym
->function_name
);
422 free (function_name
);
425 puts ("Demangled symbol OK");
431 /*******************************************************************
434 * Attempt to demangle a C++ data type, which may be compound.
435 * A compound type is made up of a number of simple types, e.g.:
436 * char** = (pointer to (pointer to (char)))
438 * Uses a simple recursive descent algorithm that is broken
439 * and/or incomplete, without a doubt ;-)
441 static char *demangle_datatype (char **str
, compound_type
*ct
,
446 assert (str
&& *str
);
451 if (!get_constraints_convention_1 (&iter
, ct
))
456 /* MS type: __int8,__int16 etc */
457 ct
->flags
|= CT_EXTENDED
;
463 case 'C': case 'D': case 'E': case 'F': case 'G':
464 case 'H': case 'I': case 'J': case 'K': case 'M':
465 case 'N': case 'O': case 'X': case 'Z':
466 /* Simple data types */
467 ct
->dest_type
= *iter
++;
468 if (!get_constraints_convention_2 (&iter
, ct
))
470 ct
->expression
= get_type_string (ct
->dest_type
, ct
->flags
);
474 /* Class/struct/union */
475 ct
->dest_type
= *iter
++;
476 if (*iter
== '0' || *iter
== '1')
478 /* Referring to class type (implicit 'this') */
484 /* Apply our constraints to the base type (struct xxx *) */
485 stripped
= strdup (sym
->arg_text
[0]);
487 fatal ("Out of Memory");
489 /* If we're a reference, re-use the pointer already in the type */
490 if (!(ct
->flags
& CT_BY_REFERENCE
))
491 stripped
[ strlen (stripped
) - 2] = '\0'; /* otherwise, strip it */
493 ct
->expression
= str_create (2, ct
->flags
& CT_CONST
? "const " :
494 ct
->flags
& CT_VOLATILE
? "volatile " : "", stripped
);
497 else if (*iter
!= '@')
499 /* The name of the class/struct, followed by '@@' */
500 char *struct_name
= iter
;
501 while (*iter
&& *iter
++ != '@') ;
504 struct_name
= str_substring (struct_name
, iter
- 2);
505 ct
->expression
= str_create (4, ct
->flags
& CT_CONST
? "const " :
506 ct
->flags
& CT_VOLATILE
? "volatile " : "", "struct ",
507 struct_name
, ct
->flags
& CT_BY_REFERENCE
? " *" : "");
511 case 'Q': /* FIXME: Array Just treated as pointer currently */
512 case 'P': /* Pointer */
514 compound_type sub_ct
;
517 ct
->dest_type
= *iter
++;
518 if (!get_constraints_convention_2 (&iter
, ct
))
521 /* FIXME: P6 = Function pointer, others who knows.. */
526 int sub_expressions
= 0;
527 /* FIXME: there are tons of memory leaks here */
528 /* FIXME: this is still broken in some cases and it has to be
529 * merged with the function prototype parsing above...
531 iter
+= iter
[1] == 'A' ? 2 : 3; /* FIXME */
532 if (!demangle_datatype (&iter
, &sub_ct
, sym
))
534 ct
->expression
= str_create(2, sub_ct
.expression
, " (*)(");
541 if (!demangle_datatype (&iter
, &sub_ct
, sym
))
544 ct
->expression
= str_create(3, ct
->expression
, ", ", sub_ct
.expression
);
546 ct
->expression
= str_create(2, ct
->expression
, sub_ct
.expression
);
547 while (*iter
== '@') iter
++;
550 } else while (*iter
== '@') iter
++;
552 ct
->expression
= str_create(2, ct
->expression
, ")");
559 /* Recurse to get the pointed-to type */
560 if (!demangle_datatype (&iter
, &sub_ct
, sym
))
563 ct
->expression
= get_pointer_type_string (ct
, sub_ct
.expression
);
569 case '0': case '1': case '2': case '3': case '4':
570 case '5': case '6': case '7': case '8': case '9':
571 /* Referring back to previously parsed type */
572 if (sym
->argc
>= (size_t)('0' - *iter
))
574 ct
->dest_type
= sym
->arg_type
['0' - *iter
];
575 ct
->expression
= strdup (sym
->arg_text
['0' - *iter
]);
584 return (char *)(*str
= iter
);
589 * There are two conventions for specifying data type constraints. I
590 * don't know how the compiler chooses between them, but I suspect it
591 * is based on ensuring that linker names are unique.
592 * Convention 1. The data type modifier is given first, followed
593 * by the data type it operates on. '?' means passed by value,
594 * 'A' means passed by reference. Note that neither of these characters
595 * is a valid base data type. This is then followed by a character
596 * specifying constness or volatility.
597 * Convention 2. The base data type (which is never '?' or 'A') is
598 * given first. The character modifier is optionally given after
599 * the base type character. If a valid character modifier is present,
600 * then it only applies to the current data type if the character
601 * after that is not 'A', 'B' or 'C' (because this makes a convention 1
602 * constraint for the next data type).
604 * The conventions are usually mixed within the same symbol.
605 * Since 'C' is both a qualifier and a data type, I suspect that
606 * convention 1 allows specifying e.g. 'volatile signed char*'. In
607 * convention 2 this would be 'CC' which is ambiguous (i.e. is it two
608 * pointers, or a single pointer + modifier?). In convention 1 it
609 * is encoded as '?CC' which is not ambiguous. This probably
610 * holds true for some other types as well.
613 /*******************************************************************
614 * get_constraints_convention_1
616 * Get type constraint information for a data type
618 static char *get_constraints_convention_1 (char **str
, compound_type
*ct
)
620 char *iter
= *str
, **retval
= str
;
622 if (ct
->have_qualifiers
)
623 return (char *)*str
; /* Previously got constraints for this type */
625 if (*iter
== '?' || *iter
== 'A')
627 ct
->have_qualifiers
= 1;
628 ct
->flags
|= (*iter
++ == '?' ? 0 : CT_BY_REFERENCE
);
633 break; /* non-const, non-volatile */
635 ct
->flags
|= CT_CONST
;
638 ct
->flags
|= CT_VOLATILE
;
645 return (char *)(*retval
= iter
);
649 /*******************************************************************
650 * get_constraints_convention_2
652 * Get type constraint information for a data type
654 static char *get_constraints_convention_2 (char **str
, compound_type
*ct
)
656 char *iter
= *str
, **retval
= str
;
658 /* FIXME: Why do arrays have both convention 1 & 2 constraints? */
659 if (ct
->have_qualifiers
&& ct
->dest_type
!= 'Q')
660 return (char *)*str
; /* Previously got constraints for this type */
662 ct
->have_qualifiers
= 1; /* Even if none, we've got all we're getting */
667 if (iter
[1] != 'A' && iter
[1] != 'B' && iter
[1] != 'C')
671 ct
->flags
|= CT_CONST
;
675 /* See note above, if we find 'C' it is _not_ a signed char */
676 ct
->flags
|= CT_VOLATILE
;
681 return (char *)(*retval
= iter
);
685 /*******************************************************************
688 * Return a string containing the name of a data type
690 static char *get_type_string (const char c
, const int constraints
)
692 const char *type_string
;
694 if (constraints
& CT_EXTENDED
)
698 case 'D': type_string
= "__int8"; break;
699 case 'E': type_string
= "unsigned __int8"; break;
700 case 'F': type_string
= "__int16"; break;
701 case 'G': type_string
= "unsigned __int16"; break;
702 case 'H': type_string
= "__int32"; break;
703 case 'I': type_string
= "unsigned __int32"; break;
704 case 'J': type_string
= "__int64"; break;
705 case 'K': type_string
= "unsigned __int64"; break;
706 case 'L': type_string
= "__int128"; break;
707 case 'M': type_string
= "unsigned __int128"; break;
708 case 'N': type_string
= "int"; break; /* bool */
709 case 'W': type_string
= "WCHAR"; break; /* wchar_t */
718 case 'C': /* Signed char, fall through */
719 case 'D': type_string
= "char"; break;
720 case 'E': type_string
= "unsigned char"; break;
721 case 'F': type_string
= "short int"; break;
722 case 'G': type_string
= "unsigned short int"; break;
723 case 'H': type_string
= "int"; break;
724 case 'I': type_string
= "unsigned int"; break;
725 case 'J': type_string
= "long"; break;
726 case 'K': type_string
= "unsigned long"; break;
727 case 'M': type_string
= "float"; break;
728 case 'N': type_string
= "double"; break;
729 case 'O': type_string
= "long double"; break;
730 /* FIXME: T = union */
732 case 'V': type_string
= "struct"; break;
733 case 'X': return strdup ("void");
734 case 'Z': return strdup ("...");
740 return str_create (3, constraints
& CT_CONST
? "const " :
741 constraints
& CT_VOLATILE
? "volatile " : "", type_string
,
742 constraints
& CT_BY_REFERENCE
? " *" : "");
746 /*******************************************************************
749 * Get the ARG_* constant for this data type
751 static int get_type_constant (const char c
, const int constraints
)
753 /* Any reference type is really a pointer */
754 if (constraints
& CT_BY_REFERENCE
)
759 case 'C': case 'D': case 'E': case 'F': case 'G': case 'H': case 'I':
779 /*******************************************************************
780 * get_pointer_type_string
782 * Return a string containing 'pointer to expression'
784 static char *get_pointer_type_string (compound_type
*ct
,
785 const char *expression
)
787 /* FIXME: set a compound flag for bracketing expression if needed */
788 return str_create (3, ct
->flags
& CT_CONST
? "const " :
789 ct
->flags
& CT_VOLATILE
? "volatile " : "", expression
,
790 ct
->flags
& CT_BY_REFERENCE
? " **" : " *");