2 * Demangle VC++ symbols into C function prototypes
4 * Copyright 2000 Jon Griffiths
8 /* Type for parsing mangled types */
9 typedef struct _compound_type
/* Initialise a compound type structure by zeroing every byte of it.
 *
 * ct must be an lvalue designating the structure itself (not a pointer
 * to it), because the macro takes its address and its size. The argument
 * is parenthesised so that any lvalue expression, e.g. *ptr or array[i],
 * expands correctly.
 */
#define INIT_CT(ct) do { memset (&(ct), 0, sizeof (ct)); } while (0)
/* Free the memory used by a compound structure.
 *
 * Releases the 'expression' member of ct. ct must designate the structure
 * itself (not a pointer to it). No NULL check is needed because free (NULL)
 * is defined to do nothing. The member is not reset afterwards, so the
 * caller must not free or use it again without re-initialising ct.
 * The argument is parenthesised so that expressions such as *ptr expand to
 * (*ptr).expression rather than *ptr.expression.
 */
#define FREE_CT(ct) do { free ((ct).expression); } while (0)
25 /* Internal functions */
26 static char *demangle_datatype (char **str
, compound_type
*ct
,
29 static char *get_constraints_convention_1 (char **str
, compound_type
*ct
);
31 static char *get_constraints_convention_2 (char **str
, compound_type
*ct
);
33 static char *get_type_string (const char c
, const int constraints
);
35 static int get_type_constant (const char c
, const int constraints
);
37 static char *get_pointer_type_string (compound_type
*ct
,
38 const char *expression
);
41 /*******************************************************************
44 * Demangle a C++ linker symbol into a C prototype
46 int symbol_demangle (parsed_symbol
*sym
)
49 int is_static
= 0, is_const
= 0;
50 char *function_name
= NULL
;
51 char *class_name
= NULL
;
53 static unsigned int hash
= 0; /* In case of overloaded functions */
55 assert (globals
.do_code
);
56 assert (sym
&& sym
->symbol
);
60 /* MS mangled names always begin with '?' */
66 puts ("Attempting to demangle symbol");
68 /* Then function name or operator code */
71 /* C++ operator code (one character, or two if the first is '_') */
74 case '0': function_name
= strdup ("ctor"); break;
75 case '1': function_name
= strdup ("dtor"); break;
76 case '2': function_name
= strdup ("operator_new"); break;
77 case '3': function_name
= strdup ("operator_delete"); break;
78 case '4': function_name
= strdup ("operator_equals"); break;
79 case '5': function_name
= strdup ("operator_5"); break;
80 case '6': function_name
= strdup ("operator_6"); break;
81 case '7': function_name
= strdup ("operator_7"); break;
82 case '8': function_name
= strdup ("operator_equals_equals"); break;
83 case '9': function_name
= strdup ("operator_not_equals"); break;
84 case 'E': function_name
= strdup ("operator_plus_plus"); break;
85 case 'H': function_name
= strdup ("operator_plus"); break;
87 /* FIXME: Seems to be some kind of escape character - overloads? */
90 case '7': /* FIXME: Compiler generated default copy/assignment ctor? */
92 case 'E': function_name
= strdup ("_unknown_E"); break;
93 case 'G': function_name
= strdup ("_unknown_G"); break;
99 /* FIXME: Other operators */
106 /* Type or function name terminated by '@' */
107 function_name
= name
;
108 while (*name
&& *name
++ != '@') ;
111 function_name
= str_substring (function_name
, name
- 1);
114 /* Either a class name, or '@' if the symbol is not a class member */
117 class_name
= strdup ("global"); /* Non member function (or a datatype) */
122 /* Class the function is associated with, terminated by '@@' */
124 while (*name
&& *name
++ != '@') ;
127 class_name
= str_substring (class_name
, name
- 2);
130 /* Note: This is guesswork on my part, but it seems to work:
131 * 'Q' Means the function is passed an implicit 'this' pointer.
132 * 'S' Means static member function, i.e. no implicit 'this' pointer.
133 * 'Y' Is used for datatypes and functions, so there is no 'this' pointer.
134 * This character also implies some other things:
135 * 'Y','S' = The character after the calling convention is always the
136 * start of the return type code.
137 * 'Q' Character after the calling convention is 'const'ness code
138 * (only non static member functions can be const).
139 * 'U' also occurs, it seems to behave like Q, but probably implies
146 /* Implicit 'this' pointer */
147 sym
->arg_text
[sym
->argc
] = str_create (3, "struct ", class_name
, " *");
148 sym
->arg_type
[sym
->argc
] = ARG_POINTER
;
149 sym
->arg_flag
[sym
->argc
] = 0;
150 sym
->arg_name
[sym
->argc
++] = strdup ("_this");
151 /* New struct definitions can be 'grep'ed out for making a fixup header */
153 printf ("struct %s { int _FIXME; };\n", class_name
);
164 /* Next is the calling convention */
168 sym
->calling_convention
= strdup ("__cdecl");
170 case 'B': /* FIXME: Something to do with __declspec(dllexport)? */
171 case 'I': /* __fastcall */
173 sym
->calling_convention
= strdup ("__stdcall");
179 /* If the symbol is associated with a class, its 'const' status follows */
184 else if (*name
!= 'E')
189 /* Return type, or @ if 'void' */
192 sym
->return_text
= strdup ("void");
193 sym
->return_type
= ARG_VOID
;
199 if (!demangle_datatype (&name
, &ct
, sym
))
201 sym
->return_text
= ct
.expression
;
202 sym
->return_type
= get_type_constant(ct
.dest_type
, ct
.flags
);
203 ct
.expression
= NULL
;
207 /* Now come the function arguments */
208 while (*name
&& *name
!= 'Z')
210 /* Decode each data type and append it to the argument list */
214 if (!demangle_datatype(&name
, &ct
, sym
))
217 if (strcmp (ct
.expression
, "void"))
219 sym
->arg_text
[sym
->argc
] = ct
.expression
;
220 ct
.expression
= NULL
;
221 sym
->arg_type
[sym
->argc
] = get_type_constant (ct
.dest_type
, ct
.flags
);
222 sym
->arg_flag
[sym
->argc
] = ct
.flags
;
223 sym
->arg_name
[sym
->argc
] = str_create_num (1, sym
->argc
, "arg");
227 break; /* 'void' terminates an argument list */
237 /* Functions are always terminated by 'Z'. If we made it this far and
238 * Don't find it, we have incorrectly identified a data type.
243 /* Note: '()' after 'Z' means 'throws', but we don't care here */
245 /* Create the function name. Include a unique number because otherwise
246 * overloaded functions could have the same c signature.
248 sym
->function_name
= str_create_num (4, hash
, class_name
, "_",
249 function_name
, is_static
? "_static" : is_const
? "_const" : "_");
251 assert (sym
->return_text
);
252 assert (sym
->calling_convention
);
253 assert (sym
->function_name
);
256 free (function_name
);
259 puts ("Demangled symbol OK");
265 /*******************************************************************
268 * Attempt to demangle a C++ data type, which may be compound.
269 * A compound type is made up of a number of simple types. e.g:
270 * char** = (pointer to (pointer to (char)))
272 * Uses a simple recursive descent algorithm that is broken
273 * and/or incomplete, without a doubt ;-)
275 static char *demangle_datatype (char **str
, compound_type
*ct
,
280 assert (str
&& *str
);
285 if (!get_constraints_convention_1 (&iter
, ct
))
290 case 'C': case 'D': case 'E': case 'F': case 'G':
291 case 'H': case 'I': case 'J': case 'K': case 'M':
292 case 'N': case 'O': case 'X': case 'Z':
293 /* Simple data types */
294 ct
->dest_type
= *iter
++;
295 if (!get_constraints_convention_2 (&iter
, ct
))
297 ct
->expression
= get_type_string (ct
->dest_type
, ct
->flags
);
301 /* Class/struct/union */
302 ct
->dest_type
= *iter
++;
303 if (*iter
== '0' || *iter
== '1')
305 /* Referring to class type (implicit 'this') */
311 /* Apply our constraints to the base type (struct xxx *) */
312 stripped
= strdup (sym
->arg_text
[0]);
314 fatal ("Out of Memory");
316 /* If we're a reference, re-use the pointer already in the type */
317 if (!ct
->flags
& CT_BY_REFERENCE
)
318 stripped
[ strlen (stripped
) - 2] = '\0'; /* otherwise, strip it */
320 ct
->expression
= str_create (2, ct
->flags
& CT_CONST
? "const " :
321 ct
->flags
& CT_VOLATILE
? "volatile " : "", stripped
);
324 else if (*iter
== '_')
326 /* The name of the class/struct, followed by '@@' */
327 char *struct_name
= ++iter
;
328 while (*iter
&& *iter
++ != '@') ;
331 struct_name
= str_substring (struct_name
, iter
- 2);
332 ct
->expression
= str_create (4, ct
->flags
& CT_CONST
? "const " :
333 ct
->flags
& CT_VOLATILE
? "volatile " : "", "struct ",
334 struct_name
, ct
->flags
& CT_BY_REFERENCE
? " *" : "");
338 case 'Q': /* FIXME: Array Just treated as pointer currently */
339 case 'P': /* Pointer */
341 compound_type sub_ct
;
344 ct
->dest_type
= *iter
++;
345 if (!get_constraints_convention_2 (&iter
, ct
))
348 /* FIXME: P6 = Function pointer, others who knows.. */
352 /* Recurse to get the pointed-to type */
353 if (!demangle_datatype (&iter
, &sub_ct
, sym
))
356 ct
->expression
= get_pointer_type_string (ct
, sub_ct
.expression
);
361 case '0': case '1': case '2': case '3': case '4':
362 case '5': case '6': case '7': case '8': case '9':
363 /* Referring back to previously parsed type */
364 if (sym
->argc
>= (size_t)('0' - *iter
))
366 ct
->dest_type
= sym
->arg_type
['0' - *iter
];
367 ct
->expression
= strdup (sym
->arg_text
['0' - *iter
]);
376 return (char *)(*str
= iter
);
381 * There are two conventions for specifying data type constraints. I
382 * don't know how the compiler chooses between them, but I suspect it
383 * is based on ensuring that linker names are unique.
384 * Convention 1. The data type modifier is given first, followed
385 * by the data type it operates on. '?' means passed by value,
386 * 'A' means passed by reference. Note neither of these characters
387 * is a valid base data type. This is then followed by a character
388 * specifying constness or volatility.
389 * Convention 2. The base data type (which is never '?' or 'A') is
390 * given first. The character modifier is optionally given after
391 * the base type character. If a valid character modifier is present,
392 * then it only applies to the current data type if the character
393 * after that is not 'A' 'B' or 'C' (Because this makes a convention 1
394 * constraint for the next data type).
396 * The conventions are usually mixed within the same symbol.
397 * Since 'C' is both a qualifier and a data type, I suspect that
398 * convention 1 allows specifying e.g. 'volatile signed char*'. In
399 * convention 2 this would be 'CC' which is ambiguous (i.e. Is it two
400 * pointers, or a single pointer + modifier?). In convention 1 it
401 * is encoded as '?CC' which is not ambiguous. This probably
402 * holds true for some other types as well.
405 /*******************************************************************
406 * get_constraints_convention_1
408 * Get type constraint information for a data type
410 static char *get_constraints_convention_1 (char **str
, compound_type
*ct
)
412 char *iter
= *str
, **retval
= str
;
414 if (ct
->have_qualifiers
)
415 return (char *)*str
; /* Previously got constraints for this type */
417 if (*iter
== '?' || *iter
== 'A')
419 ct
->have_qualifiers
= 1;
420 ct
->flags
|= (*iter
++ == '?' ? 0 : CT_BY_REFERENCE
);
425 break; /* non-const, non-volatile */
427 ct
->flags
|= CT_CONST
;
430 ct
->flags
|= CT_VOLATILE
;
437 return (char *)(*retval
= iter
);
441 /*******************************************************************
442 * get_constraints_convention_2
444 * Get type constraint information for a data type
446 static char *get_constraints_convention_2 (char **str
, compound_type
*ct
)
448 char *iter
= *str
, **retval
= str
;
450 /* FIXME: Why do arrays have both convention 1 & 2 constraints? */
451 if (ct
->have_qualifiers
&& ct
->dest_type
!= 'Q')
452 return (char *)*str
; /* Previously got constraints for this type */
454 ct
->have_qualifiers
= 1; /* Even if none, we've got all we're getting */
459 if (iter
[1] != 'A' && iter
[1] != 'B' && iter
[1] != 'C')
463 ct
->flags
|= CT_CONST
;
467 /* See note above, if we find 'C' it is _not_ a signed char */
468 ct
->flags
|= CT_VOLATILE
;
473 return (char *)(*retval
= iter
);
477 /*******************************************************************
480 * Return a string containing the name of a data type
482 static char *get_type_string (const char c
, const int constraints
)
488 case 'C': /* Signed char, fall through */
489 case 'D': type_string
= "char"; break;
490 case 'E': type_string
= "unsigned char"; break;
491 case 'F': type_string
= "short int"; break;
492 case 'G': type_string
= "unsigned short int"; break;
493 case 'H': type_string
= "int"; break;
494 case 'I': type_string
= "unsigned int"; break;
495 case 'J': type_string
= "long"; break;
496 case 'K': type_string
= "unsigned long"; break;
497 case 'M': type_string
= "float"; break;
498 case 'N': type_string
= "double"; break;
499 case 'O': type_string
= "long double"; break;
501 case 'V': type_string
= "struct"; break;
502 case 'X': return strdup ("void");
503 case 'Z': return strdup ("...");
508 return str_create (3, constraints
& CT_CONST
? "const " :
509 constraints
& CT_VOLATILE
? "volatile " : "", type_string
,
510 constraints
& CT_BY_REFERENCE
? " *" : "");
514 /*******************************************************************
517 * Get the ARG_* constant for this data type
519 static int get_type_constant (const char c
, const int constraints
)
521 /* Any reference type is really a pointer */
522 if (constraints
& CT_BY_REFERENCE
)
527 case 'C': case 'D': case 'E': case 'F': case 'G': case 'H': case 'I':
531 return -1; /* FIXME */
547 /*******************************************************************
548 * get_pointer_type_string
550 * Return a string containing 'pointer to expression'
552 static char *get_pointer_type_string (compound_type
*ct
,
553 const char *expression
)
555 /* FIXME: set a compound flag for bracketing expression if needed */
556 return str_create (3, ct
->flags
& CT_CONST
? "const " :
557 ct
->flags
& CT_VOLATILE
? "volatile " : "", expression
,
558 ct
->flags
& CT_BY_REFERENCE
? " **" : " *");