Drop SYSLEVEL checks from relay debugging, since they break debugging
[wine/hacks.git] / tools / specmaker / msmangle.c
blobd5f3f6b7bdec3e8a1e1008f6274de248744823f4
1 /*
2 * Demangle VC++ symbols into C function prototypes
4 * Copyright 2000 Jon Griffiths
5 */
6 #include "specmaker.h"
/* Working state used while one mangled type is being parsed */
struct _compound_type
{
  char  dest_type;       /* Type code stored by demangle_datatype() */
  int   flags;           /* Bitwise OR of the CT_* constraint flags */
  int   have_qualifiers; /* Non-zero once constraints have been read */
  char *expression;      /* Heap string with the C text; see FREE_CT */
};

typedef struct _compound_type compound_type;
/* Initialise a compound type structure by zeroing all of its bytes.
 * 'ct' is the structure itself (an lvalue), not a pointer to it.
 */
#define INIT_CT(ct) do { memset (&(ct), 0, sizeof (ct)); } while (0)

/* Free the memory used by a compound structure. The expression pointer is
 * reset to NULL afterwards, so running FREE_CT twice on the same structure
 * is harmless. free(NULL) is a no-op, so no guard is needed.
 */
#define FREE_CT(ct) do { free ((ct).expression); (ct).expression = NULL; } while (0)
/* Flag bits for data symbols. DATA_VTABLE marks the names that
 * symbol_demangle() reports as an opaque 'void *' data item.
 */
#define DATA_VTABLE (1 << 0)
27 /* Internal functions */
28 static char *demangle_datatype (char **str, compound_type *ct,
29 parsed_symbol* sym);
31 static char *get_constraints_convention_1 (char **str, compound_type *ct);
33 static char *get_constraints_convention_2 (char **str, compound_type *ct);
35 static char *get_type_string (const char c, const int constraints);
37 static int get_type_constant (const char c, const int constraints);
39 static char *get_pointer_type_string (compound_type *ct,
40 const char *expression);
43 /*******************************************************************
44 * demangle_symbol
46 * Demangle a C++ linker symbol into a C prototype
48 int symbol_demangle (parsed_symbol *sym)
50 compound_type ct;
51 int is_static = 0, is_const = 0;
52 char *function_name = NULL;
53 char *class_name = NULL;
54 char *name, *const_status;
55 static unsigned int hash = 0; /* In case of overloaded functions */
56 unsigned int data_flags = 0;
58 assert (globals.do_code);
59 assert (sym && sym->symbol);
61 hash++;
63 /* MS mangled names always begin with '?' */
64 name = sym->symbol;
65 if (*name++ != '?')
66 return -1;
68 if (VERBOSE)
69 puts ("Attempting to demangle symbol");
71 /* Then function name or operator code */
72 if (*name == '?')
74 /* C++ operator code (one character, or two if the first is '_') */
75 switch (*++name)
77 case '0': function_name = strdup ("ctor"); break;
78 case '1': function_name = strdup ("dtor"); break;
79 case '2': function_name = strdup ("operator_new"); break;
80 case '3': function_name = strdup ("operator_delete"); break;
81 case '4': function_name = strdup ("operator_equals"); break;
82 case '5': function_name = strdup ("operator_shiftright"); break;
83 case '6': function_name = strdup ("operator_shiftleft"); break;
84 case '7': function_name = strdup ("operator_not"); break;
85 case '8': function_name = strdup ("operator_equalsequals"); break;
86 case '9': function_name = strdup ("operator_notequals"); break;
87 case 'A': function_name = strdup ("operator_array"); break;
88 case 'C': function_name = strdup ("operator_dereference"); break;
89 case 'D': function_name = strdup ("operator_multiply"); break;
90 case 'E': function_name = strdup ("operator_plusplus"); break;
91 case 'F': function_name = strdup ("operator_minusminus"); break;
92 case 'G': function_name = strdup ("operator_minus"); break;
93 case 'H': function_name = strdup ("operator_plus"); break;
94 case 'I': function_name = strdup ("operator_address"); break;
95 case 'J': function_name = strdup ("operator_dereferencememberptr"); break;
96 case 'K': function_name = strdup ("operator_divide"); break;
97 case 'L': function_name = strdup ("operator_modulo"); break;
98 case 'M': function_name = strdup ("operator_lessthan"); break;
99 case 'N': function_name = strdup ("operator_lessthanequal"); break;
100 case 'O': function_name = strdup ("operator_greaterthan"); break;
101 case 'P': function_name = strdup ("operator_greaterthanequal"); break;
102 case 'Q': function_name = strdup ("operator_comma"); break;
103 case 'R': function_name = strdup ("operator_functioncall"); break;
104 case 'S': function_name = strdup ("operator_compliment"); break;
105 case 'T': function_name = strdup ("operator_xor"); break;
106 case 'U': function_name = strdup ("operator_logicalor"); break;
107 case 'V': function_name = strdup ("operator_logicaland"); break;
108 case 'W': function_name = strdup ("operator_or"); break;
109 case 'X': function_name = strdup ("operator_multiplyequals"); break;
110 case 'Y': function_name = strdup ("operator_plusequals"); break;
111 case 'Z': function_name = strdup ("operator_minusequals"); break;
112 case '_':
113 switch (*++name)
115 case '0': function_name = strdup ("operator_divideequals"); break;
116 case '1': function_name = strdup ("operator_moduloequals"); break;
117 case '2': function_name = strdup ("operator_shiftrightequals"); break;
118 case '3': function_name = strdup ("operator_shiftleftequals"); break;
119 case '4': function_name = strdup ("operator_andequals"); break;
120 case '5': function_name = strdup ("operator_orequals"); break;
121 case '6': function_name = strdup ("operator_xorequals"); break;
122 case '7': function_name = strdup ("vftable"); data_flags = DATA_VTABLE; break;
123 case '8': function_name = strdup ("vbtable"); data_flags = DATA_VTABLE; break;
124 case '9': function_name = strdup ("vcall"); data_flags = DATA_VTABLE; break;
125 case 'A': function_name = strdup ("typeof"); data_flags = DATA_VTABLE; break;
126 case 'B': function_name = strdup ("local_static_guard"); data_flags = DATA_VTABLE; break;
127 case 'C': function_name = strdup ("string"); data_flags = DATA_VTABLE; break;
128 case 'D': function_name = strdup ("vbase_dtor"); data_flags = DATA_VTABLE; break;
129 case 'E': function_name = strdup ("vector_dtor"); break;
130 case 'G': function_name = strdup ("scalar_dtor"); break;
131 case 'H': function_name = strdup ("vector_ctor_iter"); break;
132 case 'I': function_name = strdup ("vector_dtor_iter"); break;
133 case 'J': function_name = strdup ("vector_vbase_ctor_iter"); break;
134 case 'L': function_name = strdup ("eh_vector_ctor_iter"); break;
135 case 'M': function_name = strdup ("eh_vector_dtor_iter"); break;
136 case 'N': function_name = strdup ("eh_vector_vbase_ctor_iter"); break;
137 case 'O': function_name = strdup ("copy_ctor_closure"); break;
138 case 'S': function_name = strdup ("local_vftable"); data_flags = DATA_VTABLE; break;
139 case 'T': function_name = strdup ("local_vftable_ctor_closure"); break;
140 case 'U': function_name = strdup ("operator_new_vector"); break;
141 case 'V': function_name = strdup ("operator_delete_vector"); break;
142 case 'X': function_name = strdup ("placement_new_closure"); break;
143 case 'Y': function_name = strdup ("placement_delete_closure"); break;
144 default:
145 return -1;
147 break;
148 default:
149 /* FIXME: Other operators */
150 return -1;
152 name++;
154 else
156 /* Type or function name terminated by '@' */
157 function_name = name;
158 while (*name && *name++ != '@') ;
159 if (!*name)
160 return -1;
161 function_name = str_substring (function_name, name - 1);
164 /* Either a class name, or '@' if the symbol is not a class member */
165 if (*name == '@')
167 class_name = strdup ("global"); /* Non member function (or a datatype) */
168 name++;
170 else
172 /* Class the function is associated with, terminated by '@@' */
173 class_name = name;
174 while (*name && *name++ != '@') ;
175 if (*name++ != '@')
176 return -1;
177 class_name = str_substring (class_name, name - 2);
180 /* Function/Data type and access level */
181 /* FIXME: why 2 possible letters for each option? */
182 switch(*name++)
184 /* Data */
186 case '0' : /* private static */
187 case '1' : /* protected static */
188 case '2' : /* public static */
189 is_static = 1;
190 /* Fall through */
191 case '3' : /* non static */
192 case '4' : /* non static */
193 /* Data members need to be implemented: report */
194 INIT_CT (ct);
195 if (!demangle_datatype (&name, &ct, sym))
197 if (VERBOSE)
198 printf ("/*FIXME: %s: unknown data*/\n", sym->symbol);
199 return -1;
201 sym->flags |= SYM_DATA;
202 sym->argc = 1;
203 sym->arg_name[0] = str_create (5, OUTPUT_UC_DLL_NAME, "_", class_name,
204 is_static ? "static_" : "_", function_name);
205 sym->arg_text[0] = str_create (3, ct.expression, " ", sym->arg_name[0]);
206 FREE_CT (ct);
207 return 0;
208 break;
210 case '6' : /* compiler generated static */
211 case '7' : /* compiler generated static */
212 if (data_flags & DATA_VTABLE)
214 sym->flags |= SYM_DATA;
215 sym->argc = 1;
216 sym->arg_name[0] = str_create (5, OUTPUT_UC_DLL_NAME, "_", class_name,
217 "_", function_name);
218 sym->arg_text[0] = str_create (2, "void *", sym->arg_name[0]);
220 if (VERBOSE)
221 puts ("Demangled symbol OK [vtable]");
222 return 0;
224 return -1;
225 break;
227 /* Functions */
229 case 'E' : /* private virtual */
230 case 'F' : /* private virtual */
231 case 'M' : /* protected virtual */
232 case 'N' : /* protected virtual */
233 case 'U' : /* public virtual */
234 case 'V' : /* public virtual */
235 /* Virtual functions need to be added to the exported vtable: report */
236 if (VERBOSE)
237 printf ("/*FIXME %s: %s::%s is virtual-add to vftable*/\n", sym->symbol,
238 class_name, function_name);
239 /* Fall through */
240 case 'A' : /* private */
241 case 'B' : /* private */
242 case 'I' : /* protected */
243 case 'J' : /* protected */
244 case 'Q' : /* public */
245 case 'R' : /* public */
246 /* Implicit 'this' pointer */
247 sym->arg_text [sym->argc] = str_create (3, "struct ", class_name, " *");
248 sym->arg_type [sym->argc] = ARG_POINTER;
249 sym->arg_flag [sym->argc] = 0;
250 sym->arg_name [sym->argc++] = strdup ("_this");
251 /* New struct definitions can be 'grep'ed out for making a fixup header */
252 if (VERBOSE)
253 printf ("struct %s { void **vtable; /*FIXME: class definition */ };\n", class_name);
254 break;
255 case 'C' : /* private: static */
256 case 'D' : /* private: static */
257 case 'K' : /* protected: static */
258 case 'L' : /* protected: static */
259 case 'S' : /* public: static */
260 case 'T' : /* public: static */
261 is_static = 1; /* No implicit this pointer */
262 break;
263 case 'Y' :
264 case 'Z' :
265 break;
266 /* FIXME: G,H / O,P / W,X are private / protected / public thunks */
267 default:
268 return -1;
271 /* If there is an implicit this pointer, const status follows */
272 if (sym->argc)
274 switch (*name++)
276 case 'A': break; /* non-const */
277 case 'B': is_const = CT_CONST; break;
278 case 'C': is_const = CT_VOLATILE; break;
279 case 'D': is_const = (CT_CONST | CT_VOLATILE); break;
280 default:
281 return -1;
285 /* Next is the calling convention */
286 switch (*name++)
288 case 'A': /* __cdecl */
289 case 'B': /* __cdecl __declspec(dllexport) */
290 if (!sym->argc)
292 sym->flags |= SYM_CDECL;
293 break;
295 /* Else fall through */
296 case 'C': /* __pascal */
297 case 'D': /* __pascal __declspec(dllexport) */
298 case 'E': /* __thiscall */
299 case 'F': /* __thiscall __declspec(dllexport) */
300 case 'G': /* __stdcall */
301 case 'H': /* __stdcall __declspec(dllexport) */
302 case 'I': /* __fastcall */
303 case 'J': /* __fastcall __declspec(dllexport)*/
304 case 'K': /* default (none given) */
305 if (sym->argc)
306 sym->flags |= SYM_THISCALL;
307 else
308 sym->flags |= SYM_STDCALL;
309 break;
310 default:
311 return -1;
314 /* Return type, or @ if 'void' */
315 if (*name == '@')
317 sym->return_text = strdup ("void");
318 sym->return_type = ARG_VOID;
319 name++;
321 else
323 INIT_CT (ct);
324 if (!demangle_datatype (&name, &ct, sym))
325 return -1;
326 sym->return_text = ct.expression;
327 sym->return_type = get_type_constant(ct.dest_type, ct.flags);
328 ct.expression = NULL;
329 FREE_CT (ct);
332 /* Now come the function arguments */
333 while (*name && *name != 'Z')
335 /* Decode each data type and append it to the argument list */
336 if (*name != '@')
338 INIT_CT (ct);
339 if (!demangle_datatype(&name, &ct, sym))
340 return -1;
342 if (strcmp (ct.expression, "void"))
344 sym->arg_text [sym->argc] = ct.expression;
345 ct.expression = NULL;
346 sym->arg_type [sym->argc] = get_type_constant (ct.dest_type, ct.flags);
347 sym->arg_flag [sym->argc] = ct.flags;
348 sym->arg_name[sym->argc] = str_create_num (1, sym->argc, "arg");
349 sym->argc++;
351 else
352 break; /* 'void' terminates an argument list */
353 FREE_CT (ct);
355 else
356 name++;
359 while (*name == '@')
360 name++;
362 /* Functions are always terminated by 'Z'. If we made it this far and
363 * Don't find it, we have incorrectly identified a data type.
365 if (*name != 'Z')
366 return -1;
368 /* Note: '()' after 'Z' means 'throws', but we don't care here */
370 /* Create the function name. Include a unique number because otherwise
371 * overloaded functions could have the same c signature.
373 switch (is_const)
375 case (CT_CONST | CT_VOLATILE): const_status = "_const_volatile"; break;
376 case CT_CONST: const_status = "_const"; break;
377 case CT_VOLATILE: const_status = "_volatile"; break;
378 default: const_status = "_"; break;
380 sym->function_name = str_create_num (4, hash, class_name, "_",
381 function_name, is_static ? "_static" : const_status);
383 assert (sym->return_text);
384 assert (sym->flags);
385 assert (sym->function_name);
387 free (class_name);
388 free (function_name);
390 if (VERBOSE)
391 puts ("Demangled symbol OK");
393 return 0;
397 /*******************************************************************
398 * demangle_datatype
400 * Attempt to demangle a C++ data type, which may be compound.
401 * a compound type is made up of a number of simple types. e.g:
402 * char** = (pointer to (pointer to (char)))
404 * Uses a simple recursive descent algorithm that is broken
405 * and/or incomplete, without a doubt ;-)
407 static char *demangle_datatype (char **str, compound_type *ct,
408 parsed_symbol* sym)
410 char *iter;
412 assert (str && *str);
413 assert (ct);
415 iter = *str;
417 if (!get_constraints_convention_1 (&iter, ct))
418 return NULL;
420 if (*iter == '_')
422 /* MS type: __int8,__int16 etc */
423 ct->flags |= CT_EXTENDED;
424 iter++;
427 switch (*iter)
429 case 'C': case 'D': case 'E': case 'F': case 'G':
430 case 'H': case 'I': case 'J': case 'K': case 'M':
431 case 'N': case 'O': case 'X': case 'Z':
432 /* Simple data types */
433 ct->dest_type = *iter++;
434 if (!get_constraints_convention_2 (&iter, ct))
435 return NULL;
436 ct->expression = get_type_string (ct->dest_type, ct->flags);
437 break;
438 case 'U':
439 case 'V':
440 /* Class/struct/union */
441 ct->dest_type = *iter++;
442 if (*iter == '0' || *iter == '1')
444 /* Referring to class type (implicit 'this') */
445 char *stripped;
446 if (!sym->argc)
447 return NULL;
449 iter++;
450 /* Apply our constraints to the base type (struct xxx *) */
451 stripped = strdup (sym->arg_text [0]);
452 if (!stripped)
453 fatal ("Out of Memory");
455 /* If we're a reference, re-use the pointer already in the type */
456 if (!ct->flags & CT_BY_REFERENCE)
457 stripped[ strlen (stripped) - 2] = '\0'; /* otherwise, strip it */
459 ct->expression = str_create (2, ct->flags & CT_CONST ? "const " :
460 ct->flags & CT_VOLATILE ? "volatile " : "", stripped);
461 free (stripped);
463 else if (*iter != '@')
465 /* The name of the class/struct, followed by '@@' */
466 char *struct_name = iter;
467 while (*iter && *iter++ != '@') ;
468 if (*iter++ != '@')
469 return NULL;
470 struct_name = str_substring (struct_name, iter - 2);
471 ct->expression = str_create (4, ct->flags & CT_CONST ? "const " :
472 ct->flags & CT_VOLATILE ? "volatile " : "", "struct ",
473 struct_name, ct->flags & CT_BY_REFERENCE ? " *" : "");
474 free (struct_name);
476 break;
477 case 'Q': /* FIXME: Array Just treated as pointer currently */
478 case 'P': /* Pointer */
480 compound_type sub_ct;
481 INIT_CT (sub_ct);
483 ct->dest_type = *iter++;
484 if (!get_constraints_convention_2 (&iter, ct))
485 return NULL;
487 /* FIXME: P6 = Function pointer, others who knows.. */
488 if (isdigit (*iter))
489 return NULL;
491 /* Recurse to get the pointed-to type */
492 if (!demangle_datatype (&iter, &sub_ct, sym))
493 return NULL;
495 ct->expression = get_pointer_type_string (ct, sub_ct.expression);
497 FREE_CT (sub_ct);
499 break;
500 case '0': case '1': case '2': case '3': case '4':
501 case '5': case '6': case '7': case '8': case '9':
502 /* Referring back to previously parsed type */
503 if (sym->argc >= (size_t)('0' - *iter))
504 return NULL;
505 ct->dest_type = sym->arg_type ['0' - *iter];
506 ct->expression = strdup (sym->arg_text ['0' - *iter]);
507 iter++;
508 break;
509 default :
510 return NULL;
512 if (!ct->expression)
513 return NULL;
515 return (char *)(*str = iter);
/* Constraints:
 * There are two conventions for specifying data type constraints. I
 * don't know how the compiler chooses between them, but I suspect it
 * is based on ensuring that linker names are unique.
 * Convention 1. The data type modifier is given first, followed
 * by the data type it operates on. '?' means passed by value,
 * 'A' means passed by reference. Note neither of these characters
 * is a valid base data type. This is then followed by a character
 * specifying constness or volatility.
 * Convention 2. The base data type (which is never '?' or 'A') is
 * given first. The character modifier is optionally given after
 * the base type character. If a valid character modifier is present,
 * then it only applies to the current data type if the character
 * after that is not 'A', 'B' or 'C' (because this makes a convention 1
 * constraint for the next data type).
 *
 * The conventions are usually mixed within the same symbol.
 * Since 'C' is both a qualifier and a data type, I suspect that
 * convention 1 allows specifying e.g. 'volatile signed char*'. In
 * convention 2 this would be 'CC' which is ambiguous (i.e. is it two
 * pointers, or a single pointer + modifier?). In convention 1 it
 * is encoded as '?CC' which is not ambiguous. This probably
 * holds true for some other types as well.
 */
544 /*******************************************************************
545 * get_constraints_convention_1
547 * Get type constraint information for a data type
549 static char *get_constraints_convention_1 (char **str, compound_type *ct)
551 char *iter = *str, **retval = str;
553 if (ct->have_qualifiers)
554 return (char *)*str; /* Previously got constraints for this type */
556 if (*iter == '?' || *iter == 'A')
558 ct->have_qualifiers = 1;
559 ct->flags |= (*iter++ == '?' ? 0 : CT_BY_REFERENCE);
561 switch (*iter++)
563 case 'A' :
564 break; /* non-const, non-volatile */
565 case 'B' :
566 ct->flags |= CT_CONST;
567 break;
568 case 'C' :
569 ct->flags |= CT_VOLATILE;
570 break;
571 default :
572 return NULL;
576 return (char *)(*retval = iter);
580 /*******************************************************************
581 * get_constraints_convention_2
583 * Get type constraint information for a data type
585 static char *get_constraints_convention_2 (char **str, compound_type *ct)
587 char *iter = *str, **retval = str;
589 /* FIXME: Why do arrays have both convention 1 & 2 constraints? */
590 if (ct->have_qualifiers && ct->dest_type != 'Q')
591 return (char *)*str; /* Previously got constraints for this type */
593 ct->have_qualifiers = 1; /* Even if none, we've got all we're getting */
595 switch (*iter)
597 case 'A' :
598 if (iter[1] != 'A' && iter[1] != 'B' && iter[1] != 'C')
599 iter++;
600 break;
601 case 'B' :
602 ct->flags |= CT_CONST;
603 iter++;
604 break;
605 case 'C' :
606 /* See note above, if we find 'C' it is _not_ a signed char */
607 ct->flags |= CT_VOLATILE;
608 iter++;
609 break;
612 return (char *)(*retval = iter);
616 /*******************************************************************
617 * get_type_string
619 * Return a string containing the name of a data type
621 static char *get_type_string (const char c, const int constraints)
623 char *type_string;
625 if (constraints & CT_EXTENDED)
627 switch (c)
629 case 'D': type_string = "__int8"; break;
630 case 'E': type_string = "__uint8"; break;
631 case 'F': type_string = "__int16"; break;
632 case 'G': type_string = "__uint16"; break;
633 case 'H': type_string = "__int32"; break;
634 case 'I': type_string = "__uint32"; break;
635 case 'J': type_string = "__int64"; break;
636 case 'K': type_string = "__uint64"; break;
637 case 'L': type_string = "__int128"; break;
638 case 'M': type_string = "__uint128"; break;
639 case 'N': type_string = "int"; break; /* bool */
640 case 'W': type_string = "WCHAR"; break; /* wchar_t */
641 default:
642 return NULL;
645 else
647 switch (c)
649 case 'C': /* Signed char, fall through */
650 case 'D': type_string = "char"; break;
651 case 'E': type_string = "unsigned char"; break;
652 case 'F': type_string = "short int"; break;
653 case 'G': type_string = "unsigned short int"; break;
654 case 'H': type_string = "int"; break;
655 case 'I': type_string = "unsigned int"; break;
656 case 'J': type_string = "long"; break;
657 case 'K': type_string = "unsigned long"; break;
658 case 'M': type_string = "float"; break;
659 case 'N': type_string = "double"; break;
660 case 'O': type_string = "long double"; break;
661 /* FIXME: T = union */
662 case 'U':
663 case 'V': type_string = "struct"; break;
664 case 'X': return strdup ("void");
665 case 'Z': return strdup ("...");
666 default:
667 return NULL;
671 return str_create (3, constraints & CT_CONST ? "const " :
672 constraints & CT_VOLATILE ? "volatile " : "", type_string,
673 constraints & CT_BY_REFERENCE ? " *" : "");
677 /*******************************************************************
678 * get_type_constant
680 * Get the ARG_* constant for this data type
682 static int get_type_constant (const char c, const int constraints)
684 /* Any reference type is really a pointer */
685 if (constraints & CT_BY_REFERENCE)
686 return ARG_POINTER;
688 switch (c)
690 case 'C': case 'D': case 'E': case 'F': case 'G': case 'H': case 'I':
691 case 'J': case 'K':
692 return ARG_LONG;
693 case 'M':
694 return -1; /* FIXME */
695 case 'N': case 'O':
696 return ARG_DOUBLE;
697 case 'P': case 'Q':
698 return ARG_POINTER;
699 case 'U': case 'V':
700 return ARG_STRUCT;
701 case 'X':
702 return ARG_VOID;
703 case 'Z':
704 default:
705 return -1;
710 /*******************************************************************
711 * get_pointer_type_string
713 * Return a string containing 'pointer to expression'
715 static char *get_pointer_type_string (compound_type *ct,
716 const char *expression)
718 /* FIXME: set a compound flag for bracketing expression if needed */
719 return str_create (3, ct->flags & CT_CONST ? "const " :
720 ct->flags & CT_VOLATILE ? "volatile " : "", expression,
721 ct->flags & CT_BY_REFERENCE ? " **" : " *");