Fixed WM_GETTEXTLENGTH handling.
[wine/multimedia.git] / tools / specmaker / msmangle.c
bloba418efcd83c07c94e897f69fbece8486878b5161
1 /*
2 * Demangle VC++ symbols into C function prototypes
4 * Copyright 2000 Jon Griffiths
5 */
6 #include "specmaker.h"
/* Type for parsing mangled types.
 *
 * One of these is filled in for every (sub)type decoded from a mangled
 * symbol. Always initialise it with INIT_CT() before use and release it
 * with FREE_CT() afterwards.
 */
typedef struct _compound_type
{
  char  dest_type;        /* Type code being decoded, as stored by the parser */
  int   flags;            /* Bitmask of CT_* constraint flags */
  int   have_qualifiers;  /* Non-zero once constraints have been parsed */
  char *expression;       /* Text of the type; released by FREE_CT() */
} compound_type;


/* Initialise a compound type structure (all members zero/NULL).
 * 'ct' must be an lvalue of type compound_type.
 */
#define INIT_CT(ct) do { memset (&(ct), 0, sizeof (ct)); } while (0)

/* Free the memory used by a compound structure.
 * The argument is parenthesised so that expressions such as '*ptr' work,
 * and the pointer is reset so that a second FREE_CT() is harmless.
 * Note that 'ct' is evaluated more than once.
 */
#define FREE_CT(ct) do { free ((ct).expression); (ct).expression = NULL; } while (0)
25 /* Internal functions */
26 static char *demangle_datatype (char **str, compound_type *ct,
27 parsed_symbol* sym);
29 static char *get_constraints_convention_1 (char **str, compound_type *ct);
31 static char *get_constraints_convention_2 (char **str, compound_type *ct);
33 static char *get_type_string (const char c, const int constraints);
35 static int get_type_constant (const char c, const int constraints);
37 static char *get_pointer_type_string (compound_type *ct,
38 const char *expression);
41 /*******************************************************************
42 * demangle_symbol
44 * Demangle a C++ linker symbol into a C prototype
46 int symbol_demangle (parsed_symbol *sym)
48 compound_type ct;
49 int is_static = 0, is_const = 0;
50 char *function_name = NULL;
51 char *class_name = NULL;
52 char *name;
53 static unsigned int hash = 0; /* In case of overloaded functions */
55 assert (globals.do_code);
56 assert (sym && sym->symbol);
58 hash++;
60 /* MS mangled names always begin with '?' */
61 name = sym->symbol;
62 if (*name++ != '?')
63 return -1;
65 if (VERBOSE)
66 puts ("Attempting to demangle symbol");
68 /* Then function name or operator code */
69 if (*name == '?')
71 /* C++ operator code (one character, or two if the first is '_') */
72 switch (*++name)
74 case '0': function_name = strdup ("ctor"); break;
75 case '1': function_name = strdup ("dtor"); break;
76 case '2': function_name = strdup ("operator_new"); break;
77 case '3': function_name = strdup ("operator_delete"); break;
78 case '4': function_name = strdup ("operator_equals"); break;
79 case '5': function_name = strdup ("operator_shiftright"); break;
80 case '6': function_name = strdup ("operator_shiftleft"); break;
81 case '7': function_name = strdup ("operator_not"); break;
82 case '8': function_name = strdup ("operator_equalsequals"); break;
83 case '9': function_name = strdup ("operator_notequals"); break;
84 case 'A': function_name = strdup ("operator_array"); break;
85 case 'C': function_name = strdup ("operator_dereference"); break;
86 case 'D': function_name = strdup ("operator_multiply"); break;
87 case 'E': function_name = strdup ("operator_plusplus"); break;
88 case 'F': function_name = strdup ("operator_minusminus"); break;
89 case 'G': function_name = strdup ("operator_minus"); break;
90 case 'H': function_name = strdup ("operator_plus"); break;
91 case 'I': function_name = strdup ("operator_address"); break;
92 case 'J': function_name = strdup ("operator_dereferencememberptr"); break;
93 case 'K': function_name = strdup ("operator_divide"); break;
94 case 'L': function_name = strdup ("operator_modulo"); break;
95 case 'M': function_name = strdup ("operator_lessthan"); break;
96 case 'N': function_name = strdup ("operator_lessthanequal"); break;
97 case 'O': function_name = strdup ("operator_greaterthan"); break;
98 case 'P': function_name = strdup ("operator_greaterthanequal"); break;
99 case 'R': function_name = strdup ("operator_functioncall"); break;
100 case 'S': function_name = strdup ("operator_compliment"); break;
101 case 'T': function_name = strdup ("operator_xor"); break;
102 case 'U': function_name = strdup ("operator_logicalor"); break;
103 case 'V': function_name = strdup ("operator_logicaland"); break;
104 case 'W': function_name = strdup ("operator_or"); break;
105 case 'X': function_name = strdup ("operator_multiplyequals"); break;
106 case 'Y': function_name = strdup ("operator_plusequals"); break;
107 case 'Z': function_name = strdup ("operator_minusequals"); break;
108 case '_':
109 switch (*++name)
111 case '0': function_name = strdup ("operator_divideequals"); break;
112 case '1': function_name = strdup ("operator_moduloequals"); break;
113 case '2': function_name = strdup ("operator_shiftrightequals"); break;
114 case '3': function_name = strdup ("operator_shiftleftequals"); break;
115 case '4': function_name = strdup ("operator_andequals"); break;
116 case '5': function_name = strdup ("operator_orequals"); break;
117 case '6': function_name = strdup ("operator_xorequals"); break;
118 /* FIXME: These look like static vtable/rtti information ? */
119 case 'E': function_name = strdup ("_unknown_E"); break;
120 case 'G': function_name = strdup ("_unknown_G"); break;
121 default:
122 return -1;
124 break;
125 default:
126 /* FIXME: Other operators */
127 return -1;
129 name++;
131 else
133 /* Type or function name terminated by '@' */
134 function_name = name;
135 while (*name && *name++ != '@') ;
136 if (!*name)
137 return -1;
138 function_name = str_substring (function_name, name - 1);
141 /* Either a class name, or '@' if the symbol is not a class member */
142 if (*name == '@')
144 class_name = strdup ("global"); /* Non member function (or a datatype) */
145 name++;
147 else
149 /* Class the function is associated with, terminated by '@@' */
150 class_name = name;
151 while (*name && *name++ != '@') ;
152 if (*name++ != '@')
153 return -1;
154 class_name = str_substring (class_name, name - 2);
157 /* Note: This is guesswork on my part, but it seems to work:
158 * 'Q' Means the function is passed an implicit 'this' pointer.
159 * 'S' Means static member function, i.e. no implicit 'this' pointer.
160 * 'Y' Is used for datatypes and functions, so there is no 'this' pointer.
161 * This character also implies some other things:
162 * 'Y','S' = The character after the calling convention is always the
163 * start of the return type code.
164 * 'Q' Character after the calling convention is 'const'ness code
165 * (only non static member functions can be const).
166 * 'U' also occurs, it seems to behave like Q, but probably implies
167 * something else.
169 switch(*name++)
171 case 'U' :
172 case 'Q' :
173 /* Implicit 'this' pointer */
174 sym->arg_text [sym->argc] = str_create (3, "struct ", class_name, " *");
175 sym->arg_type [sym->argc] = ARG_POINTER;
176 sym->arg_flag [sym->argc] = 0;
177 sym->arg_name [sym->argc++] = strdup ("_this");
178 /* New struct definitions can be 'grep'ed out for making a fixup header */
179 if (VERBOSE)
180 printf ("struct %s { int _FIXME; };\n", class_name);
181 break;
182 case 'S' :
183 is_static = 1;
184 break;
185 case 'Y' :
186 break;
187 default:
188 return -1;
191 /* Next is the calling convention */
192 switch (*name++)
194 case 'A':
195 sym->calling_convention = strdup ("__cdecl");
196 break;
197 case 'B': /* FIXME: Something to do with __declspec(dllexport)? */
198 case 'I': /* __fastcall */
199 case 'G':
200 sym->calling_convention = strdup ("__stdcall");
201 break;
202 default:
203 return -1;
206 /* If the symbol is associated with a class, its 'const' status follows */
207 if (sym->argc)
209 if (*name == 'B')
210 is_const = 1;
211 else if (*name != 'E')
212 return -1;
213 name++;
216 /* Return type, or @ if 'void' */
217 if (*name == '@')
219 sym->return_text = strdup ("void");
220 sym->return_type = ARG_VOID;
221 name++;
223 else
225 INIT_CT (ct);
226 if (!demangle_datatype (&name, &ct, sym))
227 return -1;
228 sym->return_text = ct.expression;
229 sym->return_type = get_type_constant(ct.dest_type, ct.flags);
230 ct.expression = NULL;
231 FREE_CT (ct);
234 /* Now come the function arguments */
235 while (*name && *name != 'Z')
237 /* Decode each data type and append it to the argument list */
238 if (*name != '@')
240 INIT_CT (ct);
241 if (!demangle_datatype(&name, &ct, sym))
242 return -1;
244 if (strcmp (ct.expression, "void"))
246 sym->arg_text [sym->argc] = ct.expression;
247 ct.expression = NULL;
248 sym->arg_type [sym->argc] = get_type_constant (ct.dest_type, ct.flags);
249 sym->arg_flag [sym->argc] = ct.flags;
250 sym->arg_name[sym->argc] = str_create_num (1, sym->argc, "arg");
251 sym->argc++;
253 else
254 break; /* 'void' terminates an argument list */
255 FREE_CT (ct);
257 else
258 name++;
261 while (*name == '@')
262 name++;
264 /* Functions are always terminated by 'Z'. If we made it this far and
265 * Don't find it, we have incorrectly identified a data type.
267 if (*name != 'Z')
268 return -1;
270 /* Note: '()' after 'Z' means 'throws', but we don't care here */
272 /* Create the function name. Include a unique number because otherwise
273 * overloaded functions could have the same c signature.
275 sym->function_name = str_create_num (4, hash, class_name, "_",
276 function_name, is_static ? "_static" : is_const ? "_const" : "_");
278 assert (sym->return_text);
279 assert (sym->calling_convention);
280 assert (sym->function_name);
282 free (class_name);
283 free (function_name);
285 if (VERBOSE)
286 puts ("Demangled symbol OK");
288 return 0;
292 /*******************************************************************
293 * demangle_datatype
295 * Attempt to demangle a C++ data type, which may be compound.
296 * a compound type is made up of a number of simple types. e.g:
297 * char** = (pointer to (pointer to (char)))
299 * Uses a simple recursive descent algorithm that is broken
300 * and/or incomplete, without a doubt ;-)
302 static char *demangle_datatype (char **str, compound_type *ct,
303 parsed_symbol* sym)
305 char *iter;
307 assert (str && *str);
308 assert (ct);
310 iter = *str;
312 if (!get_constraints_convention_1 (&iter, ct))
313 return NULL;
315 switch (*iter)
317 case '_':
318 if (*++iter != 'N') /* _N = bool */
319 return NULL;
320 iter++;
321 ct->dest_type = 'I'; /* treat as int */
322 if (!get_constraints_convention_2 (&iter, ct))
323 return NULL;
324 ct->expression = get_type_string (ct->dest_type, ct->flags);
325 break;
326 case 'C': case 'D': case 'E': case 'F': case 'G':
327 case 'H': case 'I': case 'J': case 'K': case 'M':
328 case 'N': case 'O': case 'X': case 'Z':
329 /* Simple data types */
330 ct->dest_type = *iter++;
331 if (!get_constraints_convention_2 (&iter, ct))
332 return NULL;
333 ct->expression = get_type_string (ct->dest_type, ct->flags);
334 break;
335 case 'U':
336 case 'V':
337 /* Class/struct/union */
338 ct->dest_type = *iter++;
339 if (*iter == '0' || *iter == '1')
341 /* Referring to class type (implicit 'this') */
342 char *stripped;
343 if (!sym->argc)
344 return NULL;
346 iter++;
347 /* Apply our constraints to the base type (struct xxx *) */
348 stripped = strdup (sym->arg_text [0]);
349 if (!stripped)
350 fatal ("Out of Memory");
352 /* If we're a reference, re-use the pointer already in the type */
353 if (!ct->flags & CT_BY_REFERENCE)
354 stripped[ strlen (stripped) - 2] = '\0'; /* otherwise, strip it */
356 ct->expression = str_create (2, ct->flags & CT_CONST ? "const " :
357 ct->flags & CT_VOLATILE ? "volatile " : "", stripped);
358 free (stripped);
360 else if (*iter == '_')
362 /* The name of the class/struct, followed by '@@' */
363 char *struct_name = ++iter;
364 while (*iter && *iter++ != '@') ;
365 if (*iter++ != '@')
366 return NULL;
367 struct_name = str_substring (struct_name, iter - 2);
368 ct->expression = str_create (4, ct->flags & CT_CONST ? "const " :
369 ct->flags & CT_VOLATILE ? "volatile " : "", "struct ",
370 struct_name, ct->flags & CT_BY_REFERENCE ? " *" : "");
371 free (struct_name);
373 break;
374 case 'Q': /* FIXME: Array Just treated as pointer currently */
375 case 'P': /* Pointer */
377 compound_type sub_ct;
378 INIT_CT (sub_ct);
380 ct->dest_type = *iter++;
381 if (!get_constraints_convention_2 (&iter, ct))
382 return NULL;
384 /* FIXME: P6 = Function pointer, others who knows.. */
385 if (isdigit (*iter))
386 return NULL;
388 /* Recurse to get the pointed-to type */
389 if (!demangle_datatype (&iter, &sub_ct, sym))
390 return NULL;
392 ct->expression = get_pointer_type_string (ct, sub_ct.expression);
394 FREE_CT (sub_ct);
396 break;
397 case '0': case '1': case '2': case '3': case '4':
398 case '5': case '6': case '7': case '8': case '9':
399 /* Referring back to previously parsed type */
400 if (sym->argc >= (size_t)('0' - *iter))
401 return NULL;
402 ct->dest_type = sym->arg_type ['0' - *iter];
403 ct->expression = strdup (sym->arg_text ['0' - *iter]);
404 iter++;
405 break;
406 default :
407 return NULL;
409 if (!ct->expression)
410 return NULL;
412 return (char *)(*str = iter);
/* Constraints:
 * There are two conventions for specifying data type constraints. I
 * don't know how the compiler chooses between them, but I suspect it
 * is based on ensuring that linker names are unique.
 * Convention 1. The data type modifier is given first, followed
 *   by the data type it operates on. '?' means passed by value,
 *   'A' means passed by reference. Note neither of these characters
 *   is a valid base data type. This is then followed by a character
 *   specifying constness or volatility.
 * Convention 2. The base data type (which is never '?' or 'A') is
 *   given first. The character modifier is optionally given after
 *   the base type character. If a valid character modifier is present,
 *   then it only applies to the current data type if the character
 *   after that is not 'A' 'B' or 'C' (Because this makes a convention 1
 *   constraint for the next data type).
 *
 * The conventions are usually mixed within the same symbol.
 * Since 'C' is both a qualifier and a data type, I suspect that
 * convention 1 allows specifying e.g. 'volatile signed char*'. In
 * convention 2 this would be 'CC' which is ambiguous (i.e. Is it two
 * pointers, or a single pointer + modifier?). In convention 1 it
 * is encoded as '?CC' which is not ambiguous. This probably
 * holds true for some other types as well.
 */
441 /*******************************************************************
442 * get_constraints_convention_1
444 * Get type constraint information for a data type
446 static char *get_constraints_convention_1 (char **str, compound_type *ct)
448 char *iter = *str, **retval = str;
450 if (ct->have_qualifiers)
451 return (char *)*str; /* Previously got constraints for this type */
453 if (*iter == '?' || *iter == 'A')
455 ct->have_qualifiers = 1;
456 ct->flags |= (*iter++ == '?' ? 0 : CT_BY_REFERENCE);
458 switch (*iter++)
460 case 'A' :
461 break; /* non-const, non-volatile */
462 case 'B' :
463 ct->flags |= CT_CONST;
464 break;
465 case 'C' :
466 ct->flags |= CT_VOLATILE;
467 break;
468 default :
469 return NULL;
473 return (char *)(*retval = iter);
477 /*******************************************************************
478 * get_constraints_convention_2
480 * Get type constraint information for a data type
482 static char *get_constraints_convention_2 (char **str, compound_type *ct)
484 char *iter = *str, **retval = str;
486 /* FIXME: Why do arrays have both convention 1 & 2 constraints? */
487 if (ct->have_qualifiers && ct->dest_type != 'Q')
488 return (char *)*str; /* Previously got constraints for this type */
490 ct->have_qualifiers = 1; /* Even if none, we've got all we're getting */
492 switch (*iter)
494 case 'A' :
495 if (iter[1] != 'A' && iter[1] != 'B' && iter[1] != 'C')
496 iter++;
497 break;
498 case 'B' :
499 ct->flags |= CT_CONST;
500 iter++;
501 break;
502 case 'C' :
503 /* See note above, if we find 'C' it is _not_ a signed char */
504 ct->flags |= CT_VOLATILE;
505 iter++;
506 break;
509 return (char *)(*retval = iter);
513 /*******************************************************************
514 * get_type_string
516 * Return a string containing the name of a data type
518 static char *get_type_string (const char c, const int constraints)
520 char *type_string;
522 switch (c)
524 case 'C': /* Signed char, fall through */
525 case 'D': type_string = "char"; break;
526 case 'E': type_string = "unsigned char"; break;
527 case 'F': type_string = "short int"; break;
528 case 'G': type_string = "unsigned short int"; break;
529 case 'H': type_string = "int"; break;
530 case 'I': type_string = "unsigned int"; break;
531 case 'J': type_string = "long"; break;
532 case 'K': type_string = "unsigned long"; break;
533 case 'M': type_string = "float"; break;
534 case 'N': type_string = "double"; break;
535 case 'O': type_string = "long double"; break;
536 case 'U':
537 case 'V': type_string = "struct"; break;
538 case 'X': return strdup ("void");
539 case 'Z': return strdup ("...");
540 default:
541 return NULL;
544 return str_create (3, constraints & CT_CONST ? "const " :
545 constraints & CT_VOLATILE ? "volatile " : "", type_string,
546 constraints & CT_BY_REFERENCE ? " *" : "");
550 /*******************************************************************
551 * get_type_constant
553 * Get the ARG_* constant for this data type
555 static int get_type_constant (const char c, const int constraints)
557 /* Any reference type is really a pointer */
558 if (constraints & CT_BY_REFERENCE)
559 return ARG_POINTER;
561 switch (c)
563 case 'C': case 'D': case 'E': case 'F': case 'G': case 'H': case 'I':
564 case 'J': case 'K':
565 return ARG_LONG;
566 case 'M':
567 return -1; /* FIXME */
568 case 'N': case 'O':
569 return ARG_DOUBLE;
570 case 'P': case 'Q':
571 return ARG_POINTER;
572 case 'U': case 'V':
573 return ARG_STRUCT;
574 case 'X':
575 return ARG_VOID;
576 case 'Z':
577 default:
578 return -1;
583 /*******************************************************************
584 * get_pointer_type_string
586 * Return a string containing 'pointer to expression'
588 static char *get_pointer_type_string (compound_type *ct,
589 const char *expression)
591 /* FIXME: set a compound flag for bracketing expression if needed */
592 return str_create (3, ct->flags & CT_CONST ? "const " :
593 ct->flags & CT_VOLATILE ? "volatile " : "", expression,
594 ct->flags & CT_BY_REFERENCE ? " **" : " *");