Update libiberty demangler to support Rust v0 name mangling
[valgrind.git] / coregrind / m_demangle / rust-demangle.c
blob9ad424e85cd0e6b082cd10cc471e40be6854c62f
1 /* Demangler for the Rust programming language
2 Copyright (C) 2016-2021 Free Software Foundation, Inc.
3 Written by David Tolnay (dtolnay@gmail.com).
4 Rewritten by Eduard-Mihai Burtescu (eddyb@lyken.rs) for v0 support.
6 This file is part of the libiberty library.
7 Libiberty is free software; you can redistribute it and/or
8 modify it under the terms of the GNU Library General Public
9 License as published by the Free Software Foundation; either
10 version 2 of the License, or (at your option) any later version.
12 In addition to the permissions in the GNU Library General Public
13 License, the Free Software Foundation gives you unlimited permission
14 to link the compiled version of this file into combinations with other
15 programs, and to distribute those combinations without any restriction
16 coming from the use of this file. (The Library Public License
17 restrictions do apply in other respects; for example, they cover
18 modification of the file, and distribution when not linked into a
19 combined executable.)
21 Libiberty is distributed in the hope that it will be useful,
22 but WITHOUT ANY WARRANTY; without even the implied warranty of
23 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
24 Library General Public License for more details.
26 You should have received a copy of the GNU Library General Public
27 License along with libiberty; see the file COPYING.LIB.
28 If not, see <http://www.gnu.org/licenses/>. */
31 #if 0 /* in valgrind */
32 #ifdef HAVE_CONFIG_H
33 #include "config.h"
34 #endif
35 #endif /* ! in valgrind */
37 #if 0 /* in valgrind */
38 #include "safe-ctype.h"
39 #endif /* ! in valgrind */
41 #if 0 /* in valgrind */
42 #include <inttypes.h>
43 #include <sys/types.h>
44 #include <string.h>
45 #include <stdio.h>
46 #include <stdlib.h>
47 #endif /* ! in valgrind */
49 #if 0 /* in valgrind */
50 #ifdef HAVE_STRING_H
51 #include <string.h>
52 #else
53 extern size_t strlen(const char *s);
54 extern int strncmp(const char *s1, const char *s2, size_t n);
55 extern void *memset(void *s, int c, size_t n);
56 #endif
57 #endif /* ! in valgrind */
59 #if 0 /* in valgrind */
60 #include <demangle.h>
61 #include "libiberty.h"
62 #endif /* ! in valgrind */
64 #include "vg_libciface.h"
66 #include "ansidecl.h"
67 #include "demangle.h"
68 #include "safe-ctype.h"
70 typedef UChar uint8_t;
71 typedef Char int8_t;
72 typedef UShort uint16_t;
73 typedef Short int16_t;
74 typedef UInt uint32_t;
75 typedef Int int32_t;
76 typedef ULong uint64_t;
77 typedef Long int64_t;
78 #define PRIu64 "llu"
79 #define PRIx64 "llx"
81 struct rust_demangler
83 const char *sym;
84 size_t sym_len;
86 void *callback_opaque;
87 demangle_callbackref callback;
89 /* Position of the next character to read from the symbol. */
90 size_t next;
92 /* Non-zero if any error occurred. */
93 int errored;
95 /* Non-zero if nothing should be printed. */
96 int skipping_printing;
98 /* Non-zero if printing should be verbose (e.g. include hashes). */
99 int verbose;
101 /* Rust mangling version, with legacy mangling being -1. */
102 int version;
104 uint64_t bound_lifetime_depth;
107 /* Parsing functions. */
109 static char
110 peek (const struct rust_demangler *rdm)
112 if (rdm->next < rdm->sym_len)
113 return rdm->sym[rdm->next];
114 return 0;
117 static int
118 eat (struct rust_demangler *rdm, char c)
120 if (peek (rdm) == c)
122 rdm->next++;
123 return 1;
125 else
126 return 0;
129 static char
130 next (struct rust_demangler *rdm)
132 char c = peek (rdm);
133 if (!c)
134 rdm->errored = 1;
135 else
136 rdm->next++;
137 return c;
140 static uint64_t
141 parse_integer_62 (struct rust_demangler *rdm)
143 char c;
144 uint64_t x;
146 if (eat (rdm, '_'))
147 return 0;
149 x = 0;
150 while (!eat (rdm, '_'))
152 c = next (rdm);
153 x *= 62;
154 if (ISDIGIT (c))
155 x += c - '0';
156 else if (ISLOWER (c))
157 x += 10 + (c - 'a');
158 else if (ISUPPER (c))
159 x += 10 + 26 + (c - 'A');
160 else
162 rdm->errored = 1;
163 return 0;
166 return x + 1;
169 static uint64_t
170 parse_opt_integer_62 (struct rust_demangler *rdm, char tag)
172 if (!eat (rdm, tag))
173 return 0;
174 return 1 + parse_integer_62 (rdm);
/* Parse an optional disambiguator, i.e. an optional base-62 number
   introduced by the `s` tag.  Return 0 when it is absent.  */
static uint64_t
parse_disambiguator (struct rust_demangler *rdm)
{
  uint64_t dis = parse_opt_integer_62 (rdm, 's');

  return dis;
}
183 static size_t
184 parse_hex_nibbles (struct rust_demangler *rdm, uint64_t *value)
186 char c;
187 size_t hex_len;
189 hex_len = 0;
190 *value = 0;
192 while (!eat (rdm, '_'))
194 *value <<= 4;
196 c = next (rdm);
197 if (ISDIGIT (c))
198 *value |= c - '0';
199 else if (c >= 'a' && c <= 'f')
200 *value |= 10 + (c - 'a');
201 else
203 rdm->errored = 1;
204 return 0;
206 hex_len++;
209 return hex_len;
/* An identifier as it appears in the mangled symbol.  Both parts point
   into the symbol text and are not NUL-terminated.  */
struct rust_mangled_ident
{
  /* The ASCII portion of the identifier (NULL when it is empty).  */
  const char *ascii;
  size_t ascii_len;

  /* Punycode insertion codes for the Unicode codepoints, if any
     (NULL when the identifier is not Punycode-encoded).  */
  const char *punycode;
  size_t punycode_len;
};
223 static struct rust_mangled_ident
224 parse_ident (struct rust_demangler *rdm)
226 char c;
227 size_t start, len;
228 int is_punycode = 0;
229 struct rust_mangled_ident ident;
231 ident.ascii = NULL;
232 ident.ascii_len = 0;
233 ident.punycode = NULL;
234 ident.punycode_len = 0;
236 if (rdm->version != -1)
237 is_punycode = eat (rdm, 'u');
239 c = next (rdm);
240 if (!ISDIGIT (c))
242 rdm->errored = 1;
243 return ident;
245 len = c - '0';
247 if (c != '0')
248 while (ISDIGIT (peek (rdm)))
249 len = len * 10 + (next (rdm) - '0');
251 /* Skip past the optional `_` separator (v0). */
252 if (rdm->version != -1)
253 eat (rdm, '_');
255 start = rdm->next;
256 rdm->next += len;
257 /* Check for overflows. */
258 if ((start > rdm->next) || (rdm->next > rdm->sym_len))
260 rdm->errored = 1;
261 return ident;
264 ident.ascii = rdm->sym + start;
265 ident.ascii_len = len;
267 if (is_punycode)
269 ident.punycode_len = 0;
270 while (ident.ascii_len > 0)
272 ident.ascii_len--;
274 /* The last '_' is a separator between ascii & punycode. */
275 if (ident.ascii[ident.ascii_len] == '_')
276 break;
278 ident.punycode_len++;
280 if (!ident.punycode_len)
282 rdm->errored = 1;
283 return ident;
285 ident.punycode = ident.ascii + (len - ident.punycode_len);
288 if (ident.ascii_len == 0)
289 ident.ascii = NULL;
291 return ident;
294 /* Printing functions. */
296 static void
297 print_str (struct rust_demangler *rdm, const char *data, size_t len)
299 if (!rdm->errored && !rdm->skipping_printing)
300 rdm->callback (data, len, rdm->callback_opaque);
/* Print the NUL-terminated string S.  Relies on a variable named
   `rdm` being in scope at the point of use.  */
#define PRINT(s) print_str (rdm, (s), strlen (s))
/* Print X in decimal.  */
static void
print_uint64 (struct rust_demangler *rdm, uint64_t x)
{
  /* Room for the 20 digits of the largest 64-bit value plus the NUL.  */
  char digits[21];

  snprintf (digits, sizeof digits, "%" PRIu64, x);
  PRINT (digits);
}
/* Print X in lowercase hexadecimal, without any prefix.  */
static void
print_uint64_hex (struct rust_demangler *rdm, uint64_t x)
{
  /* Room for the 16 hex digits of a 64-bit value plus the NUL.  */
  char digits[17];

  snprintf (digits, sizeof digits, "%" PRIx64, x);
  PRINT (digits);
}
/* Map a lowercase hex digit (0-9, a-f) to its value 0x0-0xf.  Any
   other character, including uppercase A-F, yields -1.  */
static int
decode_lower_hex_nibble (char nibble)
{
  if (nibble >= '0' && nibble <= '9')
    return nibble - '0';
  else if (nibble >= 'a' && nibble <= 'f')
    return (nibble - 'a') + 0xa;
  else
    return -1;
}
/* Decode the legacy "$...$" escape at the start of E (LEN bytes are
   available).  Recognised forms are `$C$`, the two-letter codes
   `$SP$ $BP$ $RF$ $LT$ $GT$ $LP$ $RP$`, and `$uXX$` with two lowercase
   hex digits naming a printable ASCII character.  On success the
   unescaped character is returned and *OUT_LEN receives the number of
   bytes the escape occupies, both `$` included.  On failure 0 is
   returned and *OUT_LEN is left untouched.  */
static char
decode_legacy_escape (const char *e, size_t len, size_t *out_len)
{
  char c = 0;
  size_t escape_len = 0;
  int lo_nibble = -1, hi_nibble = -1;

  if (len < 3 || e[0] != '$')
    return 0;

  /* Step over the opening `$`.  */
  e++;
  len--;

  if (e[0] == 'C')
    {
      escape_len = 1;

      c = ',';
    }
  else if (len > 2)
    {
      escape_len = 2;

      if (e[0] == 'S' && e[1] == 'P')
        c = '@';
      else if (e[0] == 'B' && e[1] == 'P')
        c = '*';
      else if (e[0] == 'R' && e[1] == 'F')
        c = '&';
      else if (e[0] == 'L' && e[1] == 'T')
        c = '<';
      else if (e[0] == 'G' && e[1] == 'T')
        c = '>';
      else if (e[0] == 'L' && e[1] == 'P')
        c = '(';
      else if (e[0] == 'R' && e[1] == 'P')
        c = ')';
      else if (e[0] == 'u' && len > 3)
        {
          escape_len = 3;

          hi_nibble = decode_lower_hex_nibble (e[1]);
          if (hi_nibble < 0)
            return 0;
          lo_nibble = decode_lower_hex_nibble (e[2]);
          if (lo_nibble < 0)
            return 0;

          /* Only allow non-control ASCII characters.  */
          if (hi_nibble > 7)
            return 0;
          c = (hi_nibble << 4) | lo_nibble;
          /* DEL (0x7f) is a control character just like 0x00-0x1f.  */
          if (c < 0x20 || c == 0x7f)
            return 0;
        }
    }

  /* The escape must be known and closed by a second `$`.  */
  if (!c || len <= escape_len || e[escape_len] != '$')
    return 0;

  *out_len = 2 + escape_len;
  return c;
}
397 static void
398 print_ident (struct rust_demangler *rdm, struct rust_mangled_ident ident)
400 char unescaped;
401 uint8_t *out, *p, d;
402 size_t len, cap, punycode_pos, j;
403 /* Punycode parameters and state. */
404 uint32_t c;
405 size_t base, t_min, t_max, skew, damp, bias, i;
406 size_t delta, w, k, t;
408 if (rdm->errored || rdm->skipping_printing)
409 return;
411 if (rdm->version == -1)
413 /* Ignore leading underscores preceding escape sequences.
414 The mangler inserts an underscore to make sure the
415 identifier begins with a XID_Start character. */
416 if (ident.ascii_len >= 2 && ident.ascii[0] == '_'
417 && ident.ascii[1] == '$')
419 ident.ascii++;
420 ident.ascii_len--;
423 while (ident.ascii_len > 0)
425 /* Handle legacy escape sequences ("$...$", ".." or "."). */
426 if (ident.ascii[0] == '$')
428 unescaped
429 = decode_legacy_escape (ident.ascii, ident.ascii_len, &len);
430 if (unescaped)
431 print_str (rdm, &unescaped, 1);
432 else
434 /* Unexpected escape sequence, print the rest verbatim. */
435 print_str (rdm, ident.ascii, ident.ascii_len);
436 return;
439 else if (ident.ascii[0] == '.')
441 if (ident.ascii_len >= 2 && ident.ascii[1] == '.')
443 /* ".." becomes "::" */
444 PRINT ("::");
445 len = 2;
447 else
449 PRINT (".");
450 len = 1;
453 else
455 /* Print everything before the next escape sequence, at once. */
456 for (len = 0; len < ident.ascii_len; len++)
457 if (ident.ascii[len] == '$' || ident.ascii[len] == '.')
458 break;
460 print_str (rdm, ident.ascii, len);
463 ident.ascii += len;
464 ident.ascii_len -= len;
467 return;
470 if (!ident.punycode)
472 print_str (rdm, ident.ascii, ident.ascii_len);
473 return;
476 len = 0;
477 cap = 4;
478 while (cap < ident.ascii_len)
480 cap *= 2;
481 /* Check for overflows. */
482 if ((cap * 4) / 4 != cap)
484 rdm->errored = 1;
485 return;
489 /* Store the output codepoints as groups of 4 UTF-8 bytes. */
490 out = (uint8_t *)xmalloc (cap * 4);
491 if (!out)
493 rdm->errored = 1;
494 return;
497 /* Populate initial output from ASCII fragment. */
498 for (len = 0; len < ident.ascii_len; len++)
500 p = out + 4 * len;
501 p[0] = 0;
502 p[1] = 0;
503 p[2] = 0;
504 p[3] = ident.ascii[len];
507 /* Punycode parameters and initial state. */
508 base = 36;
509 t_min = 1;
510 t_max = 26;
511 skew = 38;
512 damp = 700;
513 bias = 72;
514 i = 0;
515 c = 0x80;
517 punycode_pos = 0;
518 while (punycode_pos < ident.punycode_len)
520 /* Read one delta value. */
521 delta = 0;
522 w = 1;
523 k = 0;
526 k += base;
527 t = k < bias ? 0 : (k - bias);
528 if (t < t_min)
529 t = t_min;
530 if (t > t_max)
531 t = t_max;
533 if (punycode_pos >= ident.punycode_len)
534 goto cleanup;
535 d = ident.punycode[punycode_pos++];
537 if (ISLOWER (d))
538 d = d - 'a';
539 else if (ISDIGIT (d))
540 d = 26 + (d - '0');
541 else
543 rdm->errored = 1;
544 goto cleanup;
547 delta += d * w;
548 w *= base - t;
550 while (d >= t);
552 /* Compute the new insert position and character. */
553 len++;
554 i += delta;
555 c += i / len;
556 i %= len;
558 /* Ensure enough space is available. */
559 if (cap < len)
561 cap *= 2;
562 /* Check for overflows. */
563 if ((cap * 4) / 4 != cap || cap < len)
565 rdm->errored = 1;
566 goto cleanup;
569 p = (uint8_t *)xrealloc (out, cap * 4);
570 if (!p)
572 rdm->errored = 1;
573 goto cleanup;
575 out = p;
577 /* Move the characters after the insert position. */
578 p = out + i * 4;
579 memmove (p + 4, p, (len - i - 1) * 4);
581 /* Insert the new character, as UTF-8 bytes. */
582 p[0] = c >= 0x10000 ? 0xf0 | (c >> 18) : 0;
583 p[1] = c >= 0x800 ? (c < 0x10000 ? 0xe0 : 0x80) | ((c >> 12) & 0x3f) : 0;
584 p[2] = (c < 0x800 ? 0xc0 : 0x80) | ((c >> 6) & 0x3f);
585 p[3] = 0x80 | (c & 0x3f);
587 /* If there are no more deltas, decoding is complete. */
588 if (punycode_pos == ident.punycode_len)
589 break;
591 i++;
593 /* Perform bias adaptation. */
594 delta /= damp;
595 damp = 2;
597 delta += delta / len;
598 k = 0;
599 while (delta > ((base - t_min) * t_max) / 2)
601 delta /= base - t_min;
602 k += base;
604 bias = k + ((base - t_min + 1) * delta) / (delta + skew);
607 /* Remove all the 0 bytes to leave behind an UTF-8 string. */
608 for (i = 0, j = 0; i < len * 4; i++)
609 if (out[i] != 0)
610 out[j++] = out[i];
612 print_str (rdm, (const char *)out, j);
614 cleanup:
615 free (out);
618 /* Print the lifetime according to the previously decoded index.
619 An index of `0` always refers to `'_`, but starting with `1`,
620 indices refer to late-bound lifetimes introduced by a binder. */
621 static void
622 print_lifetime_from_index (struct rust_demangler *rdm, uint64_t lt)
624 char c;
625 uint64_t depth;
627 PRINT ("'");
628 if (lt == 0)
630 PRINT ("_");
631 return;
634 depth = rdm->bound_lifetime_depth - lt;
635 /* Try to print lifetimes alphabetically first. */
636 if (depth < 26)
638 c = 'a' + depth;
639 print_str (rdm, &c, 1);
641 else
643 /* Use `'_123` after running out of letters. */
644 PRINT ("_");
645 print_uint64 (rdm, depth);
649 /* Demangling functions. */
651 static void demangle_binder (struct rust_demangler *rdm);
652 static void demangle_path (struct rust_demangler *rdm, int in_value);
653 static void demangle_generic_arg (struct rust_demangler *rdm);
654 static void demangle_type (struct rust_demangler *rdm);
655 static int demangle_path_maybe_open_generics (struct rust_demangler *rdm);
656 static void demangle_dyn_trait (struct rust_demangler *rdm);
657 static void demangle_const (struct rust_demangler *rdm);
658 static void demangle_const_uint (struct rust_demangler *rdm);
659 static void demangle_const_int (struct rust_demangler *rdm);
660 static void demangle_const_bool (struct rust_demangler *rdm);
661 static void demangle_const_char (struct rust_demangler *rdm);
663 /* Optionally enter a binder ('G') for late-bound lifetimes,
664 printing e.g. `for<'a, 'b> `, and make those lifetimes visible
665 to the caller (via depth level, which the caller should reset). */
666 static void
667 demangle_binder (struct rust_demangler *rdm)
669 uint64_t i, bound_lifetimes;
671 if (rdm->errored)
672 return;
674 bound_lifetimes = parse_opt_integer_62 (rdm, 'G');
675 if (bound_lifetimes > 0)
677 PRINT ("for<");
678 for (i = 0; i < bound_lifetimes; i++)
680 if (i > 0)
681 PRINT (", ");
682 rdm->bound_lifetime_depth++;
683 print_lifetime_from_index (rdm, 1);
685 PRINT ("> ");
689 static void
690 demangle_path (struct rust_demangler *rdm, int in_value)
692 char tag, ns;
693 int was_skipping_printing;
694 size_t i, backref, old_next;
695 uint64_t dis;
696 struct rust_mangled_ident name;
698 if (rdm->errored)
699 return;
701 switch (tag = next (rdm))
703 case 'C':
704 dis = parse_disambiguator (rdm);
705 name = parse_ident (rdm);
707 print_ident (rdm, name);
708 if (rdm->verbose)
710 PRINT ("[");
711 print_uint64_hex (rdm, dis);
712 PRINT ("]");
714 break;
715 case 'N':
716 ns = next (rdm);
717 if (!ISLOWER (ns) && !ISUPPER (ns))
719 rdm->errored = 1;
720 return;
723 demangle_path (rdm, in_value);
725 dis = parse_disambiguator (rdm);
726 name = parse_ident (rdm);
728 if (ISUPPER (ns))
730 /* Special namespaces, like closures and shims. */
731 PRINT ("::{");
732 switch (ns)
734 case 'C':
735 PRINT ("closure");
736 break;
737 case 'S':
738 PRINT ("shim");
739 break;
740 default:
741 print_str (rdm, &ns, 1);
743 if (name.ascii || name.punycode)
745 PRINT (":");
746 print_ident (rdm, name);
748 PRINT ("#");
749 print_uint64 (rdm, dis);
750 PRINT ("}");
752 else
754 /* Implementation-specific/unspecified namespaces. */
756 if (name.ascii || name.punycode)
758 PRINT ("::");
759 print_ident (rdm, name);
762 break;
763 case 'M':
764 case 'X':
765 /* Ignore the `impl`'s own path.*/
766 parse_disambiguator (rdm);
767 was_skipping_printing = rdm->skipping_printing;
768 rdm->skipping_printing = 1;
769 demangle_path (rdm, in_value);
770 rdm->skipping_printing = was_skipping_printing;
771 /* fallthrough */
772 case 'Y':
773 PRINT ("<");
774 demangle_type (rdm);
775 if (tag != 'M')
777 PRINT (" as ");
778 demangle_path (rdm, 0);
780 PRINT (">");
781 break;
782 case 'I':
783 demangle_path (rdm, in_value);
784 if (in_value)
785 PRINT ("::");
786 PRINT ("<");
787 for (i = 0; !rdm->errored && !eat (rdm, 'E'); i++)
789 if (i > 0)
790 PRINT (", ");
791 demangle_generic_arg (rdm);
793 PRINT (">");
794 break;
795 case 'B':
796 backref = parse_integer_62 (rdm);
797 if (!rdm->skipping_printing)
799 old_next = rdm->next;
800 rdm->next = backref;
801 demangle_path (rdm, in_value);
802 rdm->next = old_next;
804 break;
805 default:
806 rdm->errored = 1;
807 return;
/* Demangle one generic argument: a lifetime (tag 'L'), a const
   (tag 'K'), or otherwise a type.  */
static void
demangle_generic_arg (struct rust_demangler *rdm)
{
  if (eat (rdm, 'L'))
    {
      uint64_t index = parse_integer_62 (rdm);

      print_lifetime_from_index (rdm, index);
      return;
    }

  if (eat (rdm, 'K'))
    demangle_const (rdm);
  else
    demangle_type (rdm);
}
/* Return the Rust spelling of the basic type encoded by TAG, or NULL
   when TAG does not name a basic type.  */
static const char *
basic_type (char tag)
{
  static const struct
  {
    char tag;
    const char *name;
  } basic_types[] = {
    { 'b', "bool" },
    { 'c', "char" },
    { 'e', "str" },
    { 'u', "()" },
    { 'a', "i8" },
    { 's', "i16" },
    { 'l', "i32" },
    { 'x', "i64" },
    { 'n', "i128" },
    { 'i', "isize" },
    { 'h', "u8" },
    { 't', "u16" },
    { 'm', "u32" },
    { 'y', "u64" },
    { 'o', "u128" },
    { 'j', "usize" },
    { 'f', "f32" },
    { 'd', "f64" },
    { 'z', "!" },
    { 'p', "_" },
    { 'v', "..." }
  };
  size_t idx;

  for (idx = 0; idx < sizeof basic_types / sizeof basic_types[0]; idx++)
    if (basic_types[idx].tag == tag)
      return basic_types[idx].name;

  return NULL;
}
879 static void
880 demangle_type (struct rust_demangler *rdm)
882 char tag;
883 size_t i, old_next, backref;
884 uint64_t lt, old_bound_lifetime_depth;
885 const char *basic;
886 struct rust_mangled_ident abi;
888 if (rdm->errored)
889 return;
891 tag = next (rdm);
893 basic = basic_type (tag);
894 if (basic)
896 PRINT (basic);
897 return;
900 switch (tag)
902 case 'R':
903 case 'Q':
904 PRINT ("&");
905 if (eat (rdm, 'L'))
907 lt = parse_integer_62 (rdm);
908 if (lt)
910 print_lifetime_from_index (rdm, lt);
911 PRINT (" ");
914 if (tag != 'R')
915 PRINT ("mut ");
916 demangle_type (rdm);
917 break;
918 case 'P':
919 case 'O':
920 PRINT ("*");
921 if (tag != 'P')
922 PRINT ("mut ");
923 else
924 PRINT ("const ");
925 demangle_type (rdm);
926 break;
927 case 'A':
928 case 'S':
929 PRINT ("[");
930 demangle_type (rdm);
931 if (tag == 'A')
933 PRINT ("; ");
934 demangle_const (rdm);
936 PRINT ("]");
937 break;
938 case 'T':
939 PRINT ("(");
940 for (i = 0; !rdm->errored && !eat (rdm, 'E'); i++)
942 if (i > 0)
943 PRINT (", ");
944 demangle_type (rdm);
946 if (i == 1)
947 PRINT (",");
948 PRINT (")");
949 break;
950 case 'F':
951 old_bound_lifetime_depth = rdm->bound_lifetime_depth;
952 demangle_binder (rdm);
954 if (eat (rdm, 'U'))
955 PRINT ("unsafe ");
957 if (eat (rdm, 'K'))
959 if (eat (rdm, 'C'))
961 abi.ascii = "C";
962 abi.ascii_len = 1;
964 else
966 abi = parse_ident (rdm);
967 if (!abi.ascii || abi.punycode)
969 rdm->errored = 1;
970 goto restore;
974 PRINT ("extern \"");
976 /* If the ABI had any `-`, they were replaced with `_`,
977 so the parts between `_` have to be re-joined with `-`. */
978 for (i = 0; i < abi.ascii_len; i++)
980 if (abi.ascii[i] == '_')
982 print_str (rdm, abi.ascii, i);
983 PRINT ("-");
984 abi.ascii += i + 1;
985 abi.ascii_len -= i + 1;
986 i = 0;
989 print_str (rdm, abi.ascii, abi.ascii_len);
991 PRINT ("\" ");
994 PRINT ("fn(");
995 for (i = 0; !rdm->errored && !eat (rdm, 'E'); i++)
997 if (i > 0)
998 PRINT (", ");
999 demangle_type (rdm);
1001 PRINT (")");
1003 if (eat (rdm, 'u'))
1005 /* Skip printing the return type if it's 'u', i.e. `()`. */
1007 else
1009 PRINT (" -> ");
1010 demangle_type (rdm);
1013 /* Restore `bound_lifetime_depth` to outside the binder. */
1014 restore:
1015 rdm->bound_lifetime_depth = old_bound_lifetime_depth;
1016 break;
1017 case 'D':
1018 PRINT ("dyn ");
1020 old_bound_lifetime_depth = rdm->bound_lifetime_depth;
1021 demangle_binder (rdm);
1023 for (i = 0; !rdm->errored && !eat (rdm, 'E'); i++)
1025 if (i > 0)
1026 PRINT (" + ");
1027 demangle_dyn_trait (rdm);
1030 /* Restore `bound_lifetime_depth` to outside the binder. */
1031 rdm->bound_lifetime_depth = old_bound_lifetime_depth;
1033 if (!eat (rdm, 'L'))
1035 rdm->errored = 1;
1036 return;
1038 lt = parse_integer_62 (rdm);
1039 if (lt)
1041 PRINT (" + ");
1042 print_lifetime_from_index (rdm, lt);
1044 break;
1045 case 'B':
1046 backref = parse_integer_62 (rdm);
1047 if (!rdm->skipping_printing)
1049 old_next = rdm->next;
1050 rdm->next = backref;
1051 demangle_type (rdm);
1052 rdm->next = old_next;
1054 break;
1055 default:
1056 /* Go back to the tag, so `demangle_path` also sees it. */
1057 rdm->next--;
1058 demangle_path (rdm, 0);
1062 /* A trait in a trait object may have some "existential projections"
1063 (i.e. associated type bindings) after it, which should be printed
1064 in the `<...>` of the trait, e.g. `dyn Trait<T, U, Assoc=X>`.
1065 To this end, this method will keep the `<...>` of an 'I' path
1066 open, by omitting the `>`, and return `Ok(true)` in that case. */
1067 static int
1068 demangle_path_maybe_open_generics (struct rust_demangler *rdm)
1070 int open;
1071 size_t i, old_next, backref;
1073 open = 0;
1075 if (rdm->errored)
1076 return open;
1078 if (eat (rdm, 'B'))
1080 backref = parse_integer_62 (rdm);
1081 if (!rdm->skipping_printing)
1083 old_next = rdm->next;
1084 rdm->next = backref;
1085 open = demangle_path_maybe_open_generics (rdm);
1086 rdm->next = old_next;
1089 else if (eat (rdm, 'I'))
1091 demangle_path (rdm, 0);
1092 PRINT ("<");
1093 open = 1;
1094 for (i = 0; !rdm->errored && !eat (rdm, 'E'); i++)
1096 if (i > 0)
1097 PRINT (", ");
1098 demangle_generic_arg (rdm);
1101 else
1102 demangle_path (rdm, 0);
1103 return open;
1106 static void
1107 demangle_dyn_trait (struct rust_demangler *rdm)
1109 int open;
1110 struct rust_mangled_ident name;
1112 if (rdm->errored)
1113 return;
1115 open = demangle_path_maybe_open_generics (rdm);
1117 while (eat (rdm, 'p'))
1119 if (!open)
1120 PRINT ("<");
1121 else
1122 PRINT (", ");
1123 open = 1;
1125 name = parse_ident (rdm);
1126 print_ident (rdm, name);
1127 PRINT (" = ");
1128 demangle_type (rdm);
1131 if (open)
1132 PRINT (">");
1135 static void
1136 demangle_const (struct rust_demangler *rdm)
1138 char ty_tag;
1139 size_t old_next, backref;
1141 if (rdm->errored)
1142 return;
1144 if (eat (rdm, 'B'))
1146 backref = parse_integer_62 (rdm);
1147 if (!rdm->skipping_printing)
1149 old_next = rdm->next;
1150 rdm->next = backref;
1151 demangle_const (rdm);
1152 rdm->next = old_next;
1154 return;
1157 ty_tag = next (rdm);
1158 switch (ty_tag)
1160 /* Placeholder. */
1161 case 'p':
1162 PRINT ("_");
1163 return;
1165 /* Unsigned integer types. */
1166 case 'h':
1167 case 't':
1168 case 'm':
1169 case 'y':
1170 case 'o':
1171 case 'j':
1172 demangle_const_uint (rdm);
1173 break;
1175 /* Signed integer types. */
1176 case 'a':
1177 case 's':
1178 case 'l':
1179 case 'x':
1180 case 'n':
1181 case 'i':
1182 demangle_const_int (rdm);
1183 break;
1185 /* Boolean. */
1186 case 'b':
1187 demangle_const_bool (rdm);
1188 break;
1190 /* Character. */
1191 case 'c':
1192 demangle_const_char (rdm);
1193 break;
1195 default:
1196 rdm->errored = 1;
1197 return;
1200 if (rdm->errored)
1201 return;
1203 if (rdm->verbose)
1205 PRINT (": ");
1206 PRINT (basic_type (ty_tag));
1210 static void
1211 demangle_const_uint (struct rust_demangler *rdm)
1213 size_t hex_len;
1214 uint64_t value;
1216 if (rdm->errored)
1217 return;
1219 hex_len = parse_hex_nibbles (rdm, &value);
1221 if (hex_len > 16)
1223 /* Print anything that doesn't fit in `uint64_t` verbatim. */
1224 PRINT ("0x");
1225 print_str (rdm, rdm->sym + (rdm->next - hex_len), hex_len);
1227 else if (hex_len > 0)
1228 print_uint64 (rdm, value);
1229 else
1230 rdm->errored = 1;
/* Demangle a signed integer const: an optional `n` marking a
   negative value, followed by the magnitude as an unsigned const.  */
static void
demangle_const_int (struct rust_demangler *rdm)
{
  int negative = eat (rdm, 'n');

  if (negative)
    PRINT ("-");
  demangle_const_uint (rdm);
}
1241 static void
1242 demangle_const_bool (struct rust_demangler *rdm)
1244 uint64_t value;
1246 if (parse_hex_nibbles (rdm, &value) != 1)
1248 rdm->errored = 1;
1249 return;
1252 if (value == 0)
1253 PRINT ("false");
1254 else if (value == 1)
1255 PRINT ("true");
1256 else
1257 rdm->errored = 1;
1260 static void
1261 demangle_const_char (struct rust_demangler *rdm)
1263 size_t hex_len;
1264 uint64_t value;
1266 hex_len = parse_hex_nibbles (rdm, &value);
1268 if (hex_len == 0 || hex_len > 8)
1270 rdm->errored = 1;
1271 return;
1274 /* Match Rust's character "debug" output as best as we can. */
1275 PRINT ("'");
1276 if (value == '\t')
1277 PRINT ("\\t");
1278 else if (value == '\r')
1279 PRINT ("\\r");
1280 else if (value == '\n')
1281 PRINT ("\\n");
1282 else if (value > ' ' && value < '~')
1283 /* Rust also considers many non-ASCII codepoints to be printable, but
1284 that logic is not easily ported to C. */
1285 print_str (rdm, (char *) &value, 1);
1286 else
1288 PRINT ("\\u{");
1289 print_uint64_hex (rdm, value);
1290 PRINT ("}");
1292 PRINT ("'");
1295 /* A legacy hash is the prefix "h" followed by 16 lowercase hex digits.
1296 The hex digits must contain at least 5 distinct digits. */
1297 static int
1298 is_legacy_prefixed_hash (struct rust_mangled_ident ident)
1300 uint16_t seen;
1301 int nibble;
1302 size_t i, count;
1304 if (ident.ascii_len != 17 || ident.ascii[0] != 'h')
1305 return 0;
1307 seen = 0;
1308 for (i = 0; i < 16; i++)
1310 nibble = decode_lower_hex_nibble (ident.ascii[1 + i]);
1311 if (nibble < 0)
1312 return 0;
1313 seen |= (uint16_t)1 << nibble;
1316 /* Count how many distinct digits were seen. */
1317 count = 0;
1318 while (seen)
1320 if (seen & 1)
1321 count++;
1322 seen >>= 1;
1325 return count >= 5;
1329 rust_demangle_callback (const char *mangled, int options,
1330 demangle_callbackref callback, void *opaque)
1332 const char *p;
1333 struct rust_demangler rdm;
1334 struct rust_mangled_ident ident;
1336 rdm.sym = mangled;
1337 rdm.sym_len = 0;
1339 rdm.callback_opaque = opaque;
1340 rdm.callback = callback;
1342 rdm.next = 0;
1343 rdm.errored = 0;
1344 rdm.skipping_printing = 0;
1345 rdm.verbose = (options & DMGL_VERBOSE) != 0;
1346 rdm.version = 0;
1347 rdm.bound_lifetime_depth = 0;
1349 /* Rust symbols always start with _R (v0) or _ZN (legacy). */
1350 if (rdm.sym[0] == '_' && rdm.sym[1] == 'R')
1351 rdm.sym += 2;
1352 else if (rdm.sym[0] == '_' && rdm.sym[1] == 'Z' && rdm.sym[2] == 'N')
1354 rdm.sym += 3;
1355 rdm.version = -1;
1357 else
1358 return 0;
1360 /* Paths (v0) always start with uppercase characters. */
1361 if (rdm.version != -1 && !ISUPPER (rdm.sym[0]))
1362 return 0;
1364 /* Rust symbols (v0) use only [_0-9a-zA-Z] characters. */
1365 for (p = rdm.sym; *p; p++)
1367 rdm.sym_len++;
1369 if (*p == '_' || ISALNUM (*p))
1370 continue;
1372 /* Legacy Rust symbols can also contain [.:$] characters. */
1373 if (rdm.version == -1 && (*p == '$' || *p == '.' || *p == ':'))
1374 continue;
1376 return 0;
1379 /* Legacy Rust symbols need to be handled separately. */
1380 if (rdm.version == -1)
1382 /* Legacy Rust symbols always end with E. */
1383 if (!(rdm.sym_len > 0 && rdm.sym[rdm.sym_len - 1] == 'E'))
1384 return 0;
1385 rdm.sym_len--;
1387 /* Legacy Rust symbols also always end with a path segment
1388 that encodes a 16 hex digit hash, i.e. '17h[a-f0-9]{16}'.
1389 This early check, before any parse_ident calls, should
1390 quickly filter out most C++ symbols unrelated to Rust. */
1391 if (!(rdm.sym_len > 19
1392 && !memcmp (&rdm.sym[rdm.sym_len - 19], "17h", 3)))
1393 return 0;
1397 ident = parse_ident (&rdm);
1398 if (rdm.errored || !ident.ascii)
1399 return 0;
1401 while (rdm.next < rdm.sym_len);
1403 /* The last path segment should be the hash. */
1404 if (!is_legacy_prefixed_hash (ident))
1405 return 0;
1407 /* Reset the state for a second pass, to print the symbol. */
1408 rdm.next = 0;
1409 if (!rdm.verbose && rdm.sym_len > 19)
1411 /* Hide the last segment, containing the hash, if not verbose. */
1412 rdm.sym_len -= 19;
1417 if (rdm.next > 0)
1418 print_str (&rdm, "::", 2);
1420 ident = parse_ident (&rdm);
1421 print_ident (&rdm, ident);
1423 while (rdm.next < rdm.sym_len);
1425 else
1427 demangle_path (&rdm, 1);
1429 /* Skip instantiating crate. */
1430 if (!rdm.errored && rdm.next < rdm.sym_len)
1432 rdm.skipping_printing = 1;
1433 demangle_path (&rdm, 0);
1436 /* It's an error to not reach the end. */
1437 rdm.errored |= rdm.next != rdm.sym_len;
1440 return !rdm.errored;
/* Growable string buffer.  */
struct str_buf
{
  /* Heap storage (NULL while nothing has been allocated).  */
  char *ptr;
  /* Number of bytes currently stored.  */
  size_t len;
  /* Number of bytes allocated at PTR.  */
  size_t cap;
  /* Non-zero once growing the buffer has failed.  */
  int errored;
};
1452 static void
1453 str_buf_reserve (struct str_buf *buf, size_t extra)
1455 size_t available, min_new_cap, new_cap;
1456 char *new_ptr;
1458 /* Allocation failed before. */
1459 if (buf->errored)
1460 return;
1462 available = buf->cap - buf->len;
1464 if (extra <= available)
1465 return;
1467 min_new_cap = buf->cap + (extra - available);
1469 /* Check for overflows. */
1470 if (min_new_cap < buf->cap)
1472 buf->errored = 1;
1473 return;
1476 new_cap = buf->cap;
1478 if (new_cap == 0)
1479 new_cap = 4;
1481 /* Double capacity until sufficiently large. */
1482 while (new_cap < min_new_cap)
1484 new_cap *= 2;
1486 /* Check for overflows. */
1487 if (new_cap < buf->cap)
1489 buf->errored = 1;
1490 return;
1494 new_ptr = (char *)xrealloc (buf->ptr, new_cap);
1495 if (new_ptr == NULL)
1497 free (buf->ptr);
1498 buf->ptr = NULL;
1499 buf->len = 0;
1500 buf->cap = 0;
1501 buf->errored = 1;
1503 else
1505 buf->ptr = new_ptr;
1506 buf->cap = new_cap;
1510 static void
1511 str_buf_append (struct str_buf *buf, const char *data, size_t len)
1513 str_buf_reserve (buf, len);
1514 if (buf->errored)
1515 return;
1517 memcpy (buf->ptr + buf->len, data, len);
1518 buf->len += len;
/* Demangler output callback that appends to the `struct str_buf`
   passed as OPAQUE.  */
static void
str_buf_demangle_callback (const char *data, size_t len, void *opaque)
{
  struct str_buf *buf = (struct str_buf *)opaque;

  str_buf_append (buf, data, len);
}
1527 char *
1528 rust_demangle (const char *mangled, int options)
1530 struct str_buf out;
1531 int success;
1533 out.ptr = NULL;
1534 out.len = 0;
1535 out.cap = 0;
1536 out.errored = 0;
1538 success = rust_demangle_callback (mangled, options,
1539 str_buf_demangle_callback, &out);
1541 if (!success)
1543 free (out.ptr);
1544 return NULL;
1547 str_buf_append (&out, "\0", 1);
1548 return out.ptr;