Daily bump.
[official-gcc.git] / libiberty / rust-demangle.c
blob449941b56dc16461aa4cf182ad95607ee20d4aed
1 /* Demangler for the Rust programming language
2 Copyright (C) 2016-2021 Free Software Foundation, Inc.
3 Written by David Tolnay (dtolnay@gmail.com).
4 Rewritten by Eduard-Mihai Burtescu (eddyb@lyken.rs) for v0 support.
6 This file is part of the libiberty library.
7 Libiberty is free software; you can redistribute it and/or
8 modify it under the terms of the GNU Library General Public
9 License as published by the Free Software Foundation; either
10 version 2 of the License, or (at your option) any later version.
12 In addition to the permissions in the GNU Library General Public
13 License, the Free Software Foundation gives you unlimited permission
14 to link the compiled version of this file into combinations with other
15 programs, and to distribute those combinations without any restriction
16 coming from the use of this file. (The Library Public License
17 restrictions do apply in other respects; for example, they cover
18 modification of the file, and distribution when not linked into a
19 combined executable.)
21 Libiberty is distributed in the hope that it will be useful,
22 but WITHOUT ANY WARRANTY; without even the implied warranty of
23 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
24 Library General Public License for more details.
26 You should have received a copy of the GNU Library General Public
27 License along with libiberty; see the file COPYING.LIB.
28 If not, see <http://www.gnu.org/licenses/>. */
31 #ifdef HAVE_CONFIG_H
32 #include "config.h"
33 #endif
35 #include "safe-ctype.h"
37 #include <inttypes.h>
38 #include <sys/types.h>
39 #include <string.h>
40 #include <stdio.h>
41 #include <stdlib.h>
43 #ifdef HAVE_STRING_H
44 #include <string.h>
45 #else
46 extern size_t strlen(const char *s);
47 extern int strncmp(const char *s1, const char *s2, size_t n);
48 extern void *memset(void *s, int c, size_t n);
49 #endif
51 #include <demangle.h>
52 #include "libiberty.h"
54 struct rust_demangler
56 const char *sym;
57 size_t sym_len;
59 void *callback_opaque;
60 demangle_callbackref callback;
62 /* Position of the next character to read from the symbol. */
63 size_t next;
65 /* Non-zero if any error occurred. */
66 int errored;
68 /* Non-zero if nothing should be printed. */
69 int skipping_printing;
71 /* Non-zero if printing should be verbose (e.g. include hashes). */
72 int verbose;
74 /* Rust mangling version, with legacy mangling being -1. */
75 int version;
77 uint64_t bound_lifetime_depth;
80 /* Parsing functions. */
82 static char
83 peek (const struct rust_demangler *rdm)
85 if (rdm->next < rdm->sym_len)
86 return rdm->sym[rdm->next];
87 return 0;
90 static int
91 eat (struct rust_demangler *rdm, char c)
93 if (peek (rdm) == c)
95 rdm->next++;
96 return 1;
98 else
99 return 0;
102 static char
103 next (struct rust_demangler *rdm)
105 char c = peek (rdm);
106 if (!c)
107 rdm->errored = 1;
108 else
109 rdm->next++;
110 return c;
113 static uint64_t
114 parse_integer_62 (struct rust_demangler *rdm)
116 char c;
117 uint64_t x;
119 if (eat (rdm, '_'))
120 return 0;
122 x = 0;
123 while (!eat (rdm, '_'))
125 c = next (rdm);
126 x *= 62;
127 if (ISDIGIT (c))
128 x += c - '0';
129 else if (ISLOWER (c))
130 x += 10 + (c - 'a');
131 else if (ISUPPER (c))
132 x += 10 + 26 + (c - 'A');
133 else
135 rdm->errored = 1;
136 return 0;
139 return x + 1;
142 static uint64_t
143 parse_opt_integer_62 (struct rust_demangler *rdm, char tag)
145 if (!eat (rdm, tag))
146 return 0;
147 return 1 + parse_integer_62 (rdm);
/* Parse an optional disambiguator, i.e. an optional base-62 number
   tagged with `s`.  Return 0 when there is none.  */
static uint64_t
parse_disambiguator (struct rust_demangler *rdm)
{
  uint64_t dis = parse_opt_integer_62 (rdm, 's');

  return dis;
}
156 static size_t
157 parse_hex_nibbles (struct rust_demangler *rdm, uint64_t *value)
159 char c;
160 size_t hex_len;
162 hex_len = 0;
163 *value = 0;
165 while (!eat (rdm, '_'))
167 *value <<= 4;
169 c = next (rdm);
170 if (ISDIGIT (c))
171 *value |= c - '0';
172 else if (c >= 'a' && c <= 'f')
173 *value |= 10 + (c - 'a');
174 else
176 rdm->errored = 1;
177 return 0;
179 hex_len++;
182 return hex_len;
/* An identifier as it appears in a mangled symbol.  Both parts point
   into the symbol itself and are not NUL-terminated.  */
struct rust_mangled_ident
{
  /* ASCII part of the identifier.  */
  const char *ascii;
  size_t ascii_len;

  /* Punycode insertion codes for Unicode codepoints, if any.  */
  const char *punycode;
  size_t punycode_len;
};
196 static struct rust_mangled_ident
197 parse_ident (struct rust_demangler *rdm)
199 char c;
200 size_t start, len;
201 int is_punycode = 0;
202 struct rust_mangled_ident ident;
204 ident.ascii = NULL;
205 ident.ascii_len = 0;
206 ident.punycode = NULL;
207 ident.punycode_len = 0;
209 if (rdm->version != -1)
210 is_punycode = eat (rdm, 'u');
212 c = next (rdm);
213 if (!ISDIGIT (c))
215 rdm->errored = 1;
216 return ident;
218 len = c - '0';
220 if (c != '0')
221 while (ISDIGIT (peek (rdm)))
222 len = len * 10 + (next (rdm) - '0');
224 /* Skip past the optional `_` separator (v0). */
225 if (rdm->version != -1)
226 eat (rdm, '_');
228 start = rdm->next;
229 rdm->next += len;
230 /* Check for overflows. */
231 if ((start > rdm->next) || (rdm->next > rdm->sym_len))
233 rdm->errored = 1;
234 return ident;
237 ident.ascii = rdm->sym + start;
238 ident.ascii_len = len;
240 if (is_punycode)
242 ident.punycode_len = 0;
243 while (ident.ascii_len > 0)
245 ident.ascii_len--;
247 /* The last '_' is a separator between ascii & punycode. */
248 if (ident.ascii[ident.ascii_len] == '_')
249 break;
251 ident.punycode_len++;
253 if (!ident.punycode_len)
255 rdm->errored = 1;
256 return ident;
258 ident.punycode = ident.ascii + (len - ident.punycode_len);
261 if (ident.ascii_len == 0)
262 ident.ascii = NULL;
264 return ident;
267 /* Printing functions. */
269 static void
270 print_str (struct rust_demangler *rdm, const char *data, size_t len)
272 if (!rdm->errored && !rdm->skipping_printing)
273 rdm->callback (data, len, rdm->callback_opaque);
/* Print the NUL-terminated string S.  Expects a variable named `rdm`
   in the calling scope, and evaluates S twice.  */
#define PRINT(s) print_str (rdm, (s), strlen (s))
/* Print X in decimal.  */
static void
print_uint64 (struct rust_demangler *rdm, uint64_t x)
{
  /* 20 decimal digits are enough for any 64-bit value, plus the NUL.  */
  char digits[21];

  snprintf (digits, sizeof digits, "%" PRIu64, x);
  print_str (rdm, digits, strlen (digits));
}
/* Print X in lowercase hexadecimal, without any prefix.  */
static void
print_uint64_hex (struct rust_demangler *rdm, uint64_t x)
{
  /* 16 hex digits are enough for any 64-bit value, plus the NUL.  */
  char digits[17];

  snprintf (digits, sizeof digits, "%" PRIx64, x);
  print_str (rdm, digits, strlen (digits));
}
/* Return a 0x0-0xf value if the char is 0-9a-f, and -1 otherwise.
   Uppercase hex digits are deliberately not accepted.  */
static int
decode_lower_hex_nibble (char nibble)
{
  int value = -1;

  if (nibble >= '0' && nibble <= '9')
    value = nibble - '0';
  else if (nibble >= 'a' && nibble <= 'f')
    value = 0xa + (nibble - 'a');

  return value;
}
/* Decode the legacy "$...$" escape at the start of E, which has LEN
   bytes available.  Recognised escapes are `$C$` (,), `$SP$` (@),
   `$BP$` (*), `$RF$` (&), `$LT$` (<), `$GT$` (>), `$LP$` ((),
   `$RP$` ()) and `$uXX$` with two lowercase hex digits.

   Return the unescaped character and store the total length of the
   escape, including both `$`, in *OUT_LEN.  Return 0, leaving
   *OUT_LEN untouched, if the escape is invalid.  */
static char
decode_legacy_escape (const char *e, size_t len, size_t *out_len)
{
  char c = 0;
  size_t escape_len = 0;
  int lo_nibble = -1, hi_nibble = -1;

  /* The shortest escape, `$C$`, is three bytes long.  */
  if (len < 3 || e[0] != '$')
    return 0;

  e++;
  len--;

  if (e[0] == 'C')
    {
      escape_len = 1;

      c = ',';
    }
  else if (len > 2)
    {
      escape_len = 2;

      if (e[0] == 'S' && e[1] == 'P')
	c = '@';
      else if (e[0] == 'B' && e[1] == 'P')
	c = '*';
      else if (e[0] == 'R' && e[1] == 'F')
	c = '&';
      else if (e[0] == 'L' && e[1] == 'T')
	c = '<';
      else if (e[0] == 'G' && e[1] == 'T')
	c = '>';
      else if (e[0] == 'L' && e[1] == 'P')
	c = '(';
      else if (e[0] == 'R' && e[1] == 'P')
	c = ')';
      else if (e[0] == 'u' && len > 3)
	{
	  escape_len = 3;

	  hi_nibble = decode_lower_hex_nibble (e[1]);
	  if (hi_nibble < 0)
	    return 0;
	  lo_nibble = decode_lower_hex_nibble (e[2]);
	  if (lo_nibble < 0)
	    return 0;

	  /* Only allow non-control ASCII characters.  */
	  if (hi_nibble > 7)
	    return 0;
	  c = (hi_nibble << 4) | lo_nibble;
	  /* DEL (0x7f) is a control character just like 0x00-0x1f.  */
	  if (c < 0x20 || c == 0x7f)
	    return 0;
	}
    }

  /* The escape must be known and closed by a second `$`.  */
  if (!c || len <= escape_len || e[escape_len] != '$')
    return 0;

  *out_len = 2 + escape_len;
  return c;
}
370 static void
371 print_ident (struct rust_demangler *rdm, struct rust_mangled_ident ident)
373 char unescaped;
374 uint8_t *out, *p, d;
375 size_t len, cap, punycode_pos, j;
376 /* Punycode parameters and state. */
377 uint32_t c;
378 size_t base, t_min, t_max, skew, damp, bias, i;
379 size_t delta, w, k, t;
381 if (rdm->errored || rdm->skipping_printing)
382 return;
384 if (rdm->version == -1)
386 /* Ignore leading underscores preceding escape sequences.
387 The mangler inserts an underscore to make sure the
388 identifier begins with a XID_Start character. */
389 if (ident.ascii_len >= 2 && ident.ascii[0] == '_'
390 && ident.ascii[1] == '$')
392 ident.ascii++;
393 ident.ascii_len--;
396 while (ident.ascii_len > 0)
398 /* Handle legacy escape sequences ("$...$", ".." or "."). */
399 if (ident.ascii[0] == '$')
401 unescaped
402 = decode_legacy_escape (ident.ascii, ident.ascii_len, &len);
403 if (unescaped)
404 print_str (rdm, &unescaped, 1);
405 else
407 /* Unexpected escape sequence, print the rest verbatim. */
408 print_str (rdm, ident.ascii, ident.ascii_len);
409 return;
412 else if (ident.ascii[0] == '.')
414 if (ident.ascii_len >= 2 && ident.ascii[1] == '.')
416 /* ".." becomes "::" */
417 PRINT ("::");
418 len = 2;
420 else
422 PRINT (".");
423 len = 1;
426 else
428 /* Print everything before the next escape sequence, at once. */
429 for (len = 0; len < ident.ascii_len; len++)
430 if (ident.ascii[len] == '$' || ident.ascii[len] == '.')
431 break;
433 print_str (rdm, ident.ascii, len);
436 ident.ascii += len;
437 ident.ascii_len -= len;
440 return;
443 if (!ident.punycode)
445 print_str (rdm, ident.ascii, ident.ascii_len);
446 return;
449 len = 0;
450 cap = 4;
451 while (cap < ident.ascii_len)
453 cap *= 2;
454 /* Check for overflows. */
455 if ((cap * 4) / 4 != cap)
457 rdm->errored = 1;
458 return;
462 /* Store the output codepoints as groups of 4 UTF-8 bytes. */
463 out = (uint8_t *)malloc (cap * 4);
464 if (!out)
466 rdm->errored = 1;
467 return;
470 /* Populate initial output from ASCII fragment. */
471 for (len = 0; len < ident.ascii_len; len++)
473 p = out + 4 * len;
474 p[0] = 0;
475 p[1] = 0;
476 p[2] = 0;
477 p[3] = ident.ascii[len];
480 /* Punycode parameters and initial state. */
481 base = 36;
482 t_min = 1;
483 t_max = 26;
484 skew = 38;
485 damp = 700;
486 bias = 72;
487 i = 0;
488 c = 0x80;
490 punycode_pos = 0;
491 while (punycode_pos < ident.punycode_len)
493 /* Read one delta value. */
494 delta = 0;
495 w = 1;
496 k = 0;
499 k += base;
500 t = k < bias ? 0 : (k - bias);
501 if (t < t_min)
502 t = t_min;
503 if (t > t_max)
504 t = t_max;
506 if (punycode_pos >= ident.punycode_len)
507 goto cleanup;
508 d = ident.punycode[punycode_pos++];
510 if (ISLOWER (d))
511 d = d - 'a';
512 else if (ISDIGIT (d))
513 d = 26 + (d - '0');
514 else
516 rdm->errored = 1;
517 goto cleanup;
520 delta += d * w;
521 w *= base - t;
523 while (d >= t);
525 /* Compute the new insert position and character. */
526 len++;
527 i += delta;
528 c += i / len;
529 i %= len;
531 /* Ensure enough space is available. */
532 if (cap < len)
534 cap *= 2;
535 /* Check for overflows. */
536 if ((cap * 4) / 4 != cap || cap < len)
538 rdm->errored = 1;
539 goto cleanup;
542 p = (uint8_t *)realloc (out, cap * 4);
543 if (!p)
545 rdm->errored = 1;
546 goto cleanup;
548 out = p;
550 /* Move the characters after the insert position. */
551 p = out + i * 4;
552 memmove (p + 4, p, (len - i - 1) * 4);
554 /* Insert the new character, as UTF-8 bytes. */
555 p[0] = c >= 0x10000 ? 0xf0 | (c >> 18) : 0;
556 p[1] = c >= 0x800 ? (c < 0x10000 ? 0xe0 : 0x80) | ((c >> 12) & 0x3f) : 0;
557 p[2] = (c < 0x800 ? 0xc0 : 0x80) | ((c >> 6) & 0x3f);
558 p[3] = 0x80 | (c & 0x3f);
560 /* If there are no more deltas, decoding is complete. */
561 if (punycode_pos == ident.punycode_len)
562 break;
564 i++;
566 /* Perform bias adaptation. */
567 delta /= damp;
568 damp = 2;
570 delta += delta / len;
571 k = 0;
572 while (delta > ((base - t_min) * t_max) / 2)
574 delta /= base - t_min;
575 k += base;
577 bias = k + ((base - t_min + 1) * delta) / (delta + skew);
580 /* Remove all the 0 bytes to leave behind an UTF-8 string. */
581 for (i = 0, j = 0; i < len * 4; i++)
582 if (out[i] != 0)
583 out[j++] = out[i];
585 print_str (rdm, (const char *)out, j);
587 cleanup:
588 free (out);
591 /* Print the lifetime according to the previously decoded index.
592 An index of `0` always refers to `'_`, but starting with `1`,
593 indices refer to late-bound lifetimes introduced by a binder. */
594 static void
595 print_lifetime_from_index (struct rust_demangler *rdm, uint64_t lt)
597 char c;
598 uint64_t depth;
600 PRINT ("'");
601 if (lt == 0)
603 PRINT ("_");
604 return;
607 depth = rdm->bound_lifetime_depth - lt;
608 /* Try to print lifetimes alphabetically first. */
609 if (depth < 26)
611 c = 'a' + depth;
612 print_str (rdm, &c, 1);
614 else
616 /* Use `'_123` after running out of letters. */
617 PRINT ("_");
618 print_uint64 (rdm, depth);
622 /* Demangling functions. */
/* The demangling routines are mutually recursive, so all of them are
   declared up front.  */
static void demangle_binder (struct rust_demangler *rdm);
static void demangle_path (struct rust_demangler *rdm, int in_value);
static void demangle_generic_arg (struct rust_demangler *rdm);
static void demangle_type (struct rust_demangler *rdm);
static int demangle_path_maybe_open_generics (struct rust_demangler *rdm);
static void demangle_dyn_trait (struct rust_demangler *rdm);

/* Constants, by kind of value.  */
static void demangle_const (struct rust_demangler *rdm);
static void demangle_const_uint (struct rust_demangler *rdm);
static void demangle_const_int (struct rust_demangler *rdm);
static void demangle_const_bool (struct rust_demangler *rdm);
static void demangle_const_char (struct rust_demangler *rdm);
636 /* Optionally enter a binder ('G') for late-bound lifetimes,
637 printing e.g. `for<'a, 'b> `, and make those lifetimes visible
638 to the caller (via depth level, which the caller should reset). */
639 static void
640 demangle_binder (struct rust_demangler *rdm)
642 uint64_t i, bound_lifetimes;
644 if (rdm->errored)
645 return;
647 bound_lifetimes = parse_opt_integer_62 (rdm, 'G');
648 if (bound_lifetimes > 0)
650 PRINT ("for<");
651 for (i = 0; i < bound_lifetimes; i++)
653 if (i > 0)
654 PRINT (", ");
655 rdm->bound_lifetime_depth++;
656 print_lifetime_from_index (rdm, 1);
658 PRINT ("> ");
662 static void
663 demangle_path (struct rust_demangler *rdm, int in_value)
665 char tag, ns;
666 int was_skipping_printing;
667 size_t i, backref, old_next;
668 uint64_t dis;
669 struct rust_mangled_ident name;
671 if (rdm->errored)
672 return;
674 switch (tag = next (rdm))
676 case 'C':
677 dis = parse_disambiguator (rdm);
678 name = parse_ident (rdm);
680 print_ident (rdm, name);
681 if (rdm->verbose)
683 PRINT ("[");
684 print_uint64_hex (rdm, dis);
685 PRINT ("]");
687 break;
688 case 'N':
689 ns = next (rdm);
690 if (!ISLOWER (ns) && !ISUPPER (ns))
692 rdm->errored = 1;
693 return;
696 demangle_path (rdm, in_value);
698 dis = parse_disambiguator (rdm);
699 name = parse_ident (rdm);
701 if (ISUPPER (ns))
703 /* Special namespaces, like closures and shims. */
704 PRINT ("::{");
705 switch (ns)
707 case 'C':
708 PRINT ("closure");
709 break;
710 case 'S':
711 PRINT ("shim");
712 break;
713 default:
714 print_str (rdm, &ns, 1);
716 if (name.ascii || name.punycode)
718 PRINT (":");
719 print_ident (rdm, name);
721 PRINT ("#");
722 print_uint64 (rdm, dis);
723 PRINT ("}");
725 else
727 /* Implementation-specific/unspecified namespaces. */
729 if (name.ascii || name.punycode)
731 PRINT ("::");
732 print_ident (rdm, name);
735 break;
736 case 'M':
737 case 'X':
738 /* Ignore the `impl`'s own path.*/
739 parse_disambiguator (rdm);
740 was_skipping_printing = rdm->skipping_printing;
741 rdm->skipping_printing = 1;
742 demangle_path (rdm, in_value);
743 rdm->skipping_printing = was_skipping_printing;
744 /* fallthrough */
745 case 'Y':
746 PRINT ("<");
747 demangle_type (rdm);
748 if (tag != 'M')
750 PRINT (" as ");
751 demangle_path (rdm, 0);
753 PRINT (">");
754 break;
755 case 'I':
756 demangle_path (rdm, in_value);
757 if (in_value)
758 PRINT ("::");
759 PRINT ("<");
760 for (i = 0; !rdm->errored && !eat (rdm, 'E'); i++)
762 if (i > 0)
763 PRINT (", ");
764 demangle_generic_arg (rdm);
766 PRINT (">");
767 break;
768 case 'B':
769 backref = parse_integer_62 (rdm);
770 if (!rdm->skipping_printing)
772 old_next = rdm->next;
773 rdm->next = backref;
774 demangle_path (rdm, in_value);
775 rdm->next = old_next;
777 break;
778 default:
779 rdm->errored = 1;
780 return;
/* Demangle and print one generic argument: a lifetime (`L`), a
   constant (`K`), or otherwise a type.  */
static void
demangle_generic_arg (struct rust_demangler *rdm)
{
  uint64_t lt;

  if (eat (rdm, 'L'))
    {
      lt = parse_integer_62 (rdm);
      print_lifetime_from_index (rdm, lt);
      return;
    }

  if (eat (rdm, 'K'))
    {
      demangle_const (rdm);
      return;
    }

  demangle_type (rdm);
}
/* Return the Rust spelling of the basic type encoded by TAG, or NULL
   if TAG does not denote a basic type.  */
static const char *
basic_type (char tag)
{
  static const struct
  {
    char tag;
    const char *name;
  } basic_types[] = {
    { 'b', "bool" },
    { 'c', "char" },
    { 'e', "str" },
    { 'u', "()" },
    { 'a', "i8" },
    { 's', "i16" },
    { 'l', "i32" },
    { 'x', "i64" },
    { 'n', "i128" },
    { 'i', "isize" },
    { 'h', "u8" },
    { 't', "u16" },
    { 'm', "u32" },
    { 'y', "u64" },
    { 'o', "u128" },
    { 'j', "usize" },
    { 'f', "f32" },
    { 'd', "f64" },
    { 'z', "!" },
    { 'p', "_" },
    { 'v', "..." },
  };
  size_t idx;

  for (idx = 0; idx < sizeof basic_types / sizeof basic_types[0]; idx++)
    if (basic_types[idx].tag == tag)
      return basic_types[idx].name;

  return NULL;
}
852 static void
853 demangle_type (struct rust_demangler *rdm)
855 char tag;
856 size_t i, old_next, backref;
857 uint64_t lt, old_bound_lifetime_depth;
858 const char *basic;
859 struct rust_mangled_ident abi;
861 if (rdm->errored)
862 return;
864 tag = next (rdm);
866 basic = basic_type (tag);
867 if (basic)
869 PRINT (basic);
870 return;
873 switch (tag)
875 case 'R':
876 case 'Q':
877 PRINT ("&");
878 if (eat (rdm, 'L'))
880 lt = parse_integer_62 (rdm);
881 if (lt)
883 print_lifetime_from_index (rdm, lt);
884 PRINT (" ");
887 if (tag != 'R')
888 PRINT ("mut ");
889 demangle_type (rdm);
890 break;
891 case 'P':
892 case 'O':
893 PRINT ("*");
894 if (tag != 'P')
895 PRINT ("mut ");
896 else
897 PRINT ("const ");
898 demangle_type (rdm);
899 break;
900 case 'A':
901 case 'S':
902 PRINT ("[");
903 demangle_type (rdm);
904 if (tag == 'A')
906 PRINT ("; ");
907 demangle_const (rdm);
909 PRINT ("]");
910 break;
911 case 'T':
912 PRINT ("(");
913 for (i = 0; !rdm->errored && !eat (rdm, 'E'); i++)
915 if (i > 0)
916 PRINT (", ");
917 demangle_type (rdm);
919 if (i == 1)
920 PRINT (",");
921 PRINT (")");
922 break;
923 case 'F':
924 old_bound_lifetime_depth = rdm->bound_lifetime_depth;
925 demangle_binder (rdm);
927 if (eat (rdm, 'U'))
928 PRINT ("unsafe ");
930 if (eat (rdm, 'K'))
932 if (eat (rdm, 'C'))
934 abi.ascii = "C";
935 abi.ascii_len = 1;
937 else
939 abi = parse_ident (rdm);
940 if (!abi.ascii || abi.punycode)
942 rdm->errored = 1;
943 goto restore;
947 PRINT ("extern \"");
949 /* If the ABI had any `-`, they were replaced with `_`,
950 so the parts between `_` have to be re-joined with `-`. */
951 for (i = 0; i < abi.ascii_len; i++)
953 if (abi.ascii[i] == '_')
955 print_str (rdm, abi.ascii, i);
956 PRINT ("-");
957 abi.ascii += i + 1;
958 abi.ascii_len -= i + 1;
959 i = 0;
962 print_str (rdm, abi.ascii, abi.ascii_len);
964 PRINT ("\" ");
967 PRINT ("fn(");
968 for (i = 0; !rdm->errored && !eat (rdm, 'E'); i++)
970 if (i > 0)
971 PRINT (", ");
972 demangle_type (rdm);
974 PRINT (")");
976 if (eat (rdm, 'u'))
978 /* Skip printing the return type if it's 'u', i.e. `()`. */
980 else
982 PRINT (" -> ");
983 demangle_type (rdm);
986 /* Restore `bound_lifetime_depth` to outside the binder. */
987 restore:
988 rdm->bound_lifetime_depth = old_bound_lifetime_depth;
989 break;
990 case 'D':
991 PRINT ("dyn ");
993 old_bound_lifetime_depth = rdm->bound_lifetime_depth;
994 demangle_binder (rdm);
996 for (i = 0; !rdm->errored && !eat (rdm, 'E'); i++)
998 if (i > 0)
999 PRINT (" + ");
1000 demangle_dyn_trait (rdm);
1003 /* Restore `bound_lifetime_depth` to outside the binder. */
1004 rdm->bound_lifetime_depth = old_bound_lifetime_depth;
1006 if (!eat (rdm, 'L'))
1008 rdm->errored = 1;
1009 return;
1011 lt = parse_integer_62 (rdm);
1012 if (lt)
1014 PRINT (" + ");
1015 print_lifetime_from_index (rdm, lt);
1017 break;
1018 case 'B':
1019 backref = parse_integer_62 (rdm);
1020 if (!rdm->skipping_printing)
1022 old_next = rdm->next;
1023 rdm->next = backref;
1024 demangle_type (rdm);
1025 rdm->next = old_next;
1027 break;
1028 default:
1029 /* Go back to the tag, so `demangle_path` also sees it. */
1030 rdm->next--;
1031 demangle_path (rdm, 0);
1035 /* A trait in a trait object may have some "existential projections"
1036 (i.e. associated type bindings) after it, which should be printed
1037 in the `<...>` of the trait, e.g. `dyn Trait<T, U, Assoc=X>`.
1038 To this end, this method will keep the `<...>` of an 'I' path
1039 open, by omitting the `>`, and return `Ok(true)` in that case. */
1040 static int
1041 demangle_path_maybe_open_generics (struct rust_demangler *rdm)
1043 int open;
1044 size_t i, old_next, backref;
1046 open = 0;
1048 if (rdm->errored)
1049 return open;
1051 if (eat (rdm, 'B'))
1053 backref = parse_integer_62 (rdm);
1054 if (!rdm->skipping_printing)
1056 old_next = rdm->next;
1057 rdm->next = backref;
1058 open = demangle_path_maybe_open_generics (rdm);
1059 rdm->next = old_next;
1062 else if (eat (rdm, 'I'))
1064 demangle_path (rdm, 0);
1065 PRINT ("<");
1066 open = 1;
1067 for (i = 0; !rdm->errored && !eat (rdm, 'E'); i++)
1069 if (i > 0)
1070 PRINT (", ");
1071 demangle_generic_arg (rdm);
1074 else
1075 demangle_path (rdm, 0);
1076 return open;
1079 static void
1080 demangle_dyn_trait (struct rust_demangler *rdm)
1082 int open;
1083 struct rust_mangled_ident name;
1085 if (rdm->errored)
1086 return;
1088 open = demangle_path_maybe_open_generics (rdm);
1090 while (eat (rdm, 'p'))
1092 if (!open)
1093 PRINT ("<");
1094 else
1095 PRINT (", ");
1096 open = 1;
1098 name = parse_ident (rdm);
1099 print_ident (rdm, name);
1100 PRINT (" = ");
1101 demangle_type (rdm);
1104 if (open)
1105 PRINT (">");
1108 static void
1109 demangle_const (struct rust_demangler *rdm)
1111 char ty_tag;
1112 size_t old_next, backref;
1114 if (rdm->errored)
1115 return;
1117 if (eat (rdm, 'B'))
1119 backref = parse_integer_62 (rdm);
1120 if (!rdm->skipping_printing)
1122 old_next = rdm->next;
1123 rdm->next = backref;
1124 demangle_const (rdm);
1125 rdm->next = old_next;
1127 return;
1130 ty_tag = next (rdm);
1131 switch (ty_tag)
1133 /* Placeholder. */
1134 case 'p':
1135 PRINT ("_");
1136 return;
1138 /* Unsigned integer types. */
1139 case 'h':
1140 case 't':
1141 case 'm':
1142 case 'y':
1143 case 'o':
1144 case 'j':
1145 demangle_const_uint (rdm);
1146 break;
1148 /* Signed integer types. */
1149 case 'a':
1150 case 's':
1151 case 'l':
1152 case 'x':
1153 case 'n':
1154 case 'i':
1155 demangle_const_int (rdm);
1156 break;
1158 /* Boolean. */
1159 case 'b':
1160 demangle_const_bool (rdm);
1161 break;
1163 /* Character. */
1164 case 'c':
1165 demangle_const_char (rdm);
1166 break;
1168 default:
1169 rdm->errored = 1;
1170 return;
1173 if (rdm->errored)
1174 return;
1176 if (rdm->verbose)
1178 PRINT (": ");
1179 PRINT (basic_type (ty_tag));
1183 static void
1184 demangle_const_uint (struct rust_demangler *rdm)
1186 size_t hex_len;
1187 uint64_t value;
1189 if (rdm->errored)
1190 return;
1192 hex_len = parse_hex_nibbles (rdm, &value);
1194 if (hex_len > 16)
1196 /* Print anything that doesn't fit in `uint64_t` verbatim. */
1197 PRINT ("0x");
1198 print_str (rdm, rdm->sym + (rdm->next - hex_len), hex_len);
1200 else if (hex_len > 0)
1201 print_uint64 (rdm, value);
1202 else
1203 rdm->errored = 1;
/* Demangle and print a signed integer constant: an optional `n`
   marks a negative value and is followed by the magnitude.  */
static void
demangle_const_int (struct rust_demangler *rdm)
{
  int negative = eat (rdm, 'n');

  if (negative)
    PRINT ("-");
  demangle_const_uint (rdm);
}
1214 static void
1215 demangle_const_bool (struct rust_demangler *rdm)
1217 uint64_t value;
1219 if (parse_hex_nibbles (rdm, &value) != 1)
1221 rdm->errored = 1;
1222 return;
1225 if (value == 0)
1226 PRINT ("false");
1227 else if (value == 1)
1228 PRINT ("true");
1229 else
1230 rdm->errored = 1;
1233 static void
1234 demangle_const_char (struct rust_demangler *rdm)
1236 size_t hex_len;
1237 uint64_t value;
1239 hex_len = parse_hex_nibbles (rdm, &value);
1241 if (hex_len == 0 || hex_len > 8)
1243 rdm->errored = 1;
1244 return;
1247 /* Match Rust's character "debug" output as best as we can. */
1248 PRINT ("'");
1249 if (value == '\t')
1250 PRINT ("\\t");
1251 else if (value == '\r')
1252 PRINT ("\\r");
1253 else if (value == '\n')
1254 PRINT ("\\n");
1255 else if (value > ' ' && value < '~')
1257 /* Rust also considers many non-ASCII codepoints to be printable, but
1258 that logic is not easily ported to C. */
1259 char c = value;
1260 print_str (rdm, &c, 1);
1262 else
1264 PRINT ("\\u{");
1265 print_uint64_hex (rdm, value);
1266 PRINT ("}");
1268 PRINT ("'");
1271 /* A legacy hash is the prefix "h" followed by 16 lowercase hex digits.
1272 The hex digits must contain at least 5 distinct digits. */
1273 static int
1274 is_legacy_prefixed_hash (struct rust_mangled_ident ident)
1276 uint16_t seen;
1277 int nibble;
1278 size_t i, count;
1280 if (ident.ascii_len != 17 || ident.ascii[0] != 'h')
1281 return 0;
1283 seen = 0;
1284 for (i = 0; i < 16; i++)
1286 nibble = decode_lower_hex_nibble (ident.ascii[1 + i]);
1287 if (nibble < 0)
1288 return 0;
1289 seen |= (uint16_t)1 << nibble;
1292 /* Count how many distinct digits were seen. */
1293 count = 0;
1294 while (seen)
1296 if (seen & 1)
1297 count++;
1298 seen >>= 1;
1301 return count >= 5;
1305 rust_demangle_callback (const char *mangled, int options,
1306 demangle_callbackref callback, void *opaque)
1308 const char *p;
1309 struct rust_demangler rdm;
1310 struct rust_mangled_ident ident;
1312 rdm.sym = mangled;
1313 rdm.sym_len = 0;
1315 rdm.callback_opaque = opaque;
1316 rdm.callback = callback;
1318 rdm.next = 0;
1319 rdm.errored = 0;
1320 rdm.skipping_printing = 0;
1321 rdm.verbose = (options & DMGL_VERBOSE) != 0;
1322 rdm.version = 0;
1323 rdm.bound_lifetime_depth = 0;
1325 /* Rust symbols always start with _R (v0) or _ZN (legacy). */
1326 if (rdm.sym[0] == '_' && rdm.sym[1] == 'R')
1327 rdm.sym += 2;
1328 else if (rdm.sym[0] == '_' && rdm.sym[1] == 'Z' && rdm.sym[2] == 'N')
1330 rdm.sym += 3;
1331 rdm.version = -1;
1333 else
1334 return 0;
1336 /* Paths (v0) always start with uppercase characters. */
1337 if (rdm.version != -1 && !ISUPPER (rdm.sym[0]))
1338 return 0;
1340 /* Rust symbols (v0) use only [_0-9a-zA-Z] characters. */
1341 for (p = rdm.sym; *p; p++)
1343 rdm.sym_len++;
1345 if (*p == '_' || ISALNUM (*p))
1346 continue;
1348 /* Legacy Rust symbols can also contain [.:$] characters. */
1349 if (rdm.version == -1 && (*p == '$' || *p == '.' || *p == ':'))
1350 continue;
1352 return 0;
1355 /* Legacy Rust symbols need to be handled separately. */
1356 if (rdm.version == -1)
1358 /* Legacy Rust symbols always end with E. */
1359 if (!(rdm.sym_len > 0 && rdm.sym[rdm.sym_len - 1] == 'E'))
1360 return 0;
1361 rdm.sym_len--;
1363 /* Legacy Rust symbols also always end with a path segment
1364 that encodes a 16 hex digit hash, i.e. '17h[a-f0-9]{16}'.
1365 This early check, before any parse_ident calls, should
1366 quickly filter out most C++ symbols unrelated to Rust. */
1367 if (!(rdm.sym_len > 19
1368 && !memcmp (&rdm.sym[rdm.sym_len - 19], "17h", 3)))
1369 return 0;
1373 ident = parse_ident (&rdm);
1374 if (rdm.errored || !ident.ascii)
1375 return 0;
1377 while (rdm.next < rdm.sym_len);
1379 /* The last path segment should be the hash. */
1380 if (!is_legacy_prefixed_hash (ident))
1381 return 0;
1383 /* Reset the state for a second pass, to print the symbol. */
1384 rdm.next = 0;
1385 if (!rdm.verbose && rdm.sym_len > 19)
1387 /* Hide the last segment, containing the hash, if not verbose. */
1388 rdm.sym_len -= 19;
1393 if (rdm.next > 0)
1394 print_str (&rdm, "::", 2);
1396 ident = parse_ident (&rdm);
1397 print_ident (&rdm, ident);
1399 while (rdm.next < rdm.sym_len);
1401 else
1403 demangle_path (&rdm, 1);
1405 /* Skip instantiating crate. */
1406 if (!rdm.errored && rdm.next < rdm.sym_len)
1408 rdm.skipping_printing = 1;
1409 demangle_path (&rdm, 0);
1412 /* It's an error to not reach the end. */
1413 rdm.errored |= rdm.next != rdm.sym_len;
1416 return !rdm.errored;
1419 /* Growable string buffers. */
/* A heap-allocated, growable byte buffer.  */
struct str_buf
{
  /* Start of the allocation, or NULL while nothing is allocated.  */
  char *ptr;
  /* Number of bytes in use.  */
  size_t len;
  /* Number of bytes allocated.  */
  size_t cap;
  /* Non-zero once growing the buffer has failed.  */
  int errored;
};
1428 static void
1429 str_buf_reserve (struct str_buf *buf, size_t extra)
1431 size_t available, min_new_cap, new_cap;
1432 char *new_ptr;
1434 /* Allocation failed before. */
1435 if (buf->errored)
1436 return;
1438 available = buf->cap - buf->len;
1440 if (extra <= available)
1441 return;
1443 min_new_cap = buf->cap + (extra - available);
1445 /* Check for overflows. */
1446 if (min_new_cap < buf->cap)
1448 buf->errored = 1;
1449 return;
1452 new_cap = buf->cap;
1454 if (new_cap == 0)
1455 new_cap = 4;
1457 /* Double capacity until sufficiently large. */
1458 while (new_cap < min_new_cap)
1460 new_cap *= 2;
1462 /* Check for overflows. */
1463 if (new_cap < buf->cap)
1465 buf->errored = 1;
1466 return;
1470 new_ptr = (char *)realloc (buf->ptr, new_cap);
1471 if (new_ptr == NULL)
1473 free (buf->ptr);
1474 buf->ptr = NULL;
1475 buf->len = 0;
1476 buf->cap = 0;
1477 buf->errored = 1;
1479 else
1481 buf->ptr = new_ptr;
1482 buf->cap = new_cap;
1486 static void
1487 str_buf_append (struct str_buf *buf, const char *data, size_t len)
1489 str_buf_reserve (buf, len);
1490 if (buf->errored)
1491 return;
1493 memcpy (buf->ptr + buf->len, data, len);
1494 buf->len += len;
/* Demangler callback that appends every fragment to the `struct
   str_buf` passed as OPAQUE.  */
static void
str_buf_demangle_callback (const char *data, size_t len, void *opaque)
{
  struct str_buf *buf = (struct str_buf *)opaque;

  str_buf_append (buf, data, len);
}
1503 char *
1504 rust_demangle (const char *mangled, int options)
1506 struct str_buf out;
1507 int success;
1509 out.ptr = NULL;
1510 out.len = 0;
1511 out.cap = 0;
1512 out.errored = 0;
1514 success = rust_demangle_callback (mangled, options,
1515 str_buf_demangle_callback, &out);
1517 if (!success)
1519 free (out.ptr);
1520 return NULL;
1523 str_buf_append (&out, "\0", 1);
1524 return out.ptr;