1 /* Demangler for the Rust programming language
2 Copyright (C) 2016-2020 Free Software Foundation, Inc.
3 Written by David Tolnay (dtolnay@gmail.com).
5 This file is part of the libiberty library.
6 Libiberty is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Library General Public
8 License as published by the Free Software Foundation; either
9 version 2 of the License, or (at your option) any later version.
11 In addition to the permissions in the GNU Library General Public
12 License, the Free Software Foundation gives you unlimited permission
13 to link the compiled version of this file into combinations with other
14 programs, and to distribute those combinations without any restriction
15 coming from the use of this file. (The Library Public License
16 restrictions do apply in other respects; for example, they cover
17 modification of the file, and distribution when not linked into a
18 combined executable.)
20 Libiberty is distributed in the hope that it will be useful,
21 but WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23 Library General Public License for more details.
25 You should have received a copy of the GNU Library General Public
26 License along with libiberty; see the file COPYING.LIB.
27 If not, see <http://www.gnu.org/licenses/>. */
34 #include "safe-ctype.h"
37 #include <sys/types.h>
45 extern size_t strlen(const char *s
);
46 extern int strncmp(const char *s1
, const char *s2
, size_t n
);
47 extern void *memset(void *s
, int c
, size_t n
);
51 #include "libiberty.h"
58 void *callback_opaque
;
59 demangle_callbackref callback
;
61 /* Position of the next character to read from the symbol. */
64 /* Non-zero if any error occurred. */
67 /* Non-zero if printing should be verbose (e.g. include hashes). */
70 /* Rust mangling version, with legacy mangling being -1. */
74 /* Parsing functions. */
77 peek (const struct rust_demangler
*rdm
)
79 if (rdm
->next
< rdm
->sym_len
)
80 return rdm
->sym
[rdm
->next
];
85 next (struct rust_demangler
*rdm
)
95 struct rust_mangled_ident
97 /* ASCII part of the identifier. */
102 static struct rust_mangled_ident
103 parse_ident (struct rust_demangler
*rdm
)
107 struct rust_mangled_ident ident
;
121 while (ISDIGIT (peek (rdm
)))
122 len
= len
* 10 + (next (rdm
) - '0');
126 /* Check for overflows. */
127 if ((start
> rdm
->next
) || (rdm
->next
> rdm
->sym_len
))
133 ident
.ascii
= rdm
->sym
+ start
;
134 ident
.ascii_len
= len
;
136 if (ident
.ascii_len
== 0)
142 /* Printing functions. */
145 print_str (struct rust_demangler
*rdm
, const char *data
, size_t len
)
148 rdm
->callback (data
, len
, rdm
->callback_opaque
);
151 #define PRINT(s) print_str (rdm, s, strlen (s))
153 /* Return a 0x0-0xf value if the char is 0-9a-f, and -1 otherwise. */
155 decode_lower_hex_nibble (char nibble
)
157 if ('0' <= nibble
&& nibble
<= '9')
159 if ('a' <= nibble
&& nibble
<= 'f')
160 return 0xa + (nibble
- 'a');
164 /* Return the unescaped character for a "$...$" escape, or 0 if invalid. */
166 decode_legacy_escape (const char *e
, size_t len
, size_t *out_len
)
169 size_t escape_len
= 0;
170 int lo_nibble
= -1, hi_nibble
= -1;
172 if (len
< 3 || e
[0] != '$')
188 if (e
[0] == 'S' && e
[1] == 'P')
190 else if (e
[0] == 'B' && e
[1] == 'P')
192 else if (e
[0] == 'R' && e
[1] == 'F')
194 else if (e
[0] == 'L' && e
[1] == 'T')
196 else if (e
[0] == 'G' && e
[1] == 'T')
198 else if (e
[0] == 'L' && e
[1] == 'P')
200 else if (e
[0] == 'R' && e
[1] == 'P')
202 else if (e
[0] == 'u' && len
> 3)
206 hi_nibble
= decode_lower_hex_nibble (e
[1]);
209 lo_nibble
= decode_lower_hex_nibble (e
[2]);
213 /* Only allow non-control ASCII characters. */
216 c
= (hi_nibble
<< 4) | lo_nibble
;
222 if (!c
|| len
<= escape_len
|| e
[escape_len
] != '$')
225 *out_len
= 2 + escape_len
;
230 print_ident (struct rust_demangler
*rdm
, struct rust_mangled_ident ident
)
238 if (rdm
->version
== -1)
240 /* Ignore leading underscores preceding escape sequences.
241 The mangler inserts an underscore to make sure the
242 identifier begins with a XID_Start character. */
243 if (ident
.ascii_len
>= 2 && ident
.ascii
[0] == '_'
244 && ident
.ascii
[1] == '$')
250 while (ident
.ascii_len
> 0)
252 /* Handle legacy escape sequences ("$...$", ".." or "."). */
253 if (ident
.ascii
[0] == '$')
256 = decode_legacy_escape (ident
.ascii
, ident
.ascii_len
, &len
);
258 print_str (rdm
, &unescaped
, 1);
261 /* Unexpected escape sequence, print the rest verbatim. */
262 print_str (rdm
, ident
.ascii
, ident
.ascii_len
);
266 else if (ident
.ascii
[0] == '.')
268 if (ident
.ascii_len
>= 2 && ident
.ascii
[1] == '.')
270 /* ".." becomes "::" */
276 /* "." becomes "-" */
283 /* Print everything before the next escape sequence, at once. */
284 for (len
= 0; len
< ident
.ascii_len
; len
++)
285 if (ident
.ascii
[len
] == '$' || ident
.ascii
[len
] == '.')
288 print_str (rdm
, ident
.ascii
, len
);
292 ident
.ascii_len
-= len
;
299 /* A legacy hash is the prefix "h" followed by 16 lowercase hex digits.
300 The hex digits must contain at least 5 distinct digits. */
302 is_legacy_prefixed_hash (struct rust_mangled_ident ident
)
308 if (ident
.ascii_len
!= 17 || ident
.ascii
[0] != 'h')
312 for (i
= 0; i
< 16; i
++)
314 nibble
= decode_lower_hex_nibble (ident
.ascii
[1 + i
]);
317 seen
|= (uint16_t)1 << nibble
;
320 /* Count how many distinct digits were seen. */
333 rust_demangle_callback (const char *mangled
, int options
,
334 demangle_callbackref callback
, void *opaque
)
337 struct rust_demangler rdm
;
338 struct rust_mangled_ident ident
;
343 rdm
.callback_opaque
= opaque
;
344 rdm
.callback
= callback
;
348 rdm
.verbose
= (options
& DMGL_VERBOSE
) != 0;
351 /* Rust symbols always start with _ZN (legacy). */
352 if (rdm
.sym
[0] == '_' && rdm
.sym
[1] == 'Z' && rdm
.sym
[2] == 'N')
360 /* Legacy Rust symbols use only [_0-9a-zA-Z.:$] characters. */
361 for (p
= rdm
.sym
; *p
; p
++)
365 if (*p
== '_' || ISALNUM (*p
))
368 if (rdm
.version
== -1 && (*p
== '$' || *p
== '.' || *p
== ':'))
374 /* Legacy Rust symbols need to be handled separately. */
375 if (rdm
.version
== -1)
377 /* Legacy Rust symbols always end with E. */
378 if (!(rdm
.sym_len
> 0 && rdm
.sym
[rdm
.sym_len
- 1] == 'E'))
382 /* Legacy Rust symbols also always end with a path segment
383 that encodes a 16 hex digit hash, i.e. '17h[a-f0-9]{16}'.
384 This early check, before any parse_ident calls, should
385 quickly filter out most C++ symbols unrelated to Rust. */
386 if (!(rdm
.sym_len
> 19
387 && !memcmp (&rdm
.sym
[rdm
.sym_len
- 19], "17h", 3)))
392 ident
= parse_ident (&rdm
);
393 if (rdm
.errored
|| !ident
.ascii
)
396 while (rdm
.next
< rdm
.sym_len
);
398 /* The last path segment should be the hash. */
399 if (!is_legacy_prefixed_hash (ident
))
402 /* Reset the state for a second pass, to print the symbol. */
404 if (!rdm
.verbose
&& rdm
.sym_len
> 19)
406 /* Hide the last segment, containing the hash, if not verbose. */
413 print_str (&rdm
, "::", 2);
415 ident
= parse_ident (&rdm
);
416 print_ident (&rdm
, ident
);
418 while (rdm
.next
< rdm
.sym_len
);
426 /* Growable string buffers. */
436 str_buf_reserve (struct str_buf
*buf
, size_t extra
)
438 size_t available
, min_new_cap
, new_cap
;
441 /* Allocation failed before. */
445 available
= buf
->cap
- buf
->len
;
447 if (extra
<= available
)
450 min_new_cap
= buf
->cap
+ (extra
- available
);
452 /* Check for overflows. */
453 if (min_new_cap
< buf
->cap
)
464 /* Double capacity until sufficiently large. */
465 while (new_cap
< min_new_cap
)
469 /* Check for overflows. */
470 if (new_cap
< buf
->cap
)
477 new_ptr
= (char *)realloc (buf
->ptr
, new_cap
);
494 str_buf_append (struct str_buf
*buf
, const char *data
, size_t len
)
496 str_buf_reserve (buf
, len
);
500 memcpy (buf
->ptr
+ buf
->len
, data
, len
);
505 str_buf_demangle_callback (const char *data
, size_t len
, void *opaque
)
507 str_buf_append ((struct str_buf
*)opaque
, data
, len
);
511 rust_demangle (const char *mangled
, int options
)
521 success
= rust_demangle_callback (mangled
, options
,
522 str_buf_demangle_callback
, &out
);
530 str_buf_append (&out
, "\0", 1);