1 // go-encode-id.cc -- Go identifier and packagepath encoding/decoding hooks
3 // Copyright 2016 The Go Authors. All rights reserved.
4 // Use of this source code is governed by a BSD-style
5 // license that can be found in the LICENSE file.
10 #include "go-location.h"
11 #include "go-linemap.h"
12 #include "go-encode-id.h"
15 // Classify the character C for identifier encoding.  The case labels
16 // below list ASCII letters and digits, the only characters we permit
// to appear unencoded.
// NOTE(review): the function name and its callers (go_id_needs_encoding
// tests the result directly, go_encode_id tests its negation) indicate
// that a true result means C must be encoded, which is the opposite of
// what a can-appear-in-a-name predicate would return -- confirm against
// the return statements.
19 char_needs_encoding(char c
)
23 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
24 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
25 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
26 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
28 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
29 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
30 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
31 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
33 case '0': case '1': case '2': case '3': case '4':
34 case '5': case '6': case '7': case '8': case '9':
41 // Return whether the identifier STR needs to be translated.  The scan
42 // starts at the beginning of STR and tests bytes with char_needs_encoding,
// so presumably any character outside the ASCII alphanumerics triggers
// translation, not only non-ASCII ones.
45 go_id_needs_encoding(const std::string
& str
)
47 for (std::string::const_iterator p
= str
.begin();
50 if (char_needs_encoding(*p
))
55 // Map from characters to the underscore encoding for them.  The table
// is filled in by the constructor and read through code_for.
57 class Special_char_code
62 // Return the simple underscore encoding for C, or 0 if none.  C is used
// directly as an index into codes_.
// NOTE(review): no range check on C is visible in this view; confirm
// that C cannot index past the end of codes_.
64 code_for(unsigned int c
) const
67 return this->codes_
[c
];
72 // Encodings for characters, indexed by character value.  An entry of 0
// means the character has no simple encoding.
76 // Construct the underscore encoding map.  Every entry starts out as 0
// (no simple encoding); the assignments below then give each supported
// character the single character that stands for it.
78 Special_char_code::Special_char_code()
// Clear the whole table first so that unlisted characters read back as 0.
80 memset(this->codes_
, 0, sizeof this->codes_
);
// The underscore maps to itself.
81 this->codes_
['_'] = '_';
// Ten punctuation characters map to the digits 0 through 9.
82 this->codes_
['.'] = '0';
83 this->codes_
['/'] = '1';
84 this->codes_
['*'] = '2';
85 this->codes_
[','] = '3';
86 this->codes_
['{'] = '4';
87 this->codes_
['}'] = '5';
88 this->codes_
['['] = '6';
89 this->codes_
[']'] = '7';
90 this->codes_
['('] = '8';
91 this->codes_
[')'] = '9';
// The double quote, the space and the semicolon map to the letters a, b
// and c.
92 this->codes_
['"'] = 'a';
93 this->codes_
[' '] = 'b';
94 this->codes_
[';'] = 'c';
97 // The singleton Special_char_code.  go_encode_id looks characters up in
// it through code_for.
99 static const Special_char_code special_char_code
;
101 // Pull the next UTF-8 character out of P and store it in *PC. Return
102 // the number of bytes read.
105 fetch_utf8_char(const char* p
, unsigned int* pc
)
// C starts out as the first byte of the sequence.
107 unsigned char c
= *p
;
// Loop for as long as the top bit of C is set.
// NOTE(review): the loop body is not visible in this view; presumably it
// counts the leading one bits of the first byte into LEN -- confirm.
114 while ((c
& 0x80) != 0)
// Keep only the low (7 - LEN) bits of the first byte as the start of the
// code point.
119 unsigned int rc
= *p
& ((1 << (7 - len
)) - 1);
// Visit the remaining LEN - 1 bytes of the sequence.
120 for (size_t i
= 1; i
< len
; i
++)
// U is the byte at offset I.
// NOTE(review): how U is merged into RC is not visible in this view.
122 unsigned int u
= p
[i
];
130 // Encode an identifier using assembler-friendly characters. The
131 // encoding is described in detail near the end of the long comment at
132 // the start of names.cc.
// ID is the identifier to encode.  The hex escapes written below are
// _x, _u and _U followed by 2, 4 and 8 hex digits respectively.
135 go_encode_id(const std::string
&id
)
// An invalid identifier is only expected once an error has already been
// reported, hence the assertion.
137 if (Lex::is_invalid_identifier(id
))
139 go_assert(saw_errors());
// P points at the first byte of ID and PEND one past its last byte.
144 const char* p
= id
.c_str();
145 const char* pend
= p
+ id
.length();
147 // Encode a leading digit so that no encoded identifier starts with a digit.
149 if (pend
> p
&& p
[0] >= '0' && p
[0] <= '9')
// Write the leading digit as _x followed by its two-digit hex code.
152 snprintf(buf
, sizeof buf
, "_x%02x", p
[0]);
// Decode the next UTF-8 character at P into C; LEN is its length in bytes.
160 size_t len
= fetch_utf8_char(p
, &c
);
// Characters that do not need encoding take the plain path.
// NOTE(review): the branch body is not visible in this view; presumably C
// is copied to the output unchanged -- confirm.
163 if (!char_needs_encoding(c
))
// Look up the simple underscore encoding for C (0 if there is none).
167 char code
= special_char_code
.code_for(c
);
// Two-digit hex form.
176 snprintf(buf
, sizeof buf
, "_x%02x", c
);
// Four-digit hex form.
185 snprintf(buf
, sizeof buf
, "_u%04x", c
);
// Eight-digit hex form.
// NOTE(review): the conditions that choose between the three hex forms are
// not visible in this view.
187 snprintf(buf
, sizeof buf
, "_U%08x", c
);
197 // Convert a string of hex digits to a Unicode code point.  DIGITS points
198 // at NDIG characters; each one is converted with Lex::hex_val and OR-ed
// into the result.  No checking is done to ensure that the characters are
// meaningful hex digits.
201 hex_digits_to_unicode_codepoint(const char *digits
, unsigned ndig
)
204 for (unsigned i
= 0; i
< ndig
; ++i
) {
// NOTE(review): the statement that makes room for each new digit
// (presumably a shift of RESULT) is not visible in this view.
206 result
|= Lex::hex_val(digits
[i
]);
211 // Decode/demangle a mangled string produced by go_encode_id(). Returns
212 // empty string if demangling process fails in some way. At the moment
213 // this routine is unused; there is an equivalent routine in the runtime
214 // used for demangling symbols appearing in stack traces.
// ENCODED is the mangled string.  The three hex cases below read 2, 4 and
// 8 digits, presumably the counterparts of the _x, _u and _U forms written
// by go_encode_id.
217 go_decode_id(const std::string
&encoded
)
// P points at the first byte of ENCODED and PEND one past its last byte.
220 const char* p
= encoded
.c_str();
221 const char* pend
= p
+ encoded
.length();
// LOC is the location handed to Lex::append_char below.
222 const Location loc
= Linemap::predeclared_location();
// True when the byte at P is not an underscore, or when it is the last
// byte of the input.
226 if (*p
!= '_' || p
+ 1 == pend
)
// Two-digit case: the digits start two bytes past P.
293 const char* digits
= p
+ 2;
// Require at least two characters after that point.  strlen relies on the
// NUL terminator that c_str supplies.
294 if (strlen(digits
) < 2)
296 unsigned int rune
= hex_digits_to_unicode_codepoint(digits
, 2);
// Hand the decoded value to Lex::append_char together with RET and LOC.
297 Lex::append_char(rune
, true, &ret
, loc
);
// Four-digit case, handled the same way.
303 const char* digits
= p
+ 2;
304 if (strlen(digits
) < 4)
306 unsigned int rune
= hex_digits_to_unicode_codepoint(digits
, 4);
307 Lex::append_char(rune
, true, &ret
, loc
);
// Eight-digit case, handled the same way.
313 const char* digits
= p
+ 2;
314 if (strlen(digits
) < 8)
316 unsigned int rune
= hex_digits_to_unicode_codepoint(digits
, 8);
317 Lex::append_char(rune
, true, &ret
, loc
);
329 // Encode a struct field tag. This is only used when we need to
330 // create a type descriptor for an anonymous struct type with field
331 // tags. Underscore encoding will be applied to the returned string.
332 // The tag will appear between curly braces, so the brace and backslash
// characters tested below are all we have to handle specially.
336 go_mangle_struct_tag(const std::string
& tag
)
339 const char* p
= tag
.c_str();
340 const char* pend
= p
+ tag
.length();
344 size_t len
= fetch_utf8_char(p
, &c
);
347 else if (c
!= '{' && c
!= '}' && c
!= '\\')