// c++: improve verify_constant diagnostic [PR91483]
// [official-gcc.git] / gcc / go / gofrontend / go-encode-id.cc
// blob 7ab65f513b394642c7448f6e21ff91719b1474fc
// go-encode-id.cc -- Go identifier and packagepath encoding/decoding hooks

// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
7 #include "go-system.h"
9 #include "gogo.h"
10 #include "go-location.h"
11 #include "go-linemap.h"
12 #include "go-encode-id.h"
13 #include "lex.h"
// Return whether the character C must be encoded when it appears in a
// name.  Only the ASCII letters and digits listed below are emitted
// unchanged (the function returns false for them); every other
// character, including '_' and any byte outside the ASCII range,
// needs encoding (the function returns true).

static bool
char_needs_encoding(char c)
{
  switch (c)
    {
    case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
    case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
    case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
    case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
    case 'Y': case 'Z':
    case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
    case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
    case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
    case 's': case 't': case 'u': case 'v': case 'w': case 'x':
    case 'y': case 'z':
    case '0': case '1': case '2': case '3': case '4':
    case '5': case '6': case '7': case '8': case '9':
      return false;
    default:
      return true;
    }
}
41 // Return whether the identifier needs to be translated because it
42 // contains non-ASCII characters.
44 bool
45 go_id_needs_encoding(const std::string& str)
47 for (std::string::const_iterator p = str.begin();
48 p != str.end();
49 ++p)
50 if (char_needs_encoding(*p))
51 return true;
52 return false;
// Map from characters to the underscore encoding for them.  A
// character with a non-zero code CODE is written as the two
// characters '_' CODE by the encoder.

class Special_char_code
{
 public:
  Special_char_code();

  // Return the simple underscore encoding for C, or 0 if none.  Only
  // values in the range 0..127 can have a code; anything larger
  // always yields 0.
  char
  code_for(unsigned int c) const
  {
    if (c <= 127)
      return this->codes_[c];
    return 0;
  }

 private:
  // Encodings for characters, indexed by character value.  A zero
  // entry means the character has no simple encoding.
  char codes_[128];
};

// Construct the underscore encoding map.  Every entry starts out as
// 0 ("no code"); only the characters listed here get a code.

Special_char_code::Special_char_code()
{
  memset(this->codes_, 0, sizeof this->codes_);
  this->codes_['_'] = '_';
  this->codes_['.'] = '0';
  this->codes_['/'] = '1';
  this->codes_['*'] = '2';
  this->codes_[','] = '3';
  this->codes_['{'] = '4';
  this->codes_['}'] = '5';
  this->codes_['['] = '6';
  this->codes_[']'] = '7';
  this->codes_['('] = '8';
  this->codes_[')'] = '9';
  this->codes_['"'] = 'a';
  this->codes_[' '] = 'b';
  this->codes_[';'] = 'c';
}

// The singleton Special_char_code.

static const Special_char_code special_char_code;
// Pull the next UTF-8 character out of P and store it in *PC.  Return
// the number of bytes read, which is always at least 1.
//
// P must point at a readable byte inside a NUL-terminated buffer.  A
// well-formed 1- to 4-byte sequence is decoded to its code point.
// Anything else -- a stray continuation byte, a lead byte announcing
// more than four bytes, or a sequence cut short by a byte that is not
// a continuation byte (including the terminating NUL) -- is treated
// as a single raw byte: *PC receives the value of that byte (0x80 to
// 0xff) and 1 is returned.  That keeps the caller moving forward
// without ever reading past the terminator.  Overlong forms and
// surrogate code points are not rejected.

static size_t
fetch_utf8_char(const char* p, unsigned int* pc)
{
  const unsigned char lead = static_cast<unsigned char>(*p);
  if ((lead & 0x80) == 0)
    {
      *pc = lead;
      return 1;
    }

  // The number of leading one bits in the first byte is the length
  // the sequence claims to have.
  size_t len = 0;
  for (unsigned char bits = lead; (bits & 0x80) != 0; bits <<= 1)
    ++len;

  // A length of 1 is a continuation byte in lead position; lengths
  // above 4 are not UTF-8 and would make the shift below overflow.
  if (len >= 2 && len <= 4)
    {
      unsigned int rc = lead & ((1U << (7 - len)) - 1);
      size_t i = 1;
      for (; i < len; i++)
        {
          const unsigned char u = static_cast<unsigned char>(p[i]);
          // Stop at the first byte that is not of the form 10xxxxxx.
          // The NUL terminator fails this test, so a truncated
          // sequence never causes a read beyond the buffer.
          if ((u & 0xc0) != 0x80)
            break;
          rc = (rc << 6) | (u & 0x3f);
        }
      if (i == len)
        {
          *pc = rc;
          return len;
        }
    }

  // Malformed input: hand back the raw byte.
  *pc = lead;
  return 1;
}
130 // Encode an identifier using assembler-friendly characters. The
131 // encoding is described in detail near the end of the long comment at
132 // the start of names.cc.
134 std::string
135 go_encode_id(const std::string &id)
137 if (Lex::is_invalid_identifier(id))
139 go_assert(saw_errors());
140 return id;
143 std::string ret;
144 const char* p = id.c_str();
145 const char* pend = p + id.length();
147 // We encode a leading digit, to ensure that no identifier starts
148 // with a digit.
149 if (pend > p && p[0] >= '0' && p[0] <= '9')
151 char buf[8];
152 snprintf(buf, sizeof buf, "_x%02x", p[0]);
153 ret.append(buf);
154 ++p;
157 while (p < pend)
159 unsigned int c;
160 size_t len = fetch_utf8_char(p, &c);
161 if (len == 1)
163 if (!char_needs_encoding(c))
164 ret.push_back(c);
165 else
167 char code = special_char_code.code_for(c);
168 if (code != 0)
170 ret.push_back('_');
171 ret.push_back(code);
173 else
175 char buf[8];
176 snprintf(buf, sizeof buf, "_x%02x", c);
177 ret.append(buf);
181 else
183 char buf[16];
184 if (c < 0x10000)
185 snprintf(buf, sizeof buf, "_u%04x", c);
186 else
187 snprintf(buf, sizeof buf, "_U%08x", c);
188 ret.append(buf);
191 p += len;
194 return ret;
197 // Convert a hex digit string to a unicode codepoint. No checking
198 // to insure that the hex digit is meaningful.
200 static unsigned
201 hex_digits_to_unicode_codepoint(const char *digits, unsigned ndig)
203 unsigned result = 0;
204 for (unsigned i = 0; i < ndig; ++i) {
205 result <<= 4;
206 result |= Lex::hex_val(digits[i]);
208 return result;
211 // Decode/demangle a mangled string produced by go_encode_id(). Returns
212 // empty string if demangling process fails in some way. At the moment
213 // this routine is unused; there is an equivalent routine in the runtime
214 // used for demangling symbols appearing in stack traces.
216 std::string
217 go_decode_id(const std::string &encoded)
219 std::string ret;
220 const char* p = encoded.c_str();
221 const char* pend = p + encoded.length();
222 const Location loc = Linemap::predeclared_location();
224 while (p < pend)
226 if (*p != '_' || p + 1 == pend)
228 ret.push_back(*p);
229 p++;
230 continue;
233 switch (p[1])
235 case '_':
236 ret.push_back('_');
237 p += 2;
238 break;
239 case '0':
240 ret.push_back('.');
241 p += 2;
242 break;
243 case '1':
244 ret.push_back('/');
245 p += 2;
246 break;
247 case '2':
248 ret.push_back('*');
249 p += 2;
250 break;
251 case '3':
252 ret.push_back(',');
253 p += 2;
254 break;
255 case '4':
256 ret.push_back('{');
257 p += 2;
258 break;
259 case '5':
260 ret.push_back('}');
261 p += 2;
262 break;
263 case '6':
264 ret.push_back('[');
265 p += 2;
266 break;
267 case '7':
268 ret.push_back(']');
269 p += 2;
270 break;
271 case '8':
272 ret.push_back('(');
273 p += 2;
274 break;
275 case '9':
276 ret.push_back(')');
277 p += 2;
278 break;
279 case 'a':
280 ret.push_back('"');
281 p += 2;
282 break;
283 case 'b':
284 ret.push_back(' ');
285 p += 2;
286 break;
287 case 'c':
288 ret.push_back(';');
289 p += 2;
290 break;
291 case 'x':
293 const char* digits = p + 2;
294 if (strlen(digits) < 2)
295 return "";
296 unsigned int rune = hex_digits_to_unicode_codepoint(digits, 2);
297 Lex::append_char(rune, true, &ret, loc);
298 p += 4;
300 break;
301 case 'u':
303 const char* digits = p + 2;
304 if (strlen(digits) < 4)
305 return "";
306 unsigned int rune = hex_digits_to_unicode_codepoint(digits, 4);
307 Lex::append_char(rune, true, &ret, loc);
308 p += 6;
310 break;
311 case 'U':
313 const char* digits = p + 2;
314 if (strlen(digits) < 8)
315 return "";
316 unsigned int rune = hex_digits_to_unicode_codepoint(digits, 8);
317 Lex::append_char(rune, true, &ret, loc);
318 p += 10;
320 break;
321 default:
322 return "";
326 return ret;
329 // Encode a struct field tag. This is only used when we need to
330 // create a type descriptor for an anonymous struct type with field
331 // tags. Underscore encoding will be applied to the returned string.
332 // The tag will appear between curly braces, so that is all we have to
333 // avoid.
335 std::string
336 go_mangle_struct_tag(const std::string& tag)
338 std::string ret;
339 const char* p = tag.c_str();
340 const char* pend = p + tag.length();
341 while (p < pend)
343 unsigned int c;
344 size_t len = fetch_utf8_char(p, &c);
345 if (len > 1)
346 ret.append(p, len);
347 else if (c != '{' && c != '}' && c != '\\')
348 ret.push_back(c);
349 else
351 ret.push_back('\\');
352 ret.push_back(c);
354 p += len;
356 return ret;