1 // go-encode-id.cc -- Go identifier encoding hooks
3 // Copyright 2016 The Go Authors. All rights reserved.
4 // Use of this source code is governed by a BSD-style
5 // license that can be found in the LICENSE file.
10 #include "go-location.h"
11 #include "go-linemap.h"
12 #include "go-encode-id.h"
15 // Return whether the character c is OK to use in the assembler. We
16 // only permit ASCII alphanumeric characters, underscore, and dot.
// NOTE(review): the sentence above reads as the inverse of the function
// name. Callers in this file assert that the result is false for bytes
// that are alphanumerics, underscore or dot, so a true result appears to
// mean that C must be encoded -- confirm against the return statements,
// which are not visible in this view.
19 char_needs_encoding(char c
)
// Upper-case ASCII letters. Presumably part of the accepted set; the
// switch header and the remaining labels are not visible in this view.
23 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
24 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
25 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
26 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
// Lower-case ASCII letters.
28 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
29 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
30 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
31 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
// ASCII decimal digits.
33 case '0': case '1': case '2': case '3': case '4':
34 case '5': case '6': case '7': case '8': case '9':
42 // Return whether the identifier needs to be translated because it
43 // contains non-ASCII characters.
// STR is examined one char at a time with char_needs_encoding, so the
// test is made on individual bytes rather than on decoded UTF-8
// characters.
// NOTE(review): char_needs_encoding is documented as permitting only
// ASCII alphanumerics, underscore and dot, so ASCII punctuation would
// presumably also make this return true, not only non-ASCII bytes --
// confirm against the return statements, which are not visible here.
46 go_id_needs_encoding(const std::string
& str
)
// Iterate from the first char of STR; the loop condition and increment
// are not visible in this view.
48 for (std::string::const_iterator p
= str
.begin();
51 if (char_needs_encoding(*p
))
56 // Pull the next UTF-8 character out of P and store it in *PC. Return
57 // the number of bytes read.
// P must point at the first byte of a UTF-8 sequence, and every byte of
// that sequence must be readable: the loop below indexes p[1] through
// p[len - 1], and no bounds or validity check is visible in this view.
60 fetch_utf8_char(const char* p
, unsigned int* pc
)
// Runs while the top bit of c is set. Presumably this is where len (the
// sequence length) is derived from the lead byte -- the loop body and
// the declarations of c and len are not visible here, TODO confirm.
69 while ((c
& 0x80) != 0)
// Start the result from the lead byte, keeping only its low (7 - len)
// bits and dropping the bits above them.
74 unsigned int rc
= *p
& ((1 << (7 - len
)) - 1);
// Visit the remaining len - 1 bytes of the sequence.
75 for (size_t i
= 1; i
< len
; i
++)
// NOTE(review): p is const char*, so where plain char is signed a byte
// with the top bit set widens to a large unsigned value here; whatever
// masking follows is not visible in this view.
77 unsigned int u
= p
[i
];
85 // Encode an identifier using ASCII characters. The encoding is
86 // described in detail near the end of the long comment at the start
87 // of names.cc. Short version: translate all non-ASCII-alphanumeric
88 // characters into ..uXXXX or ..UXXXXXXXX.
// ID is the identifier to encode. The X placeholders above stand for
// lower-case hexadecimal digits, as produced by the %04x and %08x
// formats used below.
91 go_encode_id(const std::string
&id
)
// An invalid identifier is only expected here after an error has been
// reported; the assert enforces that. What is returned in that case is
// not visible in this view.
93 if (Lex::is_invalid_identifier(id
))
95 go_assert(saw_errors());
99 // The encoding is only unambiguous if the input string does not
100 // contain ..u or ..U.
101 go_assert(id
.find("..u") == std::string::npos
);
102 go_assert(id
.find("..U") == std::string::npos
);
// P walks the bytes of ID; PEND is one past the last byte.
105 const char* p
= id
.c_str();
106 const char* pend
= p
+ id
.length();
108 // A leading ".0" is a space introduced before a mangled type name
109 // that starts with a 'u' or 'U', to avoid confusion with the
110 // mangling used here. We don't need a leading ".0", and we don't
111 // want symbols that start with '.', so remove it.
// Reading p[1] is safe even for an empty or one-byte ID: c_str() is NUL
// terminated and p[1] is only evaluated when p[0] is a dot.
112 if (p
[0] == '.' && p
[1] == '0')
// Decode one character starting at P into C; LEN is the number of bytes
// it occupies. The enclosing loop is not visible in this view.
118 size_t len
= fetch_utf8_char(p
, &c
);
121 // At this point we should only be seeing alphanumerics or
122 // underscore or dot.
123 go_assert(!char_needs_encoding(c
));
// Two escape forms: ..u followed by at least four hex digits, or ..U
// followed by at least eight. The condition choosing between them and
// the declaration of buf are not visible here; presumably the short
// form is used when C fits in four hex digits -- TODO confirm.
130 snprintf(buf
, sizeof buf
, "..u%04x", c
);
132 snprintf(buf
, sizeof buf
, "..U%08x", c
);
134 // We don't want a symbol to start with '.', so add a prefix
// NOTE(review): the sentence above is cut off in this view, and the code
// that adds the prefix is not visible; which prefix is used and when it
// is added cannot be told from here.
// Encode ID only if it needs it: when go_id_needs_encoding(ID) is true,
// return go_encode_id(ID); otherwise return an empty string (not ID
// itself).
147 go_selectively_encode_id(const std::string
&id
)
149 if (go_id_needs_encoding(id
))
150 return go_encode_id(id
);
// Nothing to encode: signal that with an empty result.
151 return std::string();
154 // Encode a struct field tag. This is only used when we need to
155 // create a type descriptor for an anonymous struct type with field
156 // tags. This mangling is applied before go_encode_id. We skip
157 // alphanumerics and underscore, replace every other single byte
158 // character with .xNN, and leave larger UTF-8 characters for
// NOTE(review): the sentence above is cut off in this view. Since this
// mangling runs before go_encode_id, the multi-byte characters are
// presumably left for go_encode_id to handle -- TODO confirm.
// NN is two lower-case hexadecimal digits (the %02x format below).
162 go_mangle_struct_tag(const std::string
& tag
)
// P walks the bytes of TAG; PEND is one past the last byte.
165 const char* p
= tag
.c_str();
166 const char* pend
= p
+ tag
.length();
// Decode one character starting at P into C; LEN is the number of bytes
// it occupies. The enclosing loop is not visible in this view.
170 size_t len
= fetch_utf8_char(p
, &c
);
// Dot is accepted by char_needs_encoding but is excluded here, so it
// falls through to the .xNN escape. The branch preceding this else is
// not visible in this view.
173 else if (!char_needs_encoding(c
) && c
!= '.')
// Escape the character as .x followed by its value in hex.
178 snprintf(buf
, sizeof buf
, ".x%02x", c
);