2015-05-22 Pascal Obry <obry@adacore.com>
[official-gcc.git] / gcc / java / mangle_name.c
blob53eb926b7aee12c28ad316e980cf93d613b03996
1 /* Shared functions related to mangling names for the GNU compiler
2 for the Java(TM) language.
3 Copyright (C) 2001-2015 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>.
21 Java and all Java-based marks are trademarks or registered trademarks
22 of Sun Microsystems, Inc. in the United States and other countries.
23 The Free Software Foundation is independent of Sun Microsystems, Inc. */
25 /* Written by Alexandre Petit-Bianco <apbianco@cygnus.com> */
27 #include "config.h"
28 #include "system.h"
29 #include "coretypes.h"
30 #include "jcf.h"
31 #include "hash-set.h"
32 #include "machmode.h"
33 #include "vec.h"
34 #include "double-int.h"
35 #include "input.h"
36 #include "alias.h"
37 #include "symtab.h"
38 #include "options.h"
39 #include "wide-int.h"
40 #include "inchash.h"
41 #include "tree.h"
42 #include "java-tree.h"
43 #include "obstack.h"
44 #include "diagnostic-core.h"
/* Helpers that exist only when the assembler cannot take UTF-8 in
   symbol names; both are defined in the !HAVE_AS_UTF8 half of this
   file, so their prototypes are guarded the same way to avoid
   declaring a static function that is never defined.  */
#ifndef HAVE_AS_UTF8
static void append_unicode_mangled_name (const char *, int);
static int  unicode_mangling_length (const char *, int);
#endif

/* Obstack that receives the mangled output; its definition is not in
   this file.  */
extern struct obstack *mangle_obstack;
/* Compare the first LENGTH bytes of the UTF-8 string STR against the
   NUL-terminated string NAME.

   Return 0 when every character of NAME matched and STR was consumed
   exactly up to STR + LENGTH.  On the first mismatch return the
   decoded value from STR minus the corresponding character of NAME
   (so the sign gives the ordering).  Return 1 when NAME is exhausted
   but STR has not reached its limit.

   UTF8_GET is defined outside this file; it is assumed to decode one
   character, advance STR, and yield a negative value when STR is
   already at the limit -- TODO confirm against jcf.h.  */

static int
utf8_cmp (const unsigned char *str, int length, const char *name)
{
  const unsigned char *limit = str + length;
  int i;

  for (i = 0; name[i]; ++i)
    {
      int ch = UTF8_GET (str, limit);
      if (ch != name[i])
        return ch - name[i];
    }

  return str == limit ? 0 : 1;
}
/* A sorted list of all C++ keywords.  The entries must stay in
   ascending byte order because cxx_keyword_p binary-searches this
   table.  If you change this, be sure also to change the list in
   libjava/classpath/tools/gnu/classpath/tools/javah/Keywords.java.  */
static const char *const cxx_keywords[] =
{
  "_Complex",
  "__alignof",
  "__alignof__",
  "__asm",
  "__asm__",
  "__attribute",
  "__attribute__",
  "__builtin_va_arg",
  "__complex",
  "__complex__",
  "__const",
  "__const__",
  "__extension__",
  "__imag",
  "__imag__",
  "__inline",
  "__inline__",
  "__label__",
  "__null",
  "__real",
  "__real__",
  "__restrict",
  "__restrict__",
  "__signed",
  "__signed__",
  "__typeof",
  "__typeof__",
  "__volatile",
  "__volatile__",
  "and",
  "and_eq",
  "asm",
  "auto",
  "bitand",
  "bitor",
  "bool",
  "break",
  "case",
  "catch",
  "char",
  "class",
  "compl",
  "const",
  "const_cast",
  "continue",
  "default",
  "delete",
  "do",
  "double",
  "dynamic_cast",
  "else",
  "enum",
  "explicit",
  "export",
  "extern",
  "false",
  "float",
  "for",
  "friend",
  "goto",
  "if",
  "inline",
  "int",
  "long",
  "mutable",
  "namespace",
  "new",
  "not",
  "not_eq",
  "operator",
  "or",
  "or_eq",
  "private",
  "protected",
  "public",
  "register",
  "reinterpret_cast",
  "return",
  "short",
  "signed",
  "sizeof",
  "static",
  "static_cast",
  "struct",
  "switch",
  "template",
  "this",
  "throw",
  "true",
  "try",
  "typedef",
  "typeid",
  "typename",
  "typeof",
  "union",
  "unsigned",
  "using",
  "virtual",
  "void",
  "volatile",
  "wchar_t",
  "while",
  "xor",
  "xor_eq"
};
180 /* Return true if NAME is a C++ keyword. */
182 cxx_keyword_p (const char *name, int length)
184 int last = ARRAY_SIZE (cxx_keywords);
185 int first = 0;
186 int mid = (last + first) / 2;
187 int old = -1;
189 for (mid = (last + first) / 2;
190 mid != old;
191 old = mid, mid = (last + first) / 2)
193 int kwl = strlen (cxx_keywords[mid]);
194 int min_length = kwl > length ? length : kwl;
195 int r = utf8_cmp ((const unsigned char *) name, min_length, cxx_keywords[mid]);
197 if (r == 0)
199 int i;
200 /* We've found a match if all the remaining characters are `$'. */
201 for (i = min_length; i < length && name[i] == '$'; ++i)
203 if (i == length)
204 return 1;
205 r = 1;
208 if (r < 0)
209 last = mid;
210 else
211 first = mid;
213 return 0;
/* If NAME happens to be a C++ keyword (as decided by cxx_keyword_p),
   add `$'.

   NAME and LEN must be modifiable lvalues and are evaluated more than
   once.  On a match NAME is redirected to an alloca'd copy holding
   the original LEN bytes plus a trailing `$', and LEN is incremented.
   The copy is not NUL-terminated.  */
#define MANGLE_CXX_KEYWORDS(NAME, LEN)                  \
do                                                      \
  {                                                     \
    if (cxx_keyword_p ((NAME), (LEN)))                  \
      {                                                 \
        char *tmp_buf = (char *)alloca ((LEN)+1);       \
        memcpy (tmp_buf, (NAME), (LEN));                \
        tmp_buf[(LEN)] = '$';                           \
        (NAME) = tmp_buf;                               \
        (LEN)++;                                        \
      }                                                 \
  }                                                     \
while (0)
232 /* If the assembler doesn't support UTF8 in symbol names, some
233 characters might need to be escaped. */
235 #ifndef HAVE_AS_UTF8
237 /* Assuming (NAME, LEN) is a Utf8-encoding string, emit the string
238 appropriately mangled (with Unicode escapes if needed) to
239 MANGLE_OBSTACK. Note that `java', `lang' and `Object' are used so
240 frequently that they could be cached. */
242 void
243 append_gpp_mangled_name (const char *name, int len)
245 int encoded_len, needs_escapes;
246 char buf[6];
248 MANGLE_CXX_KEYWORDS (name, len);
250 encoded_len = unicode_mangling_length (name, len);
251 needs_escapes = encoded_len > 0;
253 sprintf (buf, "%d", (needs_escapes ? encoded_len : len));
254 obstack_grow (mangle_obstack, buf, strlen (buf));
256 if (needs_escapes)
257 append_unicode_mangled_name (name, len);
258 else
259 obstack_grow (mangle_obstack, name, len);
262 /* Assuming (NAME, LEN) is a Utf8-encoded string, emit the string
263 appropriately mangled (with Unicode escapes) to MANGLE_OBSTACK.
264 Characters needing an escape are encoded `__UNN_' to `__UNNNN_', in
265 which case `__U' will be mangled `__U_'. */
267 static void
268 append_unicode_mangled_name (const char *name, int len)
270 const unsigned char *ptr;
271 const unsigned char *limit = (const unsigned char *)name + len;
272 int uuU = 0;
273 for (ptr = (const unsigned char *) name; ptr < limit; )
275 int ch = UTF8_GET(ptr, limit);
277 if ((ISALNUM (ch) && ch != 'U') || ch == '$')
279 obstack_1grow (mangle_obstack, ch);
280 uuU = 0;
282 /* Everything else needs encoding */
283 else
285 char buf [9];
286 if (ch == '_' || ch == 'U')
288 /* Prepare to recognize __U */
289 if (ch == '_' && (uuU < 3))
291 uuU++;
292 obstack_1grow (mangle_obstack, ch);
294 /* We recognize __U that we wish to encode
295 __U_. Finish the encoding. */
296 else if (ch == 'U' && (uuU == 2))
298 uuU = 0;
299 obstack_grow (mangle_obstack, "U_", 2);
301 /* Otherwise, just reset uuU and emit the character we
302 have. */
303 else
305 uuU = 0;
306 obstack_1grow (mangle_obstack, ch);
308 continue;
310 sprintf (buf, "__U%x_", ch);
311 obstack_grow (mangle_obstack, buf, strlen (buf));
312 uuU = 0;
/* Assuming (NAME, LEN) is a Utf8-encoded string, calculate the
   length of the string as mangled (a la g++) including Unicode
   escapes.  If no escapes are needed, return 0.

   An invalid character (negative result from UTF8_GET) is reported
   with error () and scanning then continues.  */

static int
unicode_mangling_length (const char *name, int len)
{
  const unsigned char *ptr;
  const unsigned char *limit = (const unsigned char *)name + len;
  int need_escapes = 0;         /* Whether we need an escape or not */
  int num_chars = 0;            /* Number of characters in the mangled name */
  int uuU = 0;                  /* Consecutive `_' seen so far (at most 3);
                                   helps us to find __U.  */
  for (ptr = (const unsigned char *) name; ptr < limit; )
    {
      int ch = UTF8_GET(ptr, limit);

      if (ch < 0)
        error ("internal error - invalid Utf8 name");
      if ((ISALNUM (ch) && ch != 'U') || ch == '$')
        {
          num_chars++;
          uuU = 0;
        }
      /* Everything else needs encoding */
      else
        {
          /* Hex digits in the escape: two, or more for larger values.  */
          int encoding_length = 2;

          if (ch == '_' || ch == 'U')
            {
              /* It's always at least one character. */
              num_chars++;

              /* Prepare to recognize __U */
              if (ch == '_' && (uuU < 3))
                uuU++;

              /* We recognize __U that we wish to encode __U_, we
                 count one more character. */
              else if (ch == 'U' && (uuU == 2))
                {
                  num_chars++;
                  need_escapes = 1;
                  uuU = 0;
                }
              /* Otherwise, just reset uuU */
              else
                uuU = 0;

              continue;
            }

          if (ch > 0xff)
            encoding_length++;
          if (ch > 0xfff)
            encoding_length++;

          /* Four fixed bytes (`__U' and the closing `_') plus the hex
             digits.  */
          num_chars += (4 + encoding_length);
          need_escapes = 1;
          uuU = 0;
        }
    }
  if (need_escapes)
    return num_chars;
  else
    return 0;
}
385 #else
387 /* The assembler supports UTF8, we don't use escapes. Mangling is
388 simply <N>NAME. <N> is the number of UTF8 encoded characters that
389 are found in NAME. Note that `java', `lang' and `Object' are used
390 so frequently that they could be cached. */
392 void
393 append_gpp_mangled_name (const char *name, int len)
395 const unsigned char *ptr;
396 const unsigned char *limit;
397 int encoded_len;
398 char buf [6];
400 MANGLE_CXX_KEYWORDS (name, len);
402 limit = (const unsigned char *)name + len;
404 /* Compute the length of the string we wish to mangle. */
405 for (encoded_len = 0, ptr = (const unsigned char *) name;
406 ptr < limit; encoded_len++)
408 int ch = UTF8_GET(ptr, limit);
410 if (ch < 0)
411 error ("internal error - invalid Utf8 name");
414 sprintf (buf, "%d", encoded_len);
415 obstack_grow (mangle_obstack, buf, strlen (buf));
416 obstack_grow (mangle_obstack, name, len);
419 #endif /* HAVE_AS_UTF8 */