* stringpool.c (xxx, yyy): Remove.
[official-gcc.git] / gcc / stringpool.c
blob2b4fefa9525514241f596b95651447356edfe57c
1 /* String pool for GCC.
2 Copyright (C) 2000, 2001 Free Software Foundation, Inc.
4 This file is part of GNU CC.
6 GNU CC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 2, or (at your option) any
9 later version.
11 GNU CC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GNU CC; see the file COPYING. If not, write to the Free
18 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
19 02111-1307, USA. */
21 /* String pool allocator. All strings allocated by ggc_alloc_string are
22 uniquified and stored in an obstack which is never shrunk. You can
23 associate a tree with a string if you wish; this is used to implement
24 get_identifier.
26 We have our own private hash table implementation which is similar
27 to the one in cpphash.c (actually, it's a further refinement of
28 that code). libiberty's hashtab.c is not used because it requires
29 100% average space overhead per string, which is unacceptable.
30 Also, this algorithm is faster. */
32 #include "config.h"
33 #include "system.h"
34 #include "ggc.h"
35 #include "tree.h"
36 #include "obstack.h"
37 #include "flags.h"
38 #include "toplev.h"
40 /* The "" allocated string. */
41 const char empty_string[] = "";
43 /* Character strings, each containing a single decimal digit.
44 Written this way to save space. */
45 const char digit_vector[] = {
46 '0', 0, '1', 0, '2', 0, '3', 0, '4', 0,
47 '5', 0, '6', 0, '7', 0, '8', 0, '9', 0
50 static struct obstack string_stack;
52 /* This is the hash entry associated with each string. It lives in
53 the hash table; only the string lives in the obstack. Note that
54 the string is not necessarily NUL terminated. */
56 struct str_header
58 const char *ptr;
59 tree data; /* for get_identifier */
60 unsigned int len;
63 /* This is the hash table structure. There's only one. */
64 struct str_hash
66 struct str_header *entries;
67 size_t nslots; /* total slots in the entries array */
68 size_t nelements; /* number of live elements */
70 /* table usage statistics */
71 unsigned int searches;
72 unsigned int collisions;
74 #define INITIAL_HASHSIZE (16*1024)
76 static struct str_hash string_hash = { 0, INITIAL_HASHSIZE, 0, 0, 0 };
78 enum insert_option { INSERT, NO_INSERT };
80 static struct str_header *alloc_string PARAMS ((const char *, size_t,
81 enum insert_option));
82 static inline unsigned int calc_hash PARAMS ((const unsigned char *, size_t));
83 static void mark_string_hash PARAMS ((void *));
84 static struct str_header *expand_string_table PARAMS ((struct str_header *));
86 /* Convenience macro for iterating over the hash table. E is set to
87 each live entry in turn. */
88 #define FORALL_STRINGS(E) \
89 for (E = string_hash.entries; E < string_hash.entries+string_hash.nslots; E++) \
90 if (E->ptr != NULL)
91 /* block here */
93 /* Likewise, but tests ->data instead of ->ptr (for cases where we only
94 care about entries with ->data set) */
95 #define FORALL_IDS(E) \
96 for (E = string_hash.entries; E < string_hash.entries+string_hash.nslots; E++) \
97 if (E->data != NULL)
99 /* 0 while creating built-in identifiers. */
100 static int do_identifier_warnings;
102 /* Initialize the string pool. */
103 void
104 init_stringpool ()
106 gcc_obstack_init (&string_stack);
107 ggc_add_root (&string_hash, 1, sizeof string_hash, mark_string_hash);
109 /* Strings need no alignment. */
110 obstack_alignment_mask (&string_stack) = 0;
112 string_hash.entries = (struct str_header *)
113 xcalloc (string_hash.nslots, sizeof (struct str_header));
116 /* Enable warnings on similar identifiers (if requested).
117 Done after the built-in identifiers are created. */
118 void
119 start_identifier_warnings ()
121 do_identifier_warnings = 1;
124 /* Record the size of an identifier node for the language in use.
125 SIZE is the total size in bytes.
126 This is called by the language-specific files. This must be
127 called before allocating any identifiers. */
128 void
129 set_identifier_size (size)
130 int size;
132 tree_code_length[(int) IDENTIFIER_NODE]
133 = (size - sizeof (struct tree_common)) / sizeof (tree);
136 /* Calculate the hash of the string STR, which is of length LEN. */
137 static inline unsigned int
138 calc_hash (str, len)
139 const unsigned char *str;
140 size_t len;
142 size_t n = len;
143 unsigned int r = 0;
144 #define HASHSTEP(r, c) ((r) * 67 + (c - 113));
146 while (n--)
147 r = HASHSTEP (r, *str++);
149 return r + len;
150 #undef HASHSTEP
153 /* Internal primitive: returns the header structure for the string of
154 length LENGTH, containing CONTENTS. If that string already exists
155 in the table, returns the existing entry. If the string hasn't
156 been seen before and the last argument is INSERT, inserts and returns
157 a new entry. Otherwise returns NULL. */
158 static struct str_header *
159 alloc_string (contents, length, insert)
160 const char *contents;
161 size_t length;
162 enum insert_option insert;
164 unsigned int hash = calc_hash ((const unsigned char *)contents, length);
165 unsigned int hash2;
166 unsigned int index;
167 size_t sizemask;
168 struct str_header *entry;
169 struct str_header *entries = string_hash.entries;
171 sizemask = string_hash.nslots - 1;
172 index = hash & sizemask;
174 /* hash2 must be odd, so we're guaranteed to visit every possible
175 location in the table during rehashing. */
176 hash2 = ((hash * 17) & sizemask) | 1;
177 string_hash.searches++;
179 for (;;)
181 entry = entries + index;
183 if (entry->ptr == NULL)
184 break;
186 if (entry->len == length
187 && !memcmp (entry->ptr, contents, length))
188 return entry;
190 index = (index + hash2) & sizemask;
191 string_hash.collisions++;
194 if (insert == NO_INSERT)
195 return NULL;
197 obstack_grow0 (&string_stack, contents, length);
198 entry->ptr = (const char *) obstack_finish (&string_stack);
199 entry->len = length;
200 entry->data = NULL;
202 if (++string_hash.nelements * 4 < string_hash.nslots * 3)
203 return entry;
205 /* Must expand the string table. */
206 return expand_string_table (entry);
209 /* Subroutine of alloc_string which doubles the size of the hash table
210 and rehashes all the strings into the new table. Returns the entry
211 in the new table corresponding to ENTRY. */
212 static struct str_header *
213 expand_string_table (entry)
214 struct str_header *entry;
216 struct str_header *nentries;
217 struct str_header *e, *nentry = NULL;
218 size_t size, sizemask;
220 size = string_hash.nslots * 2;
221 nentries = (struct str_header *) xcalloc (size, sizeof (struct str_header));
222 sizemask = size - 1;
224 FORALL_STRINGS (e)
226 unsigned int index, hash, hash2;
228 hash = calc_hash ((const unsigned char *) e->ptr, e->len);
229 hash2 = ((hash * 17) & sizemask) | 1;
230 index = hash & sizemask;
232 for (;;)
234 if (nentries[index].ptr == NULL)
236 nentries[index].ptr = e->ptr;
237 nentries[index].len = e->len;
238 nentries[index].data = e->data;
239 if (e == entry)
240 nentry = nentries + index;
241 break;
244 index = (index + hash2) & sizemask;
248 free (string_hash.entries);
249 string_hash.entries = nentries;
250 string_hash.nslots = size;
251 return nentry;
254 /* Allocate and return a string constant of length LENGTH, containing
255 CONTENTS. If LENGTH is -1, CONTENTS is assumed to be a
256 nul-terminated string, and the length is calculated using strlen.
257 If the same string constant has been allocated before, that copy is
258 returned this time too. */
260 const char *
261 ggc_alloc_string (contents, length)
262 const char *contents;
263 int length;
265 struct str_header *str;
267 if (length == -1)
268 length = strlen (contents);
270 if (length == 0)
271 return empty_string;
272 if (length == 1 && contents[0] >= '0' && contents[0] <= '9')
273 return digit_string (contents[0] - '0');
275 str = alloc_string (contents, length, INSERT);
276 return str->ptr;
279 /* Return an IDENTIFIER_NODE whose name is TEXT (a null-terminated string).
280 If an identifier with that name has previously been referred to,
281 the same node is returned this time. */
282 tree
283 get_identifier (text)
284 const char *text;
286 tree idp;
287 struct str_header *str;
288 size_t length = strlen (text);
290 str = alloc_string (text, length, INSERT);
291 idp = str->data;
292 if (idp == NULL)
294 if (TREE_CODE_LENGTH (IDENTIFIER_NODE) < 0)
295 abort (); /* set_identifier_size hasn't been called. */
297 /* If this identifier is longer than the clash-warning length,
298 do a brute force search of the entire table for clashes. */
299 if (warn_id_clash && do_identifier_warnings && length >= (size_t) id_clash_len)
301 struct str_header *e;
302 FORALL_IDS (e)
304 if (e->len >= (size_t)id_clash_len
305 && !strncmp (e->ptr, text, id_clash_len))
307 warning ("\"%s\" and \"%s\" identical in first %d characters",
308 text, e->ptr, id_clash_len);
309 break;
314 idp = make_node (IDENTIFIER_NODE);
315 IDENTIFIER_LENGTH (idp) = length;
316 IDENTIFIER_POINTER (idp) = str->ptr;
317 #ifdef GATHER_STATISTICS
318 id_string_size += length;
319 #endif
320 str->data = idp;
322 return idp;
325 /* If an identifier with the name TEXT (a null-terminated string) has
326 previously been referred to, return that node; otherwise return
327 NULL_TREE. */
329 tree
330 maybe_get_identifier (text)
331 const char *text;
333 struct str_header *str;
334 size_t length = strlen (text);
336 str = alloc_string (text, length, NO_INSERT);
337 if (str)
338 return str->data; /* N.B. str->data might be null here, if the
339 string has been used but not as an identifier. */
340 return NULL_TREE;
343 /* Look up an identifier with the name TEXT, replace its identifier
344 node with NODE, and return the old identifier node. This is used
345 by languages which need to enable and disable keywords based on
346 context; e.g. see remember_protocol_qualifiers in objc/objc-act.c. */
347 tree
348 set_identifier (text, node)
349 const char *text;
350 tree node;
352 struct str_header *str;
353 tree old;
354 size_t length = strlen (text);
356 str = alloc_string (text, length, INSERT);
357 old = str->data; /* might be null */
358 str->data = node;
359 return old;
362 /* Report some basic statistics about the string pool. */
364 void
365 stringpool_statistics ()
367 size_t nelts, nids, overhead, headers;
368 size_t total_bytes, longest, sum_of_squares;
369 double exp_len, exp_len2, exp2_len;
370 struct str_header *e;
371 #define SCALE(x) ((unsigned long) ((x) < 1024*10 \
372 ? (x) \
373 : ((x) < 1024*1024*10 \
374 ? (x) / 1024 \
375 : (x) / (1024*1024))))
376 #define LABEL(x) ((x) < 1024*10 ? ' ' : ((x) < 1024*1024*10 ? 'k' : 'M'))
378 total_bytes = longest = sum_of_squares = nids = 0;
379 FORALL_STRINGS (e)
381 size_t n = e->len;
383 total_bytes += n;
384 sum_of_squares += n*n;
385 if (n > longest)
386 longest = n;
387 if (e->data)
388 nids++;
391 nelts = string_hash.nelements;
392 overhead = obstack_memory_used (&string_stack) - total_bytes;
393 headers = string_hash.nslots * sizeof (struct str_header);
395 fprintf (stderr,
396 "\nString pool\n\
397 entries\t\t%lu\n\
398 identifiers\t%lu (%.2f%%)\n\
399 slots\t\t%lu\n\
400 bytes\t\t%lu%c (%lu%c overhead)\n\
401 table size\t%lu%c\n",
402 (unsigned long) nelts,
403 (unsigned long) nids, nids * 100.0 / nelts,
404 (unsigned long) string_hash.nslots,
405 SCALE (total_bytes), LABEL (total_bytes),
406 SCALE (overhead), LABEL (overhead),
407 SCALE (headers), LABEL (headers));
409 exp_len = (double)total_bytes / (double)nelts;
410 exp2_len = exp_len * exp_len;
411 exp_len2 = (double)sum_of_squares / (double)nelts;
413 fprintf (stderr,
414 "coll/search\t%.4f\n\
415 ins/search\t%.4f\n\
416 avg. entry\t%.2f bytes (+/- %.2f)\n\
417 longest entry\t%lu\n",
418 (double) string_hash.collisions / (double) string_hash.searches,
419 (double) nelts / (double) string_hash.searches,
420 exp_len, approx_sqrt (exp_len2 - exp2_len),
421 (unsigned long) longest);
422 #undef SCALE
423 #undef LABEL
426 /* Mark the string hash for GC. */
428 static void
429 mark_string_hash (arg)
430 void *arg ATTRIBUTE_UNUSED;
432 struct str_header *h;
434 FORALL_IDS (h)
436 ggc_mark_tree (h->data);