* extend.texi: Improve documentation of volatile asms.
[official-gcc.git] / gcc / stringpool.c
blob199d51ff0b9b76713732b022cb79642a983f5492
1 /* String pool for GCC.
2 Copyright (C) 2000, 2001 Free Software Foundation, Inc.
4 This file is part of GNU CC.
6 GNU CC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 2, or (at your option) any
9 later version.
11 GNU CC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GNU CC; see the file COPYING. If not, write to the Free
18 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
19 02111-1307, USA. */
21 /* String pool allocator. All strings allocated by ggc_alloc_string are
22 uniquified and stored in an obstack which is never shrunk. You can
23 associate a tree with a string if you wish; this is used to implement
24 get_identifier.
26 We have our own private hash table implementation which is similar
27 to the one in cpphash.c (actually, it's a further refinement of
28 that code). libiberty's hashtab.c is not used because it requires
29 100% average space overhead per string, which is unacceptable.
30 Also, this algorithm is faster. */
32 #include "config.h"
33 #include "system.h"
34 #include "ggc.h"
35 #include "tree.h"
36 #include "obstack.h"
37 #include "flags.h"
38 #include "toplev.h"
40 /* The "" allocated string. */
41 const char empty_string[] = "";
43 /* Character strings, each containing a single decimal digit.
44 Written this way to save space. */
45 const char digit_vector[] = {
46 '0', 0, '1', 0, '2', 0, '3', 0, '4', 0,
47 '5', 0, '6', 0, '7', 0, '8', 0, '9', 0
50 static struct obstack string_stack;
52 /* This is the hash entry associated with each string. It lives in
53 the hash table; only the string lives in the obstack. Note that
54 the string is not necessarily NUL terminated. */
56 struct str_header
58 const char *ptr;
59 tree data; /* for get_identifier */
60 unsigned int len;
63 /* This is the hash table structure. There's only one. */
64 struct str_hash
66 struct str_header *entries;
67 size_t nslots; /* total slots in the entries array */
68 size_t nelements; /* number of live elements */
70 /* table usage statistics */
71 unsigned int searches;
72 unsigned int collisions;
74 #define INITIAL_HASHSIZE (16*1024)
76 static struct str_hash string_hash = { 0, INITIAL_HASHSIZE, 0, 0, 0 };
78 enum insert_option { INSERT, NO_INSERT };
80 static struct str_header *alloc_string PARAMS ((const char *, size_t,
81 enum insert_option));
82 static inline unsigned int calc_hash PARAMS ((const unsigned char *, size_t));
83 static void mark_string_hash PARAMS ((void *));
84 static struct str_header *expand_string_table PARAMS ((struct str_header *));
86 /* Convenience macro for iterating over the hash table. E is set to
87 each live entry in turn. */
88 #define FORALL_STRINGS(E) \
89 for (E = string_hash.entries; E < string_hash.entries+string_hash.nslots; E++) \
90 if (E->ptr != NULL)
91 /* block here */
93 /* Likewise, but tests ->data instead of ->ptr (for cases where we only
94 care about entries with ->data set) */
95 #define FORALL_IDS(E) \
96 for (E = string_hash.entries; E < string_hash.entries+string_hash.nslots; E++) \
97 if (E->data != NULL)
99 /* 0 while creating built-in identifiers. */
100 static int do_identifier_warnings;
102 /* Initialize the string pool. */
103 void
104 init_stringpool ()
106 gcc_obstack_init (&string_stack);
107 ggc_add_root (&string_hash, 1, sizeof string_hash, mark_string_hash);
109 /* Strings need no alignment. */
110 obstack_alignment_mask (&string_stack) = 0;
112 string_hash.entries = (struct str_header *)
113 xcalloc (string_hash.nslots, sizeof (struct str_header));
116 /* Enable warnings on similar identifiers (if requested).
117 Done after the built-in identifiers are created. */
118 void
119 start_identifier_warnings ()
121 do_identifier_warnings = 1;
124 /* Record the size of an identifier node for the language in use.
125 SIZE is the total size in bytes.
126 This is called by the language-specific files. This must be
127 called before allocating any identifiers. */
128 void
129 set_identifier_size (size)
130 int size;
132 tree_code_length[(int) IDENTIFIER_NODE]
133 = (size - sizeof (struct tree_common)) / sizeof (tree);
136 /* Calculate the hash of the string STR, which is of length LEN. */
137 static inline unsigned int
138 calc_hash (str, len)
139 const unsigned char *str;
140 size_t len;
142 size_t n = len;
143 unsigned int r = 0;
144 #define HASHSTEP(r, c) ((r) * 67 + (c - 113));
146 while (n--)
147 r = HASHSTEP (r, *str++);
149 return r + len;
150 #undef HASHSTEP
153 /* Internal primitive: returns the header structure for the string of
154 length LENGTH, containing CONTENTS. If that string already exists
155 in the table, returns the existing entry. If the string hasn't
156 been seen before and the last argument is INSERT, inserts and returns
157 a new entry. Otherwise returns NULL. */
158 static struct str_header *
159 alloc_string (contents, length, insert)
160 const char *contents;
161 size_t length;
162 enum insert_option insert;
164 unsigned int hash = calc_hash ((const unsigned char *)contents, length);
165 unsigned int hash2;
166 unsigned int index;
167 size_t sizemask;
168 struct str_header *entry;
169 struct str_header *entries = string_hash.entries;
171 sizemask = string_hash.nslots - 1;
172 index = hash & sizemask;
174 /* hash2 must be odd, so we're guaranteed to visit every possible
175 location in the table during rehashing. */
176 hash2 = ((hash * 17) & sizemask) | 1;
177 string_hash.searches++;
179 for (;;)
181 entry = entries + index;
183 if (entry->ptr == NULL)
184 break;
186 if (entry->len == length
187 && !memcmp (entry->ptr, contents, length))
188 return entry;
190 index = (index + hash2) & sizemask;
191 string_hash.collisions++;
194 if (insert == NO_INSERT)
195 return NULL;
197 obstack_grow0 (&string_stack, contents, length);
198 entry->ptr = (const char *) obstack_finish (&string_stack);
199 entry->len = length;
200 entry->data = NULL;
202 if (++string_hash.nelements * 4 < string_hash.nslots * 3)
203 return entry;
205 /* Must expand the string table. */
206 return expand_string_table (entry);
209 /* Subroutine of alloc_string which doubles the size of the hash table
210 and rehashes all the strings into the new table. Returns the entry
211 in the new table corresponding to ENTRY. */
212 static struct str_header *
213 expand_string_table (entry)
214 struct str_header *entry;
216 struct str_header *nentries;
217 struct str_header *e, *nentry = NULL;
218 size_t size, sizemask;
220 size = string_hash.nslots * 2;
221 nentries = (struct str_header *) xcalloc (size, sizeof (struct str_header));
222 sizemask = size - 1;
224 FORALL_STRINGS (e)
226 unsigned int index, hash, hash2;
228 hash = calc_hash ((const unsigned char *) e->ptr, e->len);
229 hash2 = ((hash * 17) & sizemask) | 1;
230 index = hash & sizemask;
232 for (;;)
234 if (nentries[index].ptr == NULL)
236 nentries[index].ptr = e->ptr;
237 nentries[index].len = e->len;
238 nentries[index].data = e->data;
239 if (e == entry)
240 nentry = nentries + index;
241 break;
244 index = (index + hash2) & sizemask;
248 free (string_hash.entries);
249 string_hash.entries = nentries;
250 string_hash.nslots = size;
251 return nentry;
254 /* Allocate and return a string constant of length LENGTH, containing
255 CONTENTS. If LENGTH is -1, CONTENTS is assumed to be a
256 nul-terminated string, and the length is calculated using strlen.
257 If the same string constant has been allocated before, that copy is
258 returned this time too. */
260 const char *
261 ggc_alloc_string (contents, length)
262 const char *contents;
263 int length;
265 struct str_header *str;
267 if (length == -1)
268 length = strlen (contents);
270 if (length == 0)
271 return empty_string;
272 if (length == 1 && contents[0] >= '0' && contents[0] <= '9')
273 return digit_string (contents[0] - '0');
275 str = alloc_string (contents, length, INSERT);
276 return str->ptr;
279 int xxx, yyy;
281 /* Return an IDENTIFIER_NODE whose name is TEXT (a null-terminated string).
282 If an identifier with that name has previously been referred to,
283 the same node is returned this time. */
284 tree
285 get_identifier (text)
286 const char *text;
288 tree idp;
289 struct str_header *str;
290 size_t length = strlen (text);
292 str = alloc_string (text, length, INSERT);
293 idp = str->data;
294 if (idp == NULL)
296 if (TREE_CODE_LENGTH (IDENTIFIER_NODE) < 0)
297 abort (); /* set_identifier_size hasn't been called. */
299 /* If this identifier is longer than the clash-warning length,
300 do a brute force search of the entire table for clashes. */
301 if (warn_id_clash && do_identifier_warnings && length >= (size_t) id_clash_len)
303 struct str_header *e;
304 FORALL_IDS (e)
306 if (e->len >= (size_t)id_clash_len
307 && !strncmp (e->ptr, text, id_clash_len))
309 warning ("\"%s\" and \"%s\" identical in first %d characters",
310 text, e->ptr, id_clash_len);
311 break;
316 if (strncmp (text, "_Z", 2) == 0)
317 ++xxx;
318 else
319 ++yyy;
321 idp = make_node (IDENTIFIER_NODE);
322 IDENTIFIER_LENGTH (idp) = length;
323 IDENTIFIER_POINTER (idp) = str->ptr;
324 #ifdef GATHER_STATISTICS
325 id_string_size += length;
326 #endif
327 str->data = idp;
329 return idp;
332 /* If an identifier with the name TEXT (a null-terminated string) has
333 previously been referred to, return that node; otherwise return
334 NULL_TREE. */
336 tree
337 maybe_get_identifier (text)
338 const char *text;
340 struct str_header *str;
341 size_t length = strlen (text);
343 str = alloc_string (text, length, NO_INSERT);
344 if (str)
345 return str->data; /* N.B. str->data might be null here, if the
346 string has been used but not as an identifier. */
347 return NULL_TREE;
350 /* Look up an identifier with the name TEXT, replace its identifier
351 node with NODE, and return the old identifier node. This is used
352 by languages which need to enable and disable keywords based on
353 context; e.g. see remember_protocol_qualifiers in objc/objc-act.c. */
354 tree
355 set_identifier (text, node)
356 const char *text;
357 tree node;
359 struct str_header *str;
360 tree old;
361 size_t length = strlen (text);
363 str = alloc_string (text, length, INSERT);
364 old = str->data; /* might be null */
365 str->data = node;
366 return old;
369 /* Report some basic statistics about the string pool. */
371 void
372 stringpool_statistics ()
374 size_t nelts, nids, overhead, headers;
375 size_t total_bytes, longest, sum_of_squares;
376 double exp_len, exp_len2, exp2_len;
377 struct str_header *e;
378 #define SCALE(x) ((unsigned long) ((x) < 1024*10 \
379 ? (x) \
380 : ((x) < 1024*1024*10 \
381 ? (x) / 1024 \
382 : (x) / (1024*1024))))
383 #define LABEL(x) ((x) < 1024*10 ? ' ' : ((x) < 1024*1024*10 ? 'k' : 'M'))
385 total_bytes = longest = sum_of_squares = nids = 0;
386 FORALL_STRINGS (e)
388 size_t n = e->len;
390 total_bytes += n;
391 sum_of_squares += n*n;
392 if (n > longest)
393 longest = n;
394 if (e->data)
395 nids++;
398 nelts = string_hash.nelements;
399 overhead = obstack_memory_used (&string_stack) - total_bytes;
400 headers = string_hash.nslots * sizeof (struct str_header);
402 fprintf (stderr,
403 "\nString pool\n\
404 entries\t\t%lu\n\
405 identifiers\t%lu (%.2f%%)\n\
406 slots\t\t%lu\n\
407 bytes\t\t%lu%c (%lu%c overhead)\n\
408 table size\t%lu%c\n",
409 (unsigned long) nelts,
410 (unsigned long) nids, nids * 100.0 / nelts,
411 (unsigned long) string_hash.nslots,
412 SCALE (total_bytes), LABEL (total_bytes),
413 SCALE (overhead), LABEL (overhead),
414 SCALE (headers), LABEL (headers));
416 exp_len = (double)total_bytes / (double)nelts;
417 exp2_len = exp_len * exp_len;
418 exp_len2 = (double)sum_of_squares / (double)nelts;
420 fprintf (stderr,
421 "coll/search\t%.4f\n\
422 ins/search\t%.4f\n\
423 avg. entry\t%.2f bytes (+/- %.2f)\n\
424 longest entry\t%lu\n",
425 (double) string_hash.collisions / (double) string_hash.searches,
426 (double) nelts / (double) string_hash.searches,
427 exp_len, approx_sqrt (exp_len2 - exp2_len),
428 (unsigned long) longest);
429 #undef SCALE
430 #undef LABEL
433 /* Mark the string hash for GC. */
435 static void
436 mark_string_hash (arg)
437 void *arg ATTRIBUTE_UNUSED;
439 struct str_header *h;
441 FORALL_IDS (h)
443 ggc_mark_tree (h->data);