2000-05-02 Jeff Sturm <jsturm@one-point.com>
[official-gcc.git] / gcc / stringpool.c
blob0346dcfe34cba3bc9128cac72bd4f80dd9e45c6d
1 /* String pool for GCC.
2 Copyright (C) 2000 Free Software Foundation, Inc.
4 This file is part of GNU CC.
6 GNU CC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 2, or (at your option) any
9 later version.
11 GNU CC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GNU CC; see the file COPYING. If not, write to the Free
18 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
19 02111-1307, USA. */
21 /* String pool allocator. All strings allocated by ggc_alloc_string are
22 uniquified and stored in an obstack which is never shrunk. You can
23 associate a tree with a string if you wish; this is used to implement
24 get_identifier.
26 We have our own private hash table implementation which is similar
27 to the one in cpphash.c (actually, it's a further refinement of
28 that code). libiberty's hashtab.c is not used because it requires
29 100% average space overhead per string, which is unacceptable.
30 Also, this algorithm is faster. */
32 #include "config.h"
33 #include "system.h"
34 #include "ggc.h"
35 #include "tree.h"
36 #include "obstack.h"
37 #include "flags.h"
38 #include "toplev.h"
40 /* The "" allocated string. */
41 const char empty_string[] = "";
43 /* Character strings, each containing a single decimal digit.
44 Written this way to save space. */
45 const char digit_vector[] = {
46 '0', 0, '1', 0, '2', 0, '3', 0, '4', 0,
47 '5', 0, '6', 0, '7', 0, '8', 0, '9', 0
50 static struct obstack string_stack;
52 /* Each hashnode is just a pointer to a TREE_IDENTIFIER. */
53 typedef struct tree_identifier *sp_hashnode;
55 #define SP_EMPTY(NODE) ((NODE) == NULL)
56 #define SP_LEN(NODE) ((NODE)->length)
57 #define SP_TREE(NODE) ((tree) NODE)
58 #define SP_STR(NODE) ((NODE)->pointer)
59 #define SP_VALID(NODE) (TREE_CODE (SP_TREE (NODE)) == IDENTIFIER_NODE)
61 /* This is the hash table structure. There's only one. */
62 struct str_hash
64 sp_hashnode *entries;
65 size_t nslots; /* total slots in the entries array */
66 size_t nelements; /* number of live elements */
68 /* table usage statistics */
69 unsigned int searches;
70 unsigned int collisions;
72 #define INITIAL_HASHSIZE (16*1024)
74 static struct str_hash string_hash = { 0, INITIAL_HASHSIZE, 0, 0, 0 };
76 enum insert_option { INSERT, NO_INSERT };
78 static sp_hashnode alloc_ident PARAMS ((const char *, size_t,
79 enum insert_option));
80 static inline unsigned int calc_hash PARAMS ((const unsigned char *, size_t));
81 static void mark_string_hash PARAMS ((void *));
82 static void expand_string_table PARAMS ((void));
84 /* Convenience macro for iterating over the hash table. E is set to
85 each live entry in turn. */
86 #define FORALL_IDS(E) \
87 for (E = string_hash.entries; E < string_hash.entries+string_hash.nslots; E++) \
88 if (!SP_EMPTY (*E) && SP_VALID (*E))
90 /* 0 while creating built-in identifiers. */
91 static int do_identifier_warnings;
93 /* Initialize the string pool. */
94 void
95 init_stringpool ()
97 gcc_obstack_init (&string_stack);
98 ggc_add_root (&string_hash, 1, sizeof string_hash, mark_string_hash);
100 /* Strings need no alignment. */
101 obstack_alignment_mask (&string_stack) = 0;
103 string_hash.entries = (sp_hashnode *)
104 xcalloc (string_hash.nslots, sizeof (sp_hashnode));
107 /* Enable warnings on similar identifiers (if requested).
108 Done after the built-in identifiers are created. */
109 void
110 start_identifier_warnings ()
112 do_identifier_warnings = 1;
115 /* Record the size of an identifier node for the language in use.
116 SIZE is the total size in bytes.
117 This is called by the language-specific files. This must be
118 called before allocating any identifiers. */
119 void
120 set_identifier_size (size)
121 int size;
123 tree_code_length[(int) IDENTIFIER_NODE]
124 = (size - sizeof (struct tree_common)) / sizeof (tree);
127 /* Calculate the hash of the string STR, which is of length LEN. */
128 static inline unsigned int
129 calc_hash (str, len)
130 const unsigned char *str;
131 size_t len;
133 size_t n = len;
134 unsigned int r = 0;
135 #define HASHSTEP(r, c) ((r) * 67 + (c - 113));
137 while (n--)
138 r = HASHSTEP (r, *str++);
140 return r + len;
141 #undef HASHSTEP
144 /* Internal primitive: returns the header structure for the identifier
145 of length LENGTH, containing CONTENTS. If that identifier already
146 exists in the table, returns the existing entry. If the identifier
147 hasn't been seen before and the last argument is INSERT, inserts
148 and returns a new entry. Otherwise returns NULL. */
149 static sp_hashnode
150 alloc_ident (contents, length, insert)
151 const char *contents;
152 size_t length;
153 enum insert_option insert;
155 unsigned int hash = calc_hash ((const unsigned char *)contents, length);
156 unsigned int hash2;
157 unsigned int index;
158 size_t sizemask;
159 sp_hashnode entry;
161 sizemask = string_hash.nslots - 1;
162 index = hash & sizemask;
164 /* hash2 must be odd, so we're guaranteed to visit every possible
165 location in the table during rehashing. */
166 hash2 = ((hash * 17) & sizemask) | 1;
167 string_hash.searches++;
169 for (;;)
171 entry = string_hash.entries[index];
173 if (SP_EMPTY (entry))
174 break;
176 if ((size_t) SP_LEN (entry) == length
177 && !memcmp (SP_STR (entry), contents, length))
178 return entry;
180 index = (index + hash2) & sizemask;
181 string_hash.collisions++;
184 if (insert == NO_INSERT)
185 return NULL;
187 entry = (sp_hashnode) make_node (IDENTIFIER_NODE);
188 string_hash.entries[index] = entry;
189 SP_STR (entry) = ggc_alloc_string (contents, length);
190 SP_LEN (entry) = length;
191 /* This is not yet an identifier. */
192 TREE_SET_CODE (entry, ERROR_MARK);
194 if (++string_hash.nelements * 4 >= string_hash.nslots * 3)
195 /* Must expand the string table. */
196 expand_string_table ();
198 return entry;
201 /* Subroutine of alloc_ident which doubles the size of the hash table
202 and rehashes all the strings into the new table. Returns the entry
203 in the new table corresponding to ENTRY. */
204 static void
205 expand_string_table ()
207 sp_hashnode *nentries;
208 sp_hashnode *e;
209 size_t size, sizemask;
211 size = string_hash.nslots * 2;
212 nentries = (sp_hashnode *) xcalloc (size, sizeof (sp_hashnode));
213 sizemask = size - 1;
215 FORALL_IDS (e)
217 unsigned int index, hash, hash2;
219 hash = calc_hash ((const unsigned char *) SP_STR (*e), SP_LEN (*e));
220 hash2 = ((hash * 17) & sizemask) | 1;
221 index = hash & sizemask;
223 for (;;)
225 if (SP_EMPTY (nentries[index]))
227 nentries[index] = *e;
228 break;
231 index = (index + hash2) & sizemask;
235 free (string_hash.entries);
236 string_hash.entries = nentries;
237 string_hash.nslots = size;
240 /* Allocate and return a string constant of length LENGTH, containing
241 CONTENTS. If LENGTH is -1, CONTENTS is assumed to be a
242 nul-terminated string, and the length is calculated using strlen.
243 If the same string constant has been allocated before, that copy is
244 returned this time too. */
246 const char *
247 ggc_alloc_string (contents, length)
248 const char *contents;
249 int length;
251 if (length == -1)
252 length = strlen (contents);
254 if (length == 0)
255 return empty_string;
256 if (length == 1 && contents[0] >= '0' && contents[0] <= '9')
257 return digit_string (contents[0] - '0');
259 obstack_grow0 (&string_stack, contents, length);
260 return obstack_finish (&string_stack);
263 /* Return an IDENTIFIER_NODE whose name is TEXT (a null-terminated string).
264 If an identifier with that name has previously been referred to,
265 the same node is returned this time. */
266 tree
267 get_identifier (text)
268 const char *text;
270 sp_hashnode node;
271 size_t length = strlen (text);
273 node = alloc_ident (text, length, INSERT);
274 if (!SP_VALID (node))
276 /* If this identifier is longer than the clash-warning length,
277 do a brute force search of the entire table for clashes. */
278 if (warn_id_clash && do_identifier_warnings && length >= (size_t) id_clash_len)
280 sp_hashnode *e;
281 FORALL_IDS (e)
283 if (SP_LEN (*e) >= id_clash_len
284 && !strncmp (SP_STR (*e), text, id_clash_len))
286 warning ("\"%s\" and \"%s\" identical in first %d characters",
287 text, SP_STR (*e), id_clash_len);
288 break;
293 TREE_SET_CODE (node, IDENTIFIER_NODE);
294 #ifdef GATHER_STATISTICS
295 id_string_size += length;
296 #endif
299 return SP_TREE (node);
302 /* If an identifier with the name TEXT (a null-terminated string) has
303 previously been referred to, return that node; otherwise return
304 NULL_TREE. */
306 tree
307 maybe_get_identifier (text)
308 const char *text;
310 sp_hashnode node;
311 size_t length = strlen (text);
313 node = alloc_ident (text, length, NO_INSERT);
314 if (!SP_EMPTY (node) && SP_VALID (node))
315 return SP_TREE (node);
317 return NULL_TREE;
320 /* Report some basic statistics about the string pool. */
322 void
323 stringpool_statistics ()
325 size_t nelts, nids, overhead, headers;
326 size_t total_bytes, longest, sum_of_squares;
327 double exp_len, exp_len2, exp2_len;
328 sp_hashnode *e;
329 #define SCALE(x) ((unsigned long) ((x) < 1024*10 \
330 ? (x) \
331 : ((x) < 1024*1024*10 \
332 ? (x) / 1024 \
333 : (x) / (1024*1024))))
334 #define LABEL(x) ((x) < 1024*10 ? ' ' : ((x) < 1024*1024*10 ? 'k' : 'M'))
336 total_bytes = longest = sum_of_squares = nids = 0;
337 FORALL_IDS (e)
339 size_t n = SP_LEN (*e);
341 total_bytes += n;
342 sum_of_squares += n*n;
343 if (n > longest)
344 longest = n;
345 if (SP_VALID (*e))
346 nids++;
349 nelts = string_hash.nelements;
350 overhead = obstack_memory_used (&string_stack) - total_bytes;
351 headers = string_hash.nslots * sizeof (sp_hashnode);
353 fprintf (stderr,
354 "\nString pool\n\
355 entries\t\t%lu\n\
356 identifiers\t%lu (%.2f%%)\n\
357 slots\t\t%lu\n\
358 bytes\t\t%lu%c (%lu%c overhead)\n\
359 table size\t%lu%c\n",
360 (unsigned long) nelts,
361 (unsigned long) nids, nids * 100.0 / nelts,
362 (unsigned long) string_hash.nslots,
363 SCALE (total_bytes), LABEL (total_bytes),
364 SCALE (overhead), LABEL (overhead),
365 SCALE (headers), LABEL (headers));
367 exp_len = (double)total_bytes / (double)nelts;
368 exp2_len = exp_len * exp_len;
369 exp_len2 = (double)sum_of_squares / (double)nelts;
371 fprintf (stderr,
372 "coll/search\t%.4f\n\
373 ins/search\t%.4f\n\
374 avg. entry\t%.2f bytes (+/- %.2f)\n\
375 longest entry\t%lu\n",
376 (double) string_hash.collisions / (double) string_hash.searches,
377 (double) nelts / (double) string_hash.searches,
378 exp_len, approx_sqrt (exp_len2 - exp2_len),
379 (unsigned long) longest);
380 #undef SCALE
381 #undef LABEL
384 /* Mark the string hash for GC. */
386 static void
387 mark_string_hash (arg)
388 void *arg ATTRIBUTE_UNUSED;
390 sp_hashnode *h;
392 FORALL_IDS (h)
394 ggc_mark_tree (SP_TREE (*h));