1 /* String pool for GCC.
2 Copyright (C) 2000 Free Software Foundation, Inc.
4 This file is part of GNU CC.
6 GNU CC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 2, or (at your option) any
11 GNU CC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 You should have received a copy of the GNU General Public License
17 along with GNU CC; see the file COPYING. If not, write to the Free
18 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
21 /* String pool allocator. All strings allocated by ggc_alloc_string are
22 uniquified and stored in an obstack which is never shrunk. You can
23 associate a tree with a string if you wish; this is used to implement
26 We have our own private hash table implementation which is similar
27 to the one in cpphash.c (actually, it's a further refinement of
28 that code). libiberty's hashtab.c is not used because it requires
29 100% average space overhead per string, which is unacceptable.
30 Also, this algorithm is faster. */
40 /* The "" allocated string. */
41 const char empty_string
[] = "";
43 /* Character strings, each containing a single decimal digit.
44 Written this way to save space. */
45 const char digit_vector
[] = {
46 '0', 0, '1', 0, '2', 0, '3', 0, '4', 0,
47 '5', 0, '6', 0, '7', 0, '8', 0, '9', 0
50 static struct obstack string_stack
;
52 /* Each hashnode is just a pointer to a TREE_IDENTIFIER. */
53 typedef struct tree_identifier
*sp_hashnode
;
55 #define SP_EMPTY(NODE) ((NODE) == NULL)
56 #define SP_LEN(NODE) ((NODE)->length)
57 #define SP_TREE(NODE) ((tree) NODE)
58 #define SP_STR(NODE) ((NODE)->pointer)
59 #define SP_VALID(NODE) (TREE_CODE (SP_TREE (NODE)) == IDENTIFIER_NODE)
61 /* This is the hash table structure. There's only one. */
65 size_t nslots
; /* total slots in the entries array */
66 size_t nelements
; /* number of live elements */
68 /* table usage statistics */
69 unsigned int searches
;
70 unsigned int collisions
;
72 #define INITIAL_HASHSIZE (16*1024)
74 static struct str_hash string_hash
= { 0, INITIAL_HASHSIZE
, 0, 0, 0 };
76 enum insert_option
{ INSERT
, NO_INSERT
};
78 static sp_hashnode alloc_ident
PARAMS ((const char *, size_t,
80 static inline unsigned int calc_hash
PARAMS ((const unsigned char *, size_t));
81 static void mark_string_hash
PARAMS ((void *));
82 static void expand_string_table
PARAMS ((void));
84 /* Convenience macro for iterating over the hash table. E is set to
85 each live entry in turn. */
86 #define FORALL_IDS(E) \
87 for (E = string_hash.entries; E < string_hash.entries+string_hash.nslots; E++) \
88 if (!SP_EMPTY (*E) && SP_VALID (*E))
90 /* 0 while creating built-in identifiers. */
91 static int do_identifier_warnings
;
93 /* Initialize the string pool. */
97 gcc_obstack_init (&string_stack
);
98 ggc_add_root (&string_hash
, 1, sizeof string_hash
, mark_string_hash
);
100 /* Strings need no alignment. */
101 obstack_alignment_mask (&string_stack
) = 0;
103 string_hash
.entries
= (sp_hashnode
*)
104 xcalloc (string_hash
.nslots
, sizeof (sp_hashnode
));
107 /* Enable warnings on similar identifiers (if requested).
108 Done after the built-in identifiers are created. */
110 start_identifier_warnings ()
112 do_identifier_warnings
= 1;
115 /* Record the size of an identifier node for the language in use.
116 SIZE is the total size in bytes.
117 This is called by the language-specific files. This must be
118 called before allocating any identifiers. */
120 set_identifier_size (size
)
123 tree_code_length
[(int) IDENTIFIER_NODE
]
124 = (size
- sizeof (struct tree_common
)) / sizeof (tree
);
127 /* Calculate the hash of the string STR, which is of length LEN. */
128 static inline unsigned int
130 const unsigned char *str
;
135 #define HASHSTEP(r, c) ((r) * 67 + (c - 113));
138 r
= HASHSTEP (r
, *str
++);
144 /* Internal primitive: returns the header structure for the identifier
145 of length LENGTH, containing CONTENTS. If that identifier already
146 exists in the table, returns the existing entry. If the identifier
147 hasn't been seen before and the last argument is INSERT, inserts
148 and returns a new entry. Otherwise returns NULL. */
150 alloc_ident (contents
, length
, insert
)
151 const char *contents
;
153 enum insert_option insert
;
155 unsigned int hash
= calc_hash ((const unsigned char *)contents
, length
);
161 sizemask
= string_hash
.nslots
- 1;
162 index
= hash
& sizemask
;
164 /* hash2 must be odd, so we're guaranteed to visit every possible
165 location in the table during rehashing. */
166 hash2
= ((hash
* 17) & sizemask
) | 1;
167 string_hash
.searches
++;
171 entry
= string_hash
.entries
[index
];
173 if (SP_EMPTY (entry
))
176 if ((size_t) SP_LEN (entry
) == length
177 && !memcmp (SP_STR (entry
), contents
, length
))
180 index
= (index
+ hash2
) & sizemask
;
181 string_hash
.collisions
++;
184 if (insert
== NO_INSERT
)
187 entry
= (sp_hashnode
) make_node (IDENTIFIER_NODE
);
188 string_hash
.entries
[index
] = entry
;
189 SP_STR (entry
) = ggc_alloc_string (contents
, length
);
190 SP_LEN (entry
) = length
;
191 /* This is not yet an identifier. */
192 TREE_SET_CODE (entry
, ERROR_MARK
);
194 if (++string_hash
.nelements
* 4 >= string_hash
.nslots
* 3)
195 /* Must expand the string table. */
196 expand_string_table ();
201 /* Subroutine of alloc_ident which doubles the size of the hash table
202 and rehashes all the strings into the new table. Returns the entry
203 in the new table corresponding to ENTRY. */
205 expand_string_table ()
207 sp_hashnode
*nentries
;
209 size_t size
, sizemask
;
211 size
= string_hash
.nslots
* 2;
212 nentries
= (sp_hashnode
*) xcalloc (size
, sizeof (sp_hashnode
));
217 unsigned int index
, hash
, hash2
;
219 hash
= calc_hash ((const unsigned char *) SP_STR (*e
), SP_LEN (*e
));
220 hash2
= ((hash
* 17) & sizemask
) | 1;
221 index
= hash
& sizemask
;
225 if (SP_EMPTY (nentries
[index
]))
227 nentries
[index
] = *e
;
231 index
= (index
+ hash2
) & sizemask
;
235 free (string_hash
.entries
);
236 string_hash
.entries
= nentries
;
237 string_hash
.nslots
= size
;
240 /* Allocate and return a string constant of length LENGTH, containing
241 CONTENTS. If LENGTH is -1, CONTENTS is assumed to be a
242 nul-terminated string, and the length is calculated using strlen.
243 If the same string constant has been allocated before, that copy is
244 returned this time too. */
247 ggc_alloc_string (contents
, length
)
248 const char *contents
;
252 length
= strlen (contents
);
256 if (length
== 1 && contents
[0] >= '0' && contents
[0] <= '9')
257 return digit_string (contents
[0] - '0');
259 obstack_grow0 (&string_stack
, contents
, length
);
260 return obstack_finish (&string_stack
);
263 /* Return an IDENTIFIER_NODE whose name is TEXT (a null-terminated string).
264 If an identifier with that name has previously been referred to,
265 the same node is returned this time. */
267 get_identifier (text
)
271 size_t length
= strlen (text
);
273 node
= alloc_ident (text
, length
, INSERT
);
274 if (!SP_VALID (node
))
276 /* If this identifier is longer than the clash-warning length,
277 do a brute force search of the entire table for clashes. */
278 if (warn_id_clash
&& do_identifier_warnings
&& length
>= (size_t) id_clash_len
)
283 if (SP_LEN (*e
) >= id_clash_len
284 && !strncmp (SP_STR (*e
), text
, id_clash_len
))
286 warning ("\"%s\" and \"%s\" identical in first %d characters",
287 text
, SP_STR (*e
), id_clash_len
);
293 TREE_SET_CODE (node
, IDENTIFIER_NODE
);
294 #ifdef GATHER_STATISTICS
295 id_string_size
+= length
;
299 return SP_TREE (node
);
302 /* If an identifier with the name TEXT (a null-terminated string) has
303 previously been referred to, return that node; otherwise return
307 maybe_get_identifier (text
)
311 size_t length
= strlen (text
);
313 node
= alloc_ident (text
, length
, NO_INSERT
);
314 if (!SP_EMPTY (node
) && SP_VALID (node
))
315 return SP_TREE (node
);
320 /* Report some basic statistics about the string pool. */
323 stringpool_statistics ()
325 size_t nelts
, nids
, overhead
, headers
;
326 size_t total_bytes
, longest
, sum_of_squares
;
327 double exp_len
, exp_len2
, exp2_len
;
329 #define SCALE(x) ((unsigned long) ((x) < 1024*10 \
331 : ((x) < 1024*1024*10 \
333 : (x) / (1024*1024))))
334 #define LABEL(x) ((x) < 1024*10 ? ' ' : ((x) < 1024*1024*10 ? 'k' : 'M'))
336 total_bytes
= longest
= sum_of_squares
= nids
= 0;
339 size_t n
= SP_LEN (*e
);
342 sum_of_squares
+= n
*n
;
349 nelts
= string_hash
.nelements
;
350 overhead
= obstack_memory_used (&string_stack
) - total_bytes
;
351 headers
= string_hash
.nslots
* sizeof (sp_hashnode
);
356 identifiers\t%lu (%.2f%%)\n\
358 bytes\t\t%lu%c (%lu%c overhead)\n\
359 table size\t%lu%c\n",
360 (unsigned long) nelts
,
361 (unsigned long) nids
, nids
* 100.0 / nelts
,
362 (unsigned long) string_hash
.nslots
,
363 SCALE (total_bytes
), LABEL (total_bytes
),
364 SCALE (overhead
), LABEL (overhead
),
365 SCALE (headers
), LABEL (headers
));
367 exp_len
= (double)total_bytes
/ (double)nelts
;
368 exp2_len
= exp_len
* exp_len
;
369 exp_len2
= (double)sum_of_squares
/ (double)nelts
;
372 "coll/search\t%.4f\n\
374 avg. entry\t%.2f bytes (+/- %.2f)\n\
375 longest entry\t%lu\n",
376 (double) string_hash
.collisions
/ (double) string_hash
.searches
,
377 (double) nelts
/ (double) string_hash
.searches
,
378 exp_len
, approx_sqrt (exp_len2
- exp2_len
),
379 (unsigned long) longest
);
384 /* Mark the string hash for GC. */
387 mark_string_hash (arg
)
388 void *arg ATTRIBUTE_UNUSED
;
394 ggc_mark_tree (SP_TREE (*h
));