1 /* String pool for GCC.
2 Copyright (C) 2000 Free Software Foundation, Inc.
4 This file is part of GNU CC.
6 GNU CC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 2, or (at your option) any
11 GNU CC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 You should have received a copy of the GNU General Public License
17 along with GNU CC; see the file COPYING. If not, write to the Free
18 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
21 /* String pool allocator. All strings allocated by ggc_alloc_string are
22 uniquified and stored in an obstack which is never shrunk. You can
23 associate a tree with a string if you wish; this is used to implement
26 We have our own private hash table implementation which is similar
27 to the one in cpphash.c (actually, it's a further refinement of
28 that code). libiberty's hashtab.c is not used because it requires
29 100% average space overhead per string, which is unacceptable.
30 Also, this algorithm is faster. */
40 /* The "" allocated string. */
41 const char empty_string
[] = "";
43 /* Character strings, each containing a single decimal digit.
44 Written this way to save space. */
45 const char digit_vector
[] = {
46 '0', 0, '1', 0, '2', 0, '3', 0, '4', 0,
47 '5', 0, '6', 0, '7', 0, '8', 0, '9', 0
50 static struct obstack string_stack
;
52 /* This is the hash entry associated with each string. It lives in
53 the hash table; only the string lives in the obstack. Note that
54 the string is not necessarily NUL terminated. */
59 tree data
; /* for get_identifier */
63 /* This is the hash table structure. There's only one. */
66 struct str_header
*entries
;
67 size_t nslots
; /* total slots in the entries array */
68 size_t nelements
; /* number of live elements */
70 /* table usage statistics */
71 unsigned int searches
;
72 unsigned int collisions
;
74 #define INITIAL_HASHSIZE (16*1024)
76 static struct str_hash string_hash
= { 0, INITIAL_HASHSIZE
, 0, 0, 0 };
78 enum insert_option
{ INSERT
, NO_INSERT
};
80 static struct str_header
*alloc_string
PARAMS ((const char *, size_t,
82 static inline unsigned int calc_hash
PARAMS ((const unsigned char *, size_t));
83 static void mark_string_hash
PARAMS ((void *));
84 static struct str_header
*expand_string_table
PARAMS ((struct str_header
*));
86 /* Convenience macro for iterating over the hash table. E is set to
87 each live entry in turn. */
88 #define FORALL_STRINGS(E) \
89 for (E = string_hash.entries; E < string_hash.entries+string_hash.nslots; E++) \
93 /* Likewise, but tests ->data instead of ->ptr (for cases where we only
94 care about entries with ->data set) */
95 #define FORALL_IDS(E) \
96 for (E = string_hash.entries; E < string_hash.entries+string_hash.nslots; E++) \
99 /* 0 while creating built-in identifiers. */
100 static int do_identifier_warnings
;
102 /* Initialize the string pool. */
106 gcc_obstack_init (&string_stack
);
107 ggc_add_root (&string_hash
, 1, sizeof string_hash
, mark_string_hash
);
109 /* Strings need no alignment. */
110 obstack_alignment_mask (&string_stack
) = 0;
112 string_hash
.entries
= (struct str_header
*)
113 xcalloc (string_hash
.nslots
, sizeof (struct str_header
));
116 /* Enable warnings on similar identifiers (if requested).
117 Done after the built-in identifiers are created. */
119 start_identifier_warnings ()
121 do_identifier_warnings
= 1;
124 /* Record the size of an identifier node for the language in use.
125 SIZE is the total size in bytes.
126 This is called by the language-specific files. This must be
127 called before allocating any identifiers. */
129 set_identifier_size (size
)
132 tree_code_length
[(int) IDENTIFIER_NODE
]
133 = (size
- sizeof (struct tree_common
)) / sizeof (tree
);
136 /* Calculate the hash of the string STR, which is of length LEN. */
137 static inline unsigned int
139 const unsigned char *str
;
144 #define HASHSTEP(r, c) ((r) * 67 + (c - 113));
147 r
= HASHSTEP (r
, *str
++);
153 /* Internal primitive: returns the header structure for the string of
154 length LENGTH, containing CONTENTS. If that string already exists
155 in the table, returns the existing entry. If the string hasn't
156 been seen before and the last argument is INSERT, inserts and returns
157 a new entry. Otherwise returns NULL. */
158 static struct str_header
*
159 alloc_string (contents
, length
, insert
)
160 const char *contents
;
162 enum insert_option insert
;
164 unsigned int hash
= calc_hash ((const unsigned char *)contents
, length
);
168 struct str_header
*entry
;
169 struct str_header
*entries
= string_hash
.entries
;
171 sizemask
= string_hash
.nslots
- 1;
172 index
= hash
& sizemask
;
174 /* hash2 must be odd, so we're guaranteed to visit every possible
175 location in the table during rehashing. */
176 hash2
= ((hash
* 17) & sizemask
) | 1;
177 string_hash
.searches
++;
181 entry
= entries
+ index
;
183 if (entry
->ptr
== NULL
)
186 if (entry
->len
== length
187 && !memcmp (entry
->ptr
, contents
, length
))
190 index
= (index
+ hash2
) & sizemask
;
191 string_hash
.collisions
++;
194 if (insert
== NO_INSERT
)
197 obstack_grow0 (&string_stack
, contents
, length
);
198 entry
->ptr
= (const char *) obstack_finish (&string_stack
);
202 if (++string_hash
.nelements
* 4 < string_hash
.nslots
* 3)
205 /* Must expand the string table. */
206 return expand_string_table (entry
);
209 /* Subroutine of alloc_string which doubles the size of the hash table
210 and rehashes all the strings into the new table. Returns the entry
211 in the new table corresponding to ENTRY. */
212 static struct str_header
*
213 expand_string_table (entry
)
214 struct str_header
*entry
;
216 struct str_header
*nentries
;
217 struct str_header
*e
, *nentry
= NULL
;
218 size_t size
, sizemask
;
220 size
= string_hash
.nslots
* 2;
221 nentries
= (struct str_header
*) xcalloc (size
, sizeof (struct str_header
));
226 unsigned int index
, hash
, hash2
;
228 hash
= calc_hash ((const unsigned char *) e
->ptr
, e
->len
);
229 hash2
= ((hash
* 17) & sizemask
) | 1;
230 index
= hash
& sizemask
;
234 if (nentries
[index
].ptr
== NULL
)
236 nentries
[index
].ptr
= e
->ptr
;
237 nentries
[index
].len
= e
->len
;
238 nentries
[index
].data
= e
->data
;
240 nentry
= nentries
+ index
;
244 index
= (index
+ hash2
) & sizemask
;
248 free (string_hash
.entries
);
249 string_hash
.entries
= nentries
;
250 string_hash
.nslots
= size
;
254 /* Allocate and return a string constant of length LENGTH, containing
255 CONTENTS. If LENGTH is -1, CONTENTS is assumed to be a
256 nul-terminated string, and the length is calculated using strlen.
257 If the same string constant has been allocated before, that copy is
258 returned this time too. */
261 ggc_alloc_string (contents
, length
)
262 const char *contents
;
265 struct str_header
*str
;
268 length
= strlen (contents
);
272 if (length
== 1 && contents
[0] >= '0' && contents
[0] <= '9')
273 return digit_string (contents
[0] - '0');
275 str
= alloc_string (contents
, length
, INSERT
);
279 /* Return an IDENTIFIER_NODE whose name is TEXT (a null-terminated string).
280 If an identifier with that name has previously been referred to,
281 the same node is returned this time. */
283 get_identifier (text
)
287 struct str_header
*str
;
288 size_t length
= strlen (text
);
290 str
= alloc_string (text
, length
, INSERT
);
294 if (TREE_CODE_LENGTH (IDENTIFIER_NODE
) < 0)
295 abort (); /* set_identifier_size hasn't been called. */
297 /* If this identifier is longer than the clash-warning length,
298 do a brute force search of the entire table for clashes. */
299 if (warn_id_clash
&& do_identifier_warnings
&& length
>= (size_t) id_clash_len
)
301 struct str_header
*e
;
304 if (e
->len
>= (size_t)id_clash_len
305 && !strncmp (e
->ptr
, text
, id_clash_len
))
307 warning ("\"%s\" and \"%s\" identical in first %d characters",
308 text
, e
->ptr
, id_clash_len
);
314 idp
= make_node (IDENTIFIER_NODE
);
315 IDENTIFIER_LENGTH (idp
) = length
;
316 IDENTIFIER_POINTER (idp
) = str
->ptr
;
317 #ifdef GATHER_STATISTICS
318 id_string_size
+= length
;
325 /* If an identifier with the name TEXT (a null-terminated string) has
326 previously been referred to, return that node; otherwise return
330 maybe_get_identifier (text
)
333 struct str_header
*str
;
334 size_t length
= strlen (text
);
336 str
= alloc_string (text
, length
, NO_INSERT
);
338 return str
->data
; /* N.B. str->data might be null here, if the
339 string has been used but not as an identifier. */
343 /* Report some basic statistics about the string pool. */
346 stringpool_statistics ()
348 size_t nelts
, nids
, overhead
, headers
;
349 size_t total_bytes
, longest
, sum_of_squares
;
350 double exp_len
, exp_len2
, exp2_len
;
351 struct str_header
*e
;
352 #define SCALE(x) ((unsigned long) ((x) < 1024*10 \
354 : ((x) < 1024*1024*10 \
356 : (x) / (1024*1024))))
357 #define LABEL(x) ((x) < 1024*10 ? ' ' : ((x) < 1024*1024*10 ? 'k' : 'M'))
359 total_bytes
= longest
= sum_of_squares
= nids
= 0;
365 sum_of_squares
+= n
*n
;
372 nelts
= string_hash
.nelements
;
373 overhead
= obstack_memory_used (&string_stack
) - total_bytes
;
374 headers
= string_hash
.nslots
* sizeof (struct str_header
);
379 identifiers\t%lu (%.2f%%)\n\
381 bytes\t\t%lu%c (%lu%c overhead)\n\
382 table size\t%lu%c\n",
383 (unsigned long) nelts
,
384 (unsigned long) nids
, nids
* 100.0 / nelts
,
385 (unsigned long) string_hash
.nslots
,
386 SCALE (total_bytes
), LABEL (total_bytes
),
387 SCALE (overhead
), LABEL (overhead
),
388 SCALE (headers
), LABEL (headers
));
390 exp_len
= (double)total_bytes
/ (double)nelts
;
391 exp2_len
= exp_len
* exp_len
;
392 exp_len2
= (double)sum_of_squares
/ (double)nelts
;
395 "coll/search\t%.4f\n\
397 avg. entry\t%.2f bytes (+/- %.2f)\n\
398 longest entry\t%lu\n",
399 (double) string_hash
.collisions
/ (double) string_hash
.searches
,
400 (double) nelts
/ (double) string_hash
.searches
,
401 exp_len
, approx_sqrt (exp_len2
- exp2_len
),
402 (unsigned long) longest
);
407 /* Mark the string hash for GC. */
410 mark_string_hash (arg
)
411 void *arg ATTRIBUTE_UNUSED
;
413 struct str_header
*h
;
417 ggc_mark_tree (h
->data
);