Stage 10: avoid extra copying of strings and comments.
[m4/ericb.git] / src / symtab.c
blob277a79f49ed02a192a026f385180425815cd73e0
1 /* GNU m4 -- A simple macro processor
3 Copyright (C) 1989, 1990, 1991, 1992, 1993, 1994, 2003, 2006, 2007, 2008
4 Free Software Foundation, Inc.
6 This file is part of GNU M4.
8 GNU M4 is free software: you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation, either version 3 of the License, or
11 (at your option) any later version.
13 GNU M4 is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program. If not, see <http://www.gnu.org/licenses/>.
22 /* This file handles all the low level work around the symbol table. The
23 symbol table is a simple chained hash table. Each symbol is described
24 by a struct symbol, which is placed in the hash table based upon the
25 symbol name. Symbols that hash to the same entry in the table are
26 kept on a list, sorted by name. As a special case, to facilitate the
27 "pushdef" and "popdef" builtins, a symbol can be several times in the
28 symbol table, one for each definition. Since the name is the same,
29 all the entries for the symbol will be on the same list, and will
30 also, because the list is sorted, be adjacent. All the entries for a
31 name are simply ordered on the list by age. The current definition
32 will then always be the first found. */
34 #include "m4.h"
36 #ifdef DEBUG_SYM
37 /* When evaluating hash table performance, this profiling code shows
38 how many collisions were encountered. */
40 struct profile
42 int entry; /* Number of times lookup_symbol called with this mode. */
43 int comparisons; /* Number of times strcmp was called. */
44 int misses; /* Number of times strcmp did not return 0. */
45 long long bytes; /* Number of bytes compared. */
48 static struct profile profiles[5];
49 static symbol_lookup current_mode;
51 /* On exit, show a profile of symbol table performance. */
52 static void
53 show_profile (void)
55 int i;
56 for (i = 0; i < 5; i++)
58 xfprintf(stderr, "m4: lookup mode %d called %d times, %d compares, "
59 "%d misses, %lld bytes\n",
60 i, profiles[i].entry, profiles[i].comparisons,
61 profiles[i].misses, profiles[i].bytes);
65 /* Like strcmp (S1, S2), but also track profiling statistics. */
66 static int
67 profile_strcmp (const char *s1, const char *s2)
69 int i = 1;
70 int result;
71 while (*s1 && *s1 == *s2)
73 s1++;
74 s2++;
75 i++;
77 result = (unsigned char) *s1 - (unsigned char) *s2;
78 profiles[current_mode].comparisons++;
79 if (result != 0)
80 profiles[current_mode].misses++;
81 profiles[current_mode].bytes += i;
82 return result;
85 # define strcmp profile_strcmp
86 #endif /* DEBUG_SYM */
89 /*----------------------------------------------------------------------.
90 | Initialise the symbol table, by allocating the necessary storage, and |
91 | zeroing all the entries. |
92 `----------------------------------------------------------------------*/
94 /* Pointer to symbol table. */
95 symbol **symtab;
97 void
98 symtab_init (void)
100 size_t i;
101 symbol **s;
103 s = symtab = (symbol **) xnmalloc (hash_table_size, sizeof (symbol *));
105 for (i = 0; i < hash_table_size; i++)
106 s[i] = NULL;
108 #ifdef DEBUG_SYM
109 atexit (show_profile); /* Ignore failure, since this is debug code. */
110 #endif /* DEBUG_SYM */
113 /*--------------------------------------------------.
114 | Return a hashvalue for a string, from GNU-emacs. |
115 `--------------------------------------------------*/
117 static size_t
118 hash (const char *s)
120 register size_t val = 0;
122 register const char *ptr = s;
123 register char ch;
125 while ((ch = *ptr++) != '\0')
126 val = (val << 7) + (val >> (sizeof (val) * CHAR_BIT - 7)) + ch;
127 return val;
130 /*--------------------------------------------.
131 | Free all storage associated with a symbol. |
132 `--------------------------------------------*/
134 void
135 free_symbol (symbol *sym)
137 if (SYMBOL_PENDING_EXPANSIONS (sym) > 0)
138 SYMBOL_DELETED (sym) = true;
139 else
141 free (SYMBOL_NAME (sym));
142 if (SYMBOL_TYPE (sym) == TOKEN_TEXT)
143 free (SYMBOL_TEXT (sym));
144 free (sym);
148 /*-------------------------------------------------------------------.
149 | Search in, and manipulation of the symbol table, are all done by |
150 | lookup_symbol (). It basically hashes NAME to a list in the |
151 | symbol table, and searches this list for the first occurrence of a |
152 | symbol with the name. |
154 | The MODE parameter determines what lookup_symbol () will do. It |
155 | can either just do a lookup, do a lookup and insert if not |
156 | present, do an insertion even if the name is already in the list, |
157 | delete the first occurrence of the name on the list, or delete all |
158 | occurrences of the name on the list. |
159 `-------------------------------------------------------------------*/
161 symbol *
162 lookup_symbol (const char *name, symbol_lookup mode)
164 size_t h;
165 int cmp = 1;
166 symbol *sym, *prev;
167 symbol **spp;
169 #if DEBUG_SYM
170 current_mode = mode;
171 profiles[mode].entry++;
172 #endif /* DEBUG_SYM */
174 h = hash (name);
175 sym = symtab[h % hash_table_size];
177 for (prev = NULL; sym != NULL; prev = sym, sym = sym->next)
179 cmp = strcmp (SYMBOL_NAME (sym), name);
180 if (cmp >= 0)
181 break;
184 /* If just searching, return status of search. */
186 if (mode == SYMBOL_LOOKUP)
187 return cmp == 0 ? sym : NULL;
189 /* Symbol not found. */
191 spp = (prev != NULL) ? &prev->next : &symtab[h % hash_table_size];
193 switch (mode)
196 case SYMBOL_INSERT:
198 /* If the name was found in the table, check whether it is still in
199 use by a pending expansion. If so, replace the table element with
200 a new one; if not, just return the symbol. If not found, just
201 insert the name, and return the new symbol. */
203 if (cmp == 0 && sym != NULL)
205 if (SYMBOL_PENDING_EXPANSIONS (sym) > 0)
207 symbol *old = sym;
208 SYMBOL_DELETED (old) = true;
210 sym = (symbol *) xmalloc (sizeof (symbol));
211 SYMBOL_TYPE (sym) = TOKEN_VOID;
212 SYMBOL_TRACED (sym) = SYMBOL_TRACED (old);
213 SYMBOL_NAME (sym) = xstrdup (name);
214 SYMBOL_SHADOWED (sym) = false;
215 SYMBOL_MACRO_ARGS (sym) = false;
216 SYMBOL_BLIND_NO_ARGS (sym) = false;
217 SYMBOL_DELETED (sym) = false;
218 SYMBOL_PENDING_EXPANSIONS (sym) = 0;
220 SYMBOL_NEXT (sym) = SYMBOL_NEXT (old);
221 SYMBOL_NEXT (old) = NULL;
222 (*spp) = sym;
224 return sym;
226 /* Fall through. */
228 case SYMBOL_PUSHDEF:
230 /* Insert a name in the symbol table. If there is already a symbol
231 with the name, insert this in front of it, and mark the old
232 symbol as "shadowed". */
234 sym = (symbol *) xmalloc (sizeof (symbol));
235 SYMBOL_TYPE (sym) = TOKEN_VOID;
236 SYMBOL_TRACED (sym) = false;
237 SYMBOL_NAME (sym) = xstrdup (name);
238 SYMBOL_SHADOWED (sym) = false;
239 SYMBOL_MACRO_ARGS (sym) = false;
240 SYMBOL_BLIND_NO_ARGS (sym) = false;
241 SYMBOL_DELETED (sym) = false;
242 SYMBOL_PENDING_EXPANSIONS (sym) = 0;
244 SYMBOL_NEXT (sym) = *spp;
245 (*spp) = sym;
247 if (mode == SYMBOL_PUSHDEF && cmp == 0)
249 SYMBOL_SHADOWED (SYMBOL_NEXT (sym)) = true;
250 SYMBOL_TRACED (sym) = SYMBOL_TRACED (SYMBOL_NEXT (sym));
252 return sym;
254 case SYMBOL_DELETE:
255 case SYMBOL_POPDEF:
257 /* Delete occurrences of symbols with NAME. SYMBOL_DELETE kills
258 all definitions, SYMBOL_POPDEF kills only the first.
259 However, if the last instance of a symbol is marked for
260 tracing, reinsert a placeholder in the table. And if the
261 definition is still in use, let the caller free the memory
262 after it is done with the symbol. */
264 if (cmp != 0 || sym == NULL)
265 return NULL;
267 bool traced = false;
268 if (SYMBOL_NEXT (sym) != NULL
269 && SYMBOL_SHADOWED (SYMBOL_NEXT (sym))
270 && mode == SYMBOL_POPDEF)
272 SYMBOL_SHADOWED (SYMBOL_NEXT (sym)) = false;
273 SYMBOL_TRACED (SYMBOL_NEXT (sym)) = SYMBOL_TRACED (sym);
275 else
276 traced = SYMBOL_TRACED (sym);
279 *spp = SYMBOL_NEXT (sym);
280 free_symbol (sym);
281 sym = *spp;
283 while (*spp != NULL && SYMBOL_SHADOWED (*spp)
284 && mode == SYMBOL_DELETE);
285 if (traced)
287 sym = (symbol *) xmalloc (sizeof (symbol));
288 SYMBOL_TYPE (sym) = TOKEN_VOID;
289 SYMBOL_TRACED (sym) = true;
290 SYMBOL_NAME (sym) = xstrdup (name);
291 SYMBOL_SHADOWED (sym) = false;
292 SYMBOL_MACRO_ARGS (sym) = false;
293 SYMBOL_BLIND_NO_ARGS (sym) = false;
294 SYMBOL_DELETED (sym) = false;
295 SYMBOL_PENDING_EXPANSIONS (sym) = 0;
297 SYMBOL_NEXT (sym) = *spp;
298 (*spp) = sym;
301 return NULL;
303 default:
304 assert (!"symbol_lookup");
305 abort ();
309 /*-----------------------------------------------------------------.
310 | The following function is used for the cases where we want to do |
311 | something to each and every symbol in the table. The function |
312 | hack_all_symbols () traverses the symbol table, and calls a |
313 | specified function FUNC for each symbol in the table. FUNC is |
314 | called with a pointer to the symbol, and the DATA argument. |
316 | FUNC may safely call lookup_symbol with mode SYMBOL_POPDEF or |
317 | SYMBOL_LOOKUP, but any other mode can break the iteration. |
318 `-----------------------------------------------------------------*/
320 void
321 hack_all_symbols (hack_symbol *func, void *data)
323 size_t h;
324 symbol *sym;
325 symbol *next;
327 for (h = 0; h < hash_table_size; h++)
329 /* We allow func to call SYMBOL_POPDEF, which can invalidate
330 sym, so we must grab the next element to traverse before
331 calling func. */
332 for (sym = symtab[h]; sym != NULL; sym = next)
334 next = SYMBOL_NEXT (sym);
335 func (sym, data);
340 #ifdef DEBUG_SYM
342 static void symtab_print_list (int i);
344 static void M4_GNUC_UNUSED
345 symtab_debug (void)
347 token_data td;
348 const char *text;
349 symbol *s;
350 int delete;
351 static int i;
353 while (next_token (&td, NULL, NULL, "<debug>") == TOKEN_WORD)
355 text = TOKEN_DATA_TEXT (&td);
356 if (*text == '_')
358 delete = 1;
359 text++;
361 else
362 delete = 0;
364 s = lookup_symbol (text, SYMBOL_LOOKUP);
366 if (s == NULL)
367 xprintf ("Name `%s' is unknown\n", text);
369 if (delete)
370 (void) lookup_symbol (text, SYMBOL_DELETE);
371 else
372 (void) lookup_symbol (text, SYMBOL_INSERT);
374 symtab_print_list (i++);
377 static void
378 symtab_print_list (int i)
380 symbol *sym;
381 size_t h;
383 xprintf ("Symbol dump #%d:\n", i);
384 for (h = 0; h < hash_table_size; h++)
385 for (sym = symtab[h]; sym != NULL; sym = sym->next)
386 xprintf ("\tname %s, bucket %lu, addr %p, next %p, "
387 "flags%s%s%s, pending %d\n",
388 SYMBOL_NAME (sym),
389 (unsigned long int) h, sym, SYMBOL_NEXT (sym),
390 SYMBOL_TRACED (sym) ? " traced" : "",
391 SYMBOL_SHADOWED (sym) ? " shadowed" : "",
392 SYMBOL_DELETED (sym) ? " deleted" : "",
393 SYMBOL_PENDING_EXPANSIONS (sym));
396 #endif /* DEBUG_SYM */