1 /* Compute lookahead criteria for Bison.
3 Copyright (C) 1984, 1986, 1989, 2000-2015, 2018-2020 Free Software
6 This file is part of Bison, the GNU Compiler Compiler.
8 This program is free software: you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation, either version 3 of the License, or
11 (at your option) any later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program. If not, see <http://www.gnu.org/licenses/>. */
22 /* Find which rules need lookahead in each state, and which lookahead
23 tokens they accept. */
37 #include "muscle-tab.h"
43 /* goto_map[nterm - NTOKENS] -> number of gotos. */
44 goto_number
*goto_map
= NULL
;
45 goto_number ngotos
= 0;
46 state_number
*from_state
= NULL
;
47 state_number
*to_state
= NULL
;
48 bitsetv goto_follows
= NULL
;
50 /* Linked list of goto numbers. */
51 typedef struct goto_list
53 struct goto_list
*next
;
58 goto_list_new (goto_number value
, struct goto_list
*next
)
60 goto_list
*res
= xmalloc (sizeof *res
);
66 /* LA is an nLA by NTOKENS matrix of bits. LA[l, i] is 1 if the rule
67 LArule[l] is applicable in the appropriate state when the next
68 token is symbol i. If LA[l, i] and LA[l, j] are both 1 for i != j,
71 static bitsetv LA
= NULL
;
75 /* "(p, A) includes (p', B)" iff
76 B → βAγ, γ nullable, and p'-- β --> p (i.e., state p' reaches p on label β).
78 Definition p.621 [DeRemer 1982].
80 INCLUDES[(p, A)] = [(p', B),...] */
81 static goto_number
**includes
;
83 /* "(q, A → ω) lookback (p, A)" iff state p reaches state q on label ω.
85 Definition p.621 [DeRemer 1982]. */
86 static goto_list
**lookback
;
89 goto_print (goto_number i
, FILE *out
)
91 const state_number src
= from_state
[i
];
92 const state_number dst
= to_state
[i
];
93 symbol_number var
= states
[dst
]->accessing_symbol
;
95 "goto[%zu] = (%d, %s, %d)", i
, src
, symbols
[var
]->tag
, dst
);
101 /* Count the number of gotos (ngotos) per nterm (goto_map). */
102 goto_map
= xcalloc (nnterms
+ 1, sizeof *goto_map
);
104 for (state_number s
= 0; s
< nstates
; ++s
)
106 transitions
*trans
= states
[s
]->transitions
;
107 for (int i
= trans
->num
- 1; 0 <= i
&& TRANSITION_IS_GOTO (trans
, i
); --i
)
110 /* Abort if (ngotos + 1) would overflow. */
111 aver (ngotos
!= GOTO_NUMBER_MAXIMUM
);
112 goto_map
[TRANSITION_SYMBOL (trans
, i
) - ntokens
]++;
116 goto_number
*temp_map
= xnmalloc (nnterms
+ 1, sizeof *temp_map
);
119 for (symbol_number i
= ntokens
; i
< nsyms
; ++i
)
121 temp_map
[i
- ntokens
] = k
;
122 k
+= goto_map
[i
- ntokens
];
125 for (symbol_number i
= ntokens
; i
< nsyms
; ++i
)
126 goto_map
[i
- ntokens
] = temp_map
[i
- ntokens
];
128 goto_map
[nsyms
- ntokens
] = ngotos
;
129 temp_map
[nsyms
- ntokens
] = ngotos
;
132 from_state
= xcalloc (ngotos
, sizeof *from_state
);
133 to_state
= xcalloc (ngotos
, sizeof *to_state
);
135 for (state_number s
= 0; s
< nstates
; ++s
)
137 const transitions
*trans
= states
[s
]->transitions
;
138 for (int i
= trans
->num
- 1; 0 <= i
&& TRANSITION_IS_GOTO (trans
, i
); --i
)
140 goto_number k
= temp_map
[TRANSITION_SYMBOL (trans
, i
) - ntokens
]++;
142 to_state
[k
] = trans
->states
[i
]->number
;
148 if (trace_flag
& trace_automaton
)
149 for (int i
= 0; i
< ngotos
; ++i
)
151 goto_print (i
, stderr
);
152 fputc ('\n', stderr
);
158 map_goto (state_number src
, symbol_number sym
)
160 goto_number low
= goto_map
[sym
- ntokens
];
161 goto_number high
= goto_map
[sym
- ntokens
+ 1] - 1;
166 goto_number middle
= (low
+ high
) / 2;
167 state_number s
= from_state
[middle
];
177 /* Print FOLLOWS for debugging. */
179 follows_print (const char* title
, FILE *out
)
181 fprintf (out
, "%s:\n", title
);
182 for (goto_number i
= 0; i
< ngotos
; ++i
)
184 fputs (" FOLLOWS[", out
);
187 bitset_iterator iter
;
189 BITSET_FOR_EACH (iter
, goto_follows
[i
], sym
, 0)
190 fprintf (out
, " %s", symbols
[sym
]->tag
);
196 /* Build goto_follows. */
198 initialize_goto_follows (void)
200 goto_number
**reads
= xnmalloc (ngotos
, sizeof *reads
);
201 goto_number
*edge
= xnmalloc (ngotos
, sizeof *edge
);
203 goto_follows
= bitsetv_create (ngotos
, ntokens
, BITSET_FIXED
);
205 for (goto_number i
= 0; i
< ngotos
; ++i
)
207 state_number dst
= to_state
[i
];
208 const transitions
*trans
= states
[dst
]->transitions
;
211 FOR_EACH_SHIFT (trans
, j
)
212 bitset_set (goto_follows
[i
], TRANSITION_SYMBOL (trans
, j
));
214 /* Gotos outgoing from DST. */
215 goto_number nedges
= 0;
216 for (; j
< trans
->num
; ++j
)
218 symbol_number sym
= TRANSITION_SYMBOL (trans
, j
);
219 if (nullable
[sym
- ntokens
])
221 assert (nedges
< ngotos
);
222 edge
[nedges
++] = map_goto (dst
, sym
);
230 reads
[i
] = xnmalloc (nedges
+ 1, sizeof reads
[i
][0]);
231 memcpy (reads
[i
], edge
, nedges
* sizeof edge
[0]);
232 reads
[i
][nedges
] = END_NODE
;
235 if (trace_flag
& trace_automaton
)
237 follows_print ("follows after shifts", stderr
);
238 relation_print ("reads", reads
, ngotos
, goto_print
, stderr
);
241 relation_digraph (reads
, ngotos
, goto_follows
);
242 if (trace_flag
& trace_automaton
)
243 follows_print ("follows after read", stderr
);
245 for (goto_number i
= 0; i
< ngotos
; ++i
)
252 /* Find the state which LOOKBACK[LOOKBACK_INDEX] is about. */
254 lookback_find_state (int lookback_index
)
257 for (int j
= 0; j
< nstates
; ++j
)
258 if (states
[j
]->reductions
259 && states
[j
]->reductions
->lookaheads
)
261 if (states
[j
]->reductions
->lookaheads
- LA
> lookback_index
)
267 /* Pacify "potential null pointer dereference" warning. */
273 /* Print LOOKBACK for debugging. */
275 lookback_print (FILE *out
)
277 fputs ("lookback:\n", out
);
278 for (int i
= 0; i
< nLA
; ++i
)
281 fprintf (out
, " %3d = ", i
);
282 const state
*s
= lookback_find_state (i
);
283 int rnum
= i
- (s
->reductions
->lookaheads
- LA
);
284 const rule
*r
= s
->reductions
->rules
[rnum
];
285 fprintf (out
, "(%3d, ", s
->number
);
286 rule_print (r
, NULL
, out
);
288 for (goto_list
*sp
= lookback
[i
]; sp
; sp
= sp
->next
)
291 goto_print (sp
->value
, out
);
298 /* Add (S, R) -> GOTONO to LOOKBACK.
300 "(q, A → ω) lookback (p, A)" iff state p reaches state q on label ω.
302 The goto number GOTONO, whose source is S (which is
305 add_lookback_edge (state
*s
, rule
const *r
, goto_number gotono
)
307 int ri
= state_reduction_find (s
, r
);
308 int idx
= (s
->reductions
->lookaheads
- LA
) + ri
;
309 lookback
[idx
] = goto_list_new (gotono
, lookback
[idx
]);
313 /* Compute INCLUDES and LOOKBACK. Corresponds to step E in Sec. 6 of
316 build_relations (void)
318 goto_number
*edge
= xnmalloc (ngotos
, sizeof *edge
);
319 state_number
*path
= xnmalloc (ritem_longest_rhs () + 1, sizeof *path
);
321 includes
= xnmalloc (ngotos
, sizeof *includes
);
323 /* For each goto (from SRC to DST labeled by nterm VAR), iterate
324 over each rule with VAR as LHS, and find the path PATH from SRC
325 labeled with the RHS of the rule. */
326 for (goto_number i
= 0; i
< ngotos
; ++i
)
328 const state_number src
= from_state
[i
];
329 const state_number dst
= to_state
[i
];
330 symbol_number var
= states
[dst
]->accessing_symbol
;
334 for (rule
**rulep
= derives
[var
- ntokens
]; *rulep
; ++rulep
)
336 rule
const *r
= *rulep
;
337 state
*s
= states
[src
];
340 /* Length of PATH. */
342 for (item_number
const *rp
= r
->rhs
; 0 <= *rp
; rp
++)
344 symbol_number sym
= item_number_as_symbol_number (*rp
);
345 s
= transitions_to (s
, sym
);
346 path
[length
++] = s
->number
;
349 /* S is the end of PATH. */
351 add_lookback_edge (s
, r
, i
);
353 /* Walk back PATH from penultimate to beginning.
355 The "0 <= p" part is actually useless: each rhs ends in a
356 rule number (for which ISVAR(...) is false), and there is
357 a sentinel (ritem[-1]=0) before the first rhs. */
358 for (int p
= length
- 2; 0 <= p
&& ISVAR (r
->rhs
[p
]); --p
)
360 symbol_number sym
= item_number_as_symbol_number (r
->rhs
[p
]);
361 goto_number g
= map_goto (path
[p
], sym
);
362 /* Insert G if not already in EDGE.
363 FIXME: linear search. A bitset instead? */
366 for (int j
= 0; !found
&& j
< nedges
; ++j
)
367 found
= edge
[j
] == g
;
370 assert (nedges
< ngotos
);
374 if (!nullable
[sym
- ntokens
])
379 if (trace_flag
& trace_automaton
)
381 goto_print (i
, stderr
);
382 fputs (" edges = ", stderr
);
383 for (int j
= 0; j
< nedges
; ++j
)
386 goto_print (edge
[j
], stderr
);
388 fputc ('\n', stderr
);
395 includes
[i
] = xnmalloc (nedges
+ 1, sizeof includes
[i
][0]);
396 for (int j
= 0; j
< nedges
; ++j
)
397 includes
[i
][j
] = edge
[j
];
398 includes
[i
][nedges
] = END_NODE
;
405 relation_transpose (&includes
, ngotos
);
406 if (trace_flag
& trace_automaton
)
407 relation_print ("includes", includes
, ngotos
, goto_print
, stderr
);
410 /* Compute FOLLOWS from INCLUDES, and free INCLUDES. */
412 compute_follows (void)
414 relation_digraph (includes
, ngotos
, goto_follows
);
415 if (trace_flag
& trace_sets
)
416 follows_print ("follows after includes", stderr
);
417 for (goto_number i
= 0; i
< ngotos
; ++i
)
424 compute_lookaheads (void)
426 if (trace_flag
& trace_automaton
)
427 lookback_print (stderr
);
429 for (size_t i
= 0; i
< nLA
; ++i
)
430 for (goto_list
*sp
= lookback
[i
]; sp
; sp
= sp
->next
)
431 bitset_or (LA
[i
], LA
[i
], goto_follows
[sp
->value
]);
434 for (size_t i
= 0; i
< nLA
; ++i
)
435 LIST_FREE (goto_list
, lookback
[i
]);
440 /*------------------------------------------------------.
441 | Count the number of lookahead tokens required for S. |
442 `------------------------------------------------------*/
445 state_lookaheads_count (state
*s
, bool default_reduction_only_for_accept
)
447 const reductions
*reds
= s
->reductions
;
448 const transitions
*trans
= s
->transitions
;
450 /* Transitions are only disabled during conflict resolution, and that
451 hasn't happened yet, so there should be no need to check that
452 transition 0 hasn't been disabled before checking if it is a shift.
453 However, this check was performed at one time, so we leave it as an
455 aver (trans
->num
== 0 || !TRANSITION_IS_DISABLED (trans
, 0));
457 /* We need a lookahead either to distinguish different reductions
458 (i.e., there are two or more), or to distinguish a reduction from a
459 shift. Otherwise, it is straightforward, and the state is
460 'consistent'. However, do not treat a state with any reductions as
461 consistent unless it is the accepting state (because there is never
462 a lookahead token that makes sense there, and so no lookahead token
463 should be read) if the user has otherwise disabled default
467 || (reds
->num
== 1 && trans
->num
&& TRANSITION_IS_SHIFT (trans
, 0))
468 || (reds
->num
== 1 && reds
->rules
[0]->number
!= 0
469 && default_reduction_only_for_accept
));
471 return s
->consistent
? 0 : reds
->num
;
475 /*----------------------------------------------.
476 | Compute LA, NLA, and the lookaheads members. |
477 `----------------------------------------------*/
482 bool default_reduction_only_for_accept
;
484 char *default_reductions
=
485 muscle_percent_define_get ("lr.default-reduction");
486 default_reduction_only_for_accept
= STREQ (default_reductions
, "accepting");
487 free (default_reductions
);
490 /* Compute the total number of reductions requiring a lookahead. */
492 for (state_number i
= 0; i
< nstates
; ++i
)
493 nLA
+= state_lookaheads_count (states
[i
],
494 default_reduction_only_for_accept
);
495 /* Avoid having to special case 0. */
499 bitsetv pLA
= LA
= bitsetv_create (nLA
, ntokens
, BITSET_FIXED
);
501 /* Initialize the members LOOKAHEADS for each state whose reductions
502 require lookahead tokens. */
503 for (state_number i
= 0; i
< nstates
; ++i
)
505 int count
= state_lookaheads_count (states
[i
],
506 default_reduction_only_for_accept
);
509 states
[i
]->reductions
->lookaheads
= pLA
;
516 /*---------------------------------------------.
517 | Output the lookahead tokens for each state. |
518 `---------------------------------------------*/
521 lookaheads_print (FILE *out
)
523 fputs ("Lookaheads:\n", out
);
524 for (state_number i
= 0; i
< nstates
; ++i
)
526 const reductions
*reds
= states
[i
]->reductions
;
529 fprintf (out
, " State %d:\n", i
);
530 for (int j
= 0; j
< reds
->num
; ++j
)
532 fprintf (out
, " rule %d:", reds
->rules
[j
]->number
);
533 if (reds
->lookaheads
)
535 bitset_iterator iter
;
537 BITSET_FOR_EACH (iter
, reds
->lookaheads
[j
], k
, 0)
538 fprintf (out
, " %s", symbols
[k
]->tag
);
550 if (trace_flag
& trace_automaton
)
552 fputc ('\n', stderr
);
553 begin_use_class ("trace0", stderr
);
554 fprintf (stderr
, "lalr: begin");
555 end_use_class ("trace0", stderr
);
556 fputc ('\n', stderr
);
560 initialize_goto_follows ();
561 lookback
= xcalloc (nLA
, sizeof *lookback
);
564 compute_lookaheads ();
566 if (trace_flag
& trace_sets
)
567 lookaheads_print (stderr
);
568 if (trace_flag
& trace_automaton
)
570 begin_use_class ("trace0", stderr
);
571 fprintf (stderr
, "lalr: done");
572 end_use_class ("trace0", stderr
);
573 fputc ('\n', stderr
);
579 lalr_update_state_numbers (state_number old_to_new
[], state_number nstates_old
)
581 goto_number ngotos_reachable
= 0;
582 symbol_number nonterminal
= 0;
583 aver (nsyms
== nnterms
+ ntokens
);
585 for (goto_number i
= 0; i
< ngotos
; ++i
)
587 while (i
== goto_map
[nonterminal
])
588 goto_map
[nonterminal
++] = ngotos_reachable
;
589 /* If old_to_new[from_state[i]] = nstates_old, remove this goto
591 if (old_to_new
[from_state
[i
]] != nstates_old
)
593 /* from_state[i] is not removed, so it and thus to_state[i] are
594 reachable, so to_state[i] != nstates_old. */
595 aver (old_to_new
[to_state
[i
]] != nstates_old
);
596 from_state
[ngotos_reachable
] = old_to_new
[from_state
[i
]];
597 to_state
[ngotos_reachable
] = old_to_new
[to_state
[i
]];
601 while (nonterminal
<= nnterms
)
603 aver (ngotos
== goto_map
[nonterminal
]);
604 goto_map
[nonterminal
++] = ngotos_reachable
;
606 ngotos
= ngotos_reachable
;
613 for (state_number s
= 0; s
< nstates
; ++s
)
614 states
[s
]->reductions
->lookaheads
= NULL
;