doc/bison.texi

   1 \input texinfo @c -*-texinfo-*-
   2 @comment %**start of header
   3 @setfilename bison.info
   4 @documentencoding UTF-8
   5 @include version.texi
   6 @settitle Bison @value{VERSION}
   7 @xrefautomaticsectiontitle on
   8
   9 @c cite a reference in text.  Could not find a means to have a single
  10 @c definition that looks nice in all the output formats.
  11 @iftex
  12 @macro tcite{ref}
  13 @ref{\ref\,,\ref\}
  14 @end macro
  15 @end iftex
  16 @ifnottex
  17 @macro tcite{ref}
  18 @ref{\ref\}
  19 @end macro
  20 @end ifnottex
  21
  22 @c cite a reference in parentheses.
  23 @iftex
  24 @macro pcite{ref}
  25 (@pxref{\ref\,,\ref\})
  26 @end macro
  27 @end iftex
  28 @ifnottex
  29 @macro pcite{ref}
  30 (@pxref{\ref\})
  31 @end macro
  32 @end ifnottex
  33
  34
  35 @c ## ---------------------- ##
  36 @c ## Diagnostics in color.  ##
  37 @c ## ---------------------- ##
  38
  39 @tex
  40 \gdef\rgbGreen{0 .80 0}
  41 \gdef\colorGreen{%
  42  \setcolor{\rgbGreen}%
  43 }
  44 \gdef\rgbYellow{1 .5 0}
  45 \gdef\colorYellow{%
  46  \setcolor{\rgbYellow}%
  47 }
  48 \gdef\rgbRed{1 0 0}
  49 \gdef\colorRed{%
  50  \setcolor{\rgbRed}%
  51 }
  52 \gdef\rgbBlue{0 0 1}
  53 \gdef\colorBlue{%
  54  \setcolor{\rgbBlue}%
  55 }
  56 \gdef\rgbPurple{0.50 0 0.50}
  57 \gdef\colorPurple{%
  58  \setcolor{\rgbPurple}%
  59 }
  60 \gdef\colorOff{%
  61  \setcolor{\maincolor}%
  62 }
  63
  64 \gdef\rgbError{0.80 0 0}
  65 \gdef\diagError{%
  66  \setcolor{\rgbError}%
  67 }
  68 \gdef\rgbNotice{0 0 0.80}
  69 \gdef\diagNotice{%
  70  \setcolor{\rgbNotice}%
  71 }
  72 \gdef\rgbWarning{0.50 0 0.50}
  73 \gdef\diagWarning{%
  74  \setcolor{\rgbWarning}%
  75 }
  76 \gdef\diagOff{%
  77  \setcolor{\maincolor}%
  78 }
  79 @end tex
  80
  81 @ifnottex
  82 @macro colorGreen
  83 @inlineraw{html, <span style="color:green">}
  84 @end macro
  85 @macro colorYellow
  86 @inlineraw{html, <span style="color:#ff8000">}
  87 @end macro
  88 @macro colorRed
  89 @inlineraw{html, <span style="color:red">}
  90 @end macro
  91 @macro colorBlue
  92 @inlineraw{html, <span style="color:blue">}
  93 @end macro
  94 @macro colorPurple
  95 @inlineraw{html, <span style="color:darkviolet">}
  96 @end macro
  97 @macro colorOff
  98 @inlineraw{html, </span>}
  99 @end macro
 100
 101 @macro diagError
 102 @inlineraw{html, <b style="color:red">}
 103 @end macro
 104 @macro diagNotice
 105 @inlineraw{html, <b style="color:darkcyan">}
 106 @end macro
 107 @macro diagWarning
 108 @inlineraw{html, <b style="color:darkviolet">}
 109 @end macro
 110 @macro diagOff
 111 @inlineraw{html, </b>}
 112 @end macro
 113 @end ifnottex
 114
 115 @macro green{text}
 116 @colorGreen{}\text\@colorOff{}
 117 @end macro
 118
 119 @macro yellow{text}
 120 @colorYellow{}\text\@colorOff{}
 121 @end macro
 122
 123 @macro red{text}
 124 @colorRed{}\text\@colorOff{}
 125 @end macro
 126
 127 @macro blue{text}
 128 @colorBlue{}\text\@colorOff{}
 129 @end macro
 130
 131 @macro purple{text}
 132 @colorPurple{}\text\@colorOff{}
 133 @end macro
 134
 135 @macro dwarning{text}
 136 @diagWarning{}\text\@diagOff{}
 137 @end macro
 138
 139 @macro derror{text}
 140 @diagError{}\text\@diagOff{}
 141 @end macro
 142
 143 @macro dnotice{text}
 144 @diagNotice{}\text\@diagOff{}
 145 @end macro
 146
 147 @finalout
 148
 149 @c SMALL BOOK version
 150 @c This edition has been formatted so that you can format and print it in
 151 @c the smallbook format.
 152 @c @smallbook
 153 @c @setchapternewpage odd
 154
 155 @c Set following if you want to document %default-prec and %no-default-prec.
 156 @c This feature is experimental and may change in future Bison versions.
 157 @c @set defaultprec
 158
 159 @ifnotinfo
 160 @syncodeindex fn cp
 161 @syncodeindex vr cp
 162 @syncodeindex tp cp
 163 @end ifnotinfo
 164 @ifinfo
 165 @synindex fn cp
 166 @synindex vr cp
 167 @synindex tp cp
 168 @end ifinfo
 169 @comment %**end of header
 170
 171 @copying
 172
 173 This manual (@value{UPDATED}) is for GNU Bison (version @value{VERSION}),
 174 the GNU parser generator.
 175
 176 Copyright @copyright{} 1988--1993, 1995, 1998--2015, 2018--2021 Free
 177 Software Foundation, Inc.
 178
 179 @quotation
 180 Permission is granted to copy, distribute and/or modify this document under
 181 the terms of the GNU Free Documentation License, Version 1.3 or any later
 182 version published by the Free Software Foundation; with no Invariant
 183 Sections, with the Front-Cover texts being ``A GNU Manual,'' and with the
 184 Back-Cover Texts as in (a) below.  A copy of the license is included in the
 185 section entitled ``GNU Free Documentation License.''
 186
 187 (a) The FSF's Back-Cover Text is: ``You have the freedom to copy and modify
 188 this GNU manual.  Buying copies from the FSF supports it in developing GNU
 189 and promoting software freedom.''
 190 @end quotation
 191 @end copying
 192
 193 @dircategory Software development
 194 @direntry
 195 * bison: (bison).       GNU parser generator (Yacc replacement).
 196 @end direntry
 197
 198 @titlepage
 199 @title Bison
 200 @subtitle The Yacc-compatible Parser Generator
 201 @subtitle @value{UPDATED}, Bison Version @value{VERSION}
 202
 203 @author by Charles Donnelly and Richard Stallman
 204
 205 @page
 206 @vskip 0pt plus 1filll
 207 @insertcopying
 208 @sp 2
 209 Published by the Free Software Foundation @*
 210 51 Franklin Street, Fifth Floor @*
 211 Boston, MA  02110-1301  USA @*
 212 Printed copies are available from the Free Software Foundation.@*
 213 ISBN 1-882114-44-2
 214 @sp 2
 215 Cover art by Etienne Suvasa.
 216 @end titlepage
 217
 218 @contents
 219
 220 @ifnottex
 221 @node Top
 222 @top Bison
 223 @insertcopying
 224 @end ifnottex
 225
 226 @menu
 227 * Introduction::        What GNU Bison is.
 228 * Conditions::          Conditions for using Bison and its output.
 229 * Copying::             The GNU General Public License says
 230                           how you can copy and share Bison.
 231
 232 Tutorial sections:
 233 * Concepts::            Basic concepts for understanding Bison.
 234 * Examples::            Three simple explained examples of using Bison.
 235
 236 Reference sections:
 237 * Grammar File::        Writing Bison declarations and rules.
 238 * Interface::           C-language interface to the parser function @code{yyparse}.
 239 * Algorithm::           How the Bison parser works at run-time.
 240 * Error Recovery::      Writing rules for error recovery.
 241 * Context Dependency::  What to do if your language syntax is too
 242                           messy for Bison to handle straightforwardly.
 243 * Debugging::           Understanding or debugging Bison parsers.
 244 * Invocation::          How to run Bison (to produce the parser implementation).
 245 * Other Languages::     Creating C++, D and Java parsers.
 246 * History::             How Bison came to be
 247 * Versioning::          Dealing with Bison versioning
 248 * FAQ::                 Frequently Asked Questions
 249 * Table of Symbols::    All the keywords of the Bison language are explained.
 250 * Glossary::            Basic concepts are explained.
 251 * GNU Free Documentation License:: Copying and sharing this manual
 252 * Bibliography::        Publications cited in this manual.
 253 * Index of Terms::      Cross-references to the text.
 254
 255 @detailmenu
 256  --- The Detailed Node Listing ---
 257
 258 The Concepts of Bison
 259
 260 * Language and Grammar:: Languages and context-free grammars,
 261                            as mathematical ideas.
 262 * Grammar in Bison::     How we represent grammars for Bison's sake.
 263 * Semantic Values::      Each token or syntactic grouping can have
 264                            a semantic value (the value of an integer,
 265                            the name of an identifier, etc.).
 266 * Semantic Actions::     Each rule can have an action containing C code.
 267 * GLR Parsers::          Writing parsers for general context-free languages.
 268 * Locations::            Overview of location tracking.
 269 * Bison Parser::         What are Bison's input and output,
 270                            how is the output used?
 271 * Stages::               Stages in writing and running Bison grammars.
 272 * Grammar Layout::       Overall structure of a Bison grammar file.
 273
 274 Writing GLR Parsers
 275
 276 * Simple GLR Parsers::     Using GLR parsers on unambiguous grammars.
 277 * Merging GLR Parses::     Using GLR parsers to resolve ambiguities.
 278 * GLR Semantic Actions::   Considerations for semantic values and deferred actions.
 279 * Semantic Predicates::    Controlling a parse with arbitrary computations.
 280
 281 Examples
 282
 283 * RPN Calc::               Reverse Polish Notation Calculator;
 284                              a first example with no operator precedence.
 285 * Infix Calc::             Infix (algebraic) notation calculator.
 286                              Operator precedence is introduced.
 287 * Simple Error Recovery::  Continuing after syntax errors.
 288 * Location Tracking Calc:: Demonstrating the use of @@@var{n} and @@$.
 289 * Multi-function Calc::    Calculator with memory and trig functions.
 290                              It uses multiple data-types for semantic values.
 291 * Exercises::              Ideas for improving the multi-function calculator.
 292
 293 Reverse Polish Notation Calculator
 294
 295 * Rpcalc Declarations::    Prologue (declarations) for rpcalc.
 296 * Rpcalc Rules::           Grammar Rules for rpcalc, with explanation.
 297 * Rpcalc Lexer::           The lexical analyzer.
 298 * Rpcalc Main::            The controlling function.
 299 * Rpcalc Error::           The error reporting function.
 300 * Rpcalc Generate::        Running Bison on the grammar file.
 301 * Rpcalc Compile::         Run the C compiler on the output code.
 302
 303 Grammar Rules for @code{rpcalc}
 304
 305 * Rpcalc Input::            Explanation of the @code{input} nonterminal
 306 * Rpcalc Line::             Explanation of the @code{line} nonterminal
 307 * Rpcalc Exp::              Explanation of the @code{exp} nonterminal
 308
 309 Location Tracking Calculator: @code{ltcalc}
 310
 311 * Ltcalc Declarations::    Bison and C declarations for ltcalc.
 312 * Ltcalc Rules::           Grammar rules for ltcalc, with explanations.
 313 * Ltcalc Lexer::           The lexical analyzer.
 314
 315 Multi-Function Calculator: @code{mfcalc}
 316
 317 * Mfcalc Declarations::    Bison declarations for multi-function calculator.
 318 * Mfcalc Rules::           Grammar rules for the calculator.
 319 * Mfcalc Symbol Table::    Symbol table management subroutines.
 320 * Mfcalc Lexer::           The lexical analyzer.
 321 * Mfcalc Main::            The controlling function.
 322
 323 Bison Grammar Files
 324
 325 * Grammar Outline::    Overall layout of the grammar file.
 326 * Symbols::            Terminal and nonterminal symbols.
 327 * Rules::              How to write grammar rules.
 328 * Semantics::          Semantic values and actions.
 329 * Tracking Locations:: Locations and actions.
 330 * Named References::   Using named references in actions.
 331 * Declarations::       All kinds of Bison declarations are described here.
 332 * Multiple Parsers::   Putting more than one Bison parser in one program.
 333
 334 Outline of a Bison Grammar
 335
 336 * Prologue::              Syntax and usage of the prologue.
 337 * Prologue Alternatives:: Syntax and usage of alternatives to the prologue.
 338 * Bison Declarations::    Syntax and usage of the Bison declarations section.
 339 * Grammar Rules::         Syntax and usage of the grammar rules section.
 340 * Epilogue::              Syntax and usage of the epilogue.
 341
 342 Grammar Rules
 343
 344 * Rules Syntax::   Syntax of the rules.
 345 * Empty Rules::    Symbols that can match the empty string.
 346 * Recursion::      Writing recursive rules.
 347
 348
 349 Defining Language Semantics
 350
 351 * Value Type::        Specifying one data type for all semantic values.
 352 * Multiple Types::    Specifying several alternative data types.
 353 * Type Generation::   Generating the semantic value type.
 354 * Union Decl::        Declaring the set of all semantic value types.
 355 * Structured Value Type::  Providing a structured semantic value type.
 356 * Actions::           An action is the semantic definition of a grammar rule.
 357 * Action Types::      Specifying data types for actions to operate on.
 358 * Midrule Actions::   Most actions go at the end of a rule.
 359                       This says when, why and how to use the exceptional
 360                         action in the middle of a rule.
 361
 362 Actions in Midrule
 363
 364 * Using Midrule Actions::       Putting an action in the middle of a rule.
 365 * Typed Midrule Actions::       Specifying the semantic type of their values.
 366 * Midrule Action Translation::  How midrule actions are actually processed.
 367 * Midrule Conflicts::           Midrule actions can cause conflicts.
 368
 369 Tracking Locations
 370
 371 * Location Type::               Specifying a data type for locations.
 372 * Actions and Locations::       Using locations in actions.
 373 * Printing Locations::          Defining how locations are printed.
 374 * Location Default Action::     Defining a general way to compute locations.
 375
 376 Bison Declarations
 377
 378 * Require Decl::      Requiring a Bison version.
 379 * Token Decl::        Declaring terminal symbols.
 380 * Precedence Decl::   Declaring terminals with precedence and associativity.
 381 * Type Decl::         Declaring the choice of type for a nonterminal symbol.
 382 * Symbol Decls::      Summary of the Syntax of Symbol Declarations.
 383 * Initial Action Decl::  Code run before parsing starts.
 384 * Destructor Decl::   Declaring how symbols are freed.
 385 * Printer Decl::      Declaring how symbol values are displayed.
 386 * Expect Decl::       Suppressing warnings about parsing conflicts.
 387 * Start Decl::        Specifying the start symbol.
 388 * Pure Decl::         Requesting a reentrant parser.
 389 * Push Decl::         Requesting a push parser.
 390 * Decl Summary::      Table of all Bison declarations.
 391 * %define Summary::   Defining variables to adjust Bison's behavior.
 392 * %code Summary::     Inserting code into the parser source.
 393
 394 Parser C-Language Interface
 395
 396 * Parser Function::         How to call @code{yyparse} and what it returns.
 397 * Push Parser Interface::   How to create, use, and destroy push parsers.
 398 * Lexical::                 You must supply a function @code{yylex}
 399                               which reads tokens.
 400 * Error Reporting::         Passing error messages to the user.
 401 * Action Features::         Special features for use in actions.
 402 * Internationalization::    How to let the parser speak in the user's
 403                               native language.
 404
 405 The Lexical Analyzer Function @code{yylex}
 406
 407 * Calling Convention::  How @code{yyparse} calls @code{yylex}.
 408 * Special Tokens::      Signaling end-of-file and errors to the parser.
 409 * Tokens from Literals:: Finding token kinds from string aliases.
 410 * Token Values::        How @code{yylex} must return the semantic value
 411                           of the token it has read.
 412 * Token Locations::     How @code{yylex} must return the text location
 413                           (line number, etc.) of the token, if the
 414                           actions want that.
 415 * Pure Calling::        How the calling convention differs in a pure parser
 416                           (@pxref{Pure Decl}).
 417
 418 Error Reporting
 419
 420 * Error Reporting Function::         You must supply a @code{yyerror} function.
 421 * Syntax Error Reporting Function::  You can supply a @code{yyreport_syntax_error} function.
 422
 423 Parser Internationalization
 424
 425 * Enabling I18n::    Preparing your project to support internationalization.
 426 * Token I18n::       Preparing tokens for internationalization in error messages.
 427
 428 The Bison Parser Algorithm
 429
 430 * Lookahead::         Parser looks one token ahead when deciding what to do.
 431 * Shift/Reduce::      Conflicts: when either shifting or reduction is valid.
 432 * Precedence::        Operator precedence works by resolving conflicts.
 433 * Contextual Precedence::  When an operator's precedence depends on context.
 434 * Parser States::     The parser is a finite-state-machine with stack.
 435 * Reduce/Reduce::     When two rules are applicable in the same situation.
 436 * Mysterious Conflicts:: Conflicts that look unjustified.
 437 * Tuning LR::         How to tune fundamental aspects of LR-based parsing.
 438 * Generalized LR Parsing::  Parsing arbitrary context-free grammars.
 439 * Memory Management:: What happens when memory is exhausted.  How to avoid it.
 440
 441 Operator Precedence
 442
 443 * Why Precedence::    An example showing why precedence is needed.
 444 * Using Precedence::  How to specify precedence and associativity.
 445 * Precedence Only::   How to specify precedence only.
 446 * Precedence Examples::  How these features are used in the previous example.
 447 * How Precedence::    How they work.
 448 * Non Operators::     Using precedence for general conflicts.
 449
 450 Tuning LR
 451
 452 * LR Table Construction:: Choose a different construction algorithm.
 453 * Default Reductions::    Disable default reductions.
 454 * LAC::                   Correct lookahead sets in the parser states.
 455 * Unreachable States::    Keep unreachable parser states for debugging.
 456
 457 Handling Context Dependencies
 458
 459 * Semantic Tokens::   Token parsing can depend on the semantic context.
 460 * Lexical Tie-ins::   Token parsing can depend on the syntactic context.
 461 * Tie-in Recovery::   Lexical tie-ins have implications for how
 462                         error recovery rules must be written.
 463
 464 Debugging Your Parser
 465
 466 * Counterexamples::   Understanding conflicts.
 467 * Understanding::     Understanding the structure of your parser.
 468 * Graphviz::          Getting a visual representation of the parser.
 469 * Xml::               Getting a markup representation of the parser.
 470 * Tracing::           Tracing the execution of your parser.
 471
 472 Tracing Your Parser
 473
 474 * Enabling Traces::    Activating run-time trace support
 475 * Mfcalc Traces::      Extending @code{mfcalc} to support traces
 476
 477 Invoking Bison
 478
 479 * Bison Options::     All the options described in detail,
 480                         in alphabetical order by short options.
 481 * Option Cross Key::  Alphabetical list of long options.
 482 * Yacc Library::      Yacc-compatible @code{yylex} and @code{main}.
 483
 484 Bison Options
 485
 486 * Operation Modes::    Options controlling the global behavior of @command{bison}
 487 * Diagnostics::        Options controlling the diagnostics
 488 * Tuning the Parser::  Options changing the generated parsers
 489 * Output Files::       Options controlling the output
 490
 491 Parsers Written In Other Languages
 492
 493 * C++ Parsers::                 The interface to generate C++ parser classes
 494 * D Parsers::                   The interface to generate D parser classes
 495 * Java Parsers::                The interface to generate Java parser classes
 496
 497 C++ Parsers
 498
 499 * A Simple C++ Example::        A short introduction to C++ parsers
 500 * C++ Bison Interface::         Asking for C++ parser generation
 501 * C++ Parser Interface::        Instantiating and running the parser
 502 * C++ Semantic Values::         %union vs. C++
 503 * C++ Location Values::         The position and location classes
 504 * C++ Parser Context::          You can supply a @code{report_syntax_error} function.
 505 * C++ Scanner Interface::       Exchanges between yylex and parse
 506 * A Complete C++ Example::      Demonstrating their use
 507
 508 C++ Location Values
 509
 510 * C++ position::                  One point in the source file
 511 * C++ location::                  Two points in the source file
 512 * Exposing the Location Classes:: Using the Bison location class in your
 513                                   project
 514 * User Defined Location Type::    Required interface for locations
 515
 516 A Complete C++ Example
 517
 518 * Calc++ --- C++ Calculator::   The specifications
 519 * Calc++ Parsing Driver::       An active parsing context
 520 * Calc++ Parser::               A parser class
 521 * Calc++ Scanner::              A pure C++ Flex scanner
 522 * Calc++ Top Level::            Conducting the band
 523
 524 D Parsers
 525
 526 * D Bison Interface::        Asking for D parser generation
 527 * D Semantic Values::        %token and %nterm vs. D
 528 * D Location Values::        The position and location classes
 529 * D Parser Interface::       Instantiating and running the parser
 530 * D Parser Context Interface:: Circumstances of a syntax error
 531 * D Scanner Interface::      Specifying the scanner for the parser
 532 * D Action Features::        Special features for use in actions
 533 * D Push Parser Interface::  Instantiating and running the push parser
 534 * D Complete Symbols::       Using token constructors
 535
 536 Java Parsers
 537
 538 * Java Bison Interface::        Asking for Java parser generation
 539 * Java Semantic Values::        %token and %nterm vs. Java
 540 * Java Location Values::        The position and location classes
 541 * Java Parser Interface::       Instantiating and running the parser
 542 * Java Parser Context Interface:: Circumstances of a syntax error
 543 * Java Scanner Interface::      Specifying the scanner for the parser
 544 * Java Action Features::        Special features for use in actions
 545 * Java Push Parser Interface::  Instantiating and running the push parser
 546 * Java Differences::            Differences between C/C++ and Java Grammars
 547 * Java Declarations Summary::   List of Bison declarations used with Java
 548
 549 A Brief History of the Greater Ungulates
 550
 551 * Yacc::                        The original Yacc
 552 * yacchack::                    An obscure early implementation of reentrancy
 553 * Byacc::                       Berkeley Yacc
 554 * Bison::                       This program
 555 * Other Ungulates::             Similar programs
 556
 557 Bison Version Compatibility
 558
 559 * Versioning::                  Dealing with Bison versioning
 560
 561 Frequently Asked Questions
 562
 563 * Memory Exhausted::            Breaking the Stack Limits
 564 * How Can I Reset the Parser::  @code{yyparse} Keeps some State
 565 * Strings are Destroyed::       @code{yylval} Loses Track of Strings
 566 * Implementing Gotos/Loops::    Control Flow in the Calculator
 567 * Multiple start-symbols::      Factoring closely related grammars
 568 * Enabling Relocatability::     Moving Bison/using it through network shares
 569 * Secure?  Conform?::           Is Bison POSIX safe?
 570 * I can't build Bison::         Troubleshooting
 571 * Where can I find help?::      Troubleshouting
 572 * Bug Reports::                 Troublereporting
 573 * More Languages::              Parsers in C++, Java, and so on
  574 * Beta Testing::                Experimenting with development versions
 575 * Mailing Lists::               Meeting other Bison users
 576
 577 Copying This Manual
 578
 579 * GNU Free Documentation License:: Copying and sharing this manual
 580
 581 @end detailmenu
 582 @end menu
 583
 584 @node Introduction
 585 @unnumbered Introduction
 586 @cindex introduction
 587
 588 @dfn{Bison} is a general-purpose parser generator that converts an annotated
 589 context-free grammar into a deterministic LR or generalized LR (GLR) parser
 590 employing LALR(1), IELR(1) or canonical LR(1) parser tables.  Once you are
 591 proficient with Bison, you can use it to develop a wide range of language
 592 parsers, from those used in simple desk calculators to complex programming
 593 languages.
 594
 595 Bison is upward compatible with Yacc: all properly-written Yacc grammars
 596 ought to work with Bison with no change.  Anyone familiar with Yacc should
 597 be able to use Bison with little trouble.  You need to be fluent in C, C++,
 598 D or Java programming in order to use Bison or to understand this manual.
 599
 600 We begin with tutorial chapters that explain the basic concepts of
 601 using Bison and show three explained examples, each building on the
 602 last.  If you don't know Bison or Yacc, start by reading these
 603 chapters.  Reference chapters follow, which describe specific aspects
 604 of Bison in detail.
 605
 606 Bison was written originally by Robert Corbett.  Richard Stallman made
 607 it Yacc-compatible.  Wilfred Hansen of Carnegie Mellon University
 608 added multi-character string literals and other features.  Since then,
 609 Bison has grown more robust and evolved many other new features thanks
 610 to the hard work of a long list of volunteers.  For details, see the
 611 @file{THANKS} and @file{ChangeLog} files included in the Bison
 612 distribution.
 613
 614 This edition corresponds to version @value{VERSION} of Bison.
 615
 616 @node Conditions
 617 @unnumbered Conditions for Using Bison
 618
 619 The distribution terms for Bison-generated parsers permit using the parsers
 620 in nonfree programs.  Before Bison version 2.2, these extra permissions
 621 applied only when Bison was generating LALR(1) parsers in C@.  And before
 622 Bison version 1.24, Bison-generated parsers could be used only in programs
 623 that were free software.
 624
 625 The other GNU programming tools, such as the GNU C compiler, have never had
 626 such a requirement.  They could always be used for nonfree software.  The
 627 reason Bison was different was not due to a special policy decision; it
 628 resulted from applying the usual General Public License to all of the Bison
 629 source code.
 630
 631 The main output of the Bison utility---the Bison parser implementation
 632 file---contains a verbatim copy of a sizable piece of Bison, which is the
 633 code for the parser's implementation.  (The actions from your grammar are
 634 inserted into this implementation at one point, but most of the rest of the
 635 implementation is not changed.)  When we applied the GPL terms to the
 636 skeleton code for the parser's implementation, the effect was to restrict
 637 the use of Bison output to free software.
 638
 639 We didn't change the terms because of sympathy for people who want to make
 640 software proprietary.  @strong{Software should be free.}  But we concluded
 641 that limiting Bison's use to free software was doing little to encourage
 642 people to make other software free.  So we decided to make the practical
 643 conditions for using Bison match the practical conditions for using the
 644 other GNU tools.
 645
 646 This exception applies when Bison is generating code for a parser.  You can
 647 tell whether the exception applies to a Bison output file by inspecting the
 648 file for text beginning with ``As a special exception@dots{}''.  The text
 649 spells out the exact terms of the exception.
 650
 651 @node Copying
 652 @unnumbered GNU GENERAL PUBLIC LICENSE
 653 @include gpl-3.0.texi
 654
 655 @node Concepts
 656 @chapter The Concepts of Bison
 657
 658 This chapter introduces many of the basic concepts without which the details
 659 of Bison will not make sense.  If you do not already know how to use Bison
 660 or Yacc, we suggest you start by reading this chapter carefully.
 661
 662 @menu
 663 * Language and Grammar:: Languages and context-free grammars,
 664                            as mathematical ideas.
 665 * Grammar in Bison::     How we represent grammars for Bison's sake.
 666 * Semantic Values::      Each token or syntactic grouping can have
 667                            a semantic value (the value of an integer,
 668                            the name of an identifier, etc.).
 669 * Semantic Actions::     Each rule can have an action containing C code.
 670 * GLR Parsers::          Writing parsers for general context-free languages.
 671 * Locations::            Overview of location tracking.
 672 * Bison Parser::         What are Bison's input and output,
 673                            how is the output used?
 674 * Stages::               Stages in writing and running Bison grammars.
 675 * Grammar Layout::       Overall structure of a Bison grammar file.
 676 @end menu
 677
 678 @node Language and Grammar
 679 @section Languages and Context-Free Grammars
 680
 681 @cindex context-free grammar
 682 @cindex grammar, context-free
 683 In order for Bison to parse a language, it must be described by a
 684 @dfn{context-free grammar}.  This means that you specify one or more
 685 @dfn{syntactic groupings} and give rules for constructing them from their
 686 parts.  For example, in the C language, one kind of grouping is called an
 687 `expression'.  One rule for making an expression might be, ``An expression
 688 can be made of a minus sign and another expression''.  Another would be,
 689 ``An expression can be an integer''.  As you can see, rules are often
 690 recursive, but there must be at least one rule which leads out of the
 691 recursion.
 692
 693 @cindex BNF
 694 @cindex Backus-Naur form
 695 The most common formal system for presenting such rules for humans to read
 696 is @dfn{Backus-Naur Form} or ``BNF'', which was developed in
 697 order to specify the language Algol 60.  Any grammar expressed in
 698 BNF is a context-free grammar.  The input to Bison is
 699 essentially machine-readable BNF.
 700
 701 @cindex LALR grammars
 702 @cindex IELR grammars
 703 @cindex LR grammars
 704 There are various important subclasses of context-free grammars.  Although
 705 it can handle almost all context-free grammars, Bison is optimized for what
 706 are called LR(1) grammars.  In brief, in these grammars, it must be possible
 707 to tell how to parse any portion of an input string with just a single token
 708 of lookahead.  For historical reasons, Bison by default is limited by the
 709 additional restrictions of LALR(1), which is hard to explain simply.
 710 @xref{Mysterious Conflicts}, for more information on this.  You can escape
 711 these additional restrictions by requesting IELR(1) or canonical LR(1)
 712 parser tables.  @xref{LR Table Construction}, to learn how.
 713
 714 @cindex GLR parsing
 715 @cindex generalized LR (GLR) parsing
 716 @cindex ambiguous grammars
 717 @cindex nondeterministic parsing
 718
 719 Parsers for LR(1) grammars are @dfn{deterministic}, meaning
 720 roughly that the next grammar rule to apply at any point in the input is
 721 uniquely determined by the preceding input and a fixed, finite portion
 722 (called a @dfn{lookahead}) of the remaining input.  A context-free
 723 grammar can be @dfn{ambiguous}, meaning that there are multiple ways to
  724 apply the grammar rules to derive the same input.  Even unambiguous
 725 grammars can be @dfn{nondeterministic}, meaning that no fixed
 726 lookahead always suffices to determine the next grammar rule to apply.
 727 With the proper declarations, Bison is also able to parse these more
 728 general context-free grammars, using a technique known as GLR
 729 parsing (for Generalized LR).  Bison's GLR parsers
 730 are able to handle any context-free grammar for which the number of
 731 possible parses of any given string is finite.
 732
 733 @cindex symbols (abstract)
 734 @cindex token
 735 @cindex syntactic grouping
 736 @cindex grouping, syntactic
 737 In the formal grammatical rules for a language, each kind of syntactic unit
 738 or grouping is named by a @dfn{symbol}.  Those which are built by grouping
 739 smaller constructs according to grammatical rules are called
 740 @dfn{nonterminal symbols}; those which can't be subdivided are called
 741 @dfn{terminal symbols} or @dfn{token kinds}.  We call a piece of input
 742 corresponding to a single terminal symbol a @dfn{token}, and a piece
 743 corresponding to a single nonterminal symbol a @dfn{grouping}.
 744
 745 We can use the C language as an example of what symbols, terminal and
 746 nonterminal, mean.  The tokens of C are identifiers, constants (numeric
 747 and string), and the various keywords, arithmetic operators and
 748 punctuation marks.  So the terminal symbols of a grammar for C include
 749 `identifier', `number', `string', plus one symbol for each keyword,
 750 operator or punctuation mark: `if', `return', `const', `static', `int',
 751 `char', `plus-sign', `open-brace', `close-brace', `comma' and many more.
 752 (These tokens can be subdivided into characters, but that is a matter of
 753 lexicography, not grammar.)
 754
 755 Here is a simple C function subdivided into tokens:
 756
 757 @example
 758 int             /* @r{keyword `int'} */
 759 square (int x)  /* @r{identifier, open-paren, keyword `int',}
 760                    @r{identifier, close-paren} */
 761 @{               /* @r{open-brace} */
 762   return x * x; /* @r{keyword `return', identifier, asterisk,}
 763                    @r{identifier, semicolon} */
 764 @}               /* @r{close-brace} */
 765 @end example
 766
 767 The syntactic groupings of C include the expression, the statement, the
 768 declaration, and the function definition.  These are represented in the
 769 grammar of C by nonterminal symbols `expression', `statement',
 770 `declaration' and `function definition'.  The full grammar uses dozens of
 771 additional language constructs, each with its own nonterminal symbol, in
 772 order to express the meanings of these four.  The example above is a
 773 function definition; it contains one declaration, and one statement.  In
 774 the statement, each @samp{x} is an expression and so is @samp{x * x}.
 775
 776 Each nonterminal symbol must have grammatical rules showing how it is made
 777 out of simpler constructs.  For example, one kind of C statement is the
 778 @code{return} statement; this would be described with a grammar rule which
 779 reads informally as follows:
 780
 781 @quotation
 782 A `statement' can be made of a `return' keyword, an `expression' and a
 783 `semicolon'.
 784 @end quotation
 785
 786 @noindent
 787 There would be many other rules for `statement', one for each kind of
 788 statement in C.
 789
 790 @cindex start symbol
 791 One nonterminal symbol must be distinguished as the special one which
 792 defines a complete utterance in the language.  It is called the @dfn{start
 793 symbol}.  In a compiler, this means a complete input program.  In the C
 794 language, the nonterminal symbol `sequence of definitions and declarations'
 795 plays this role.
 796
 797 For example, @samp{1 + 2} is a valid C expression---a valid part of a C
 798 program---but it is not valid as an @emph{entire} C program.  In the
 799 context-free grammar of C, this follows from the fact that `expression' is
 800 not the start symbol.
 801
 802 The Bison parser reads a sequence of tokens as its input, and groups the
 803 tokens using the grammar rules.  If the input is valid, the end result is
 804 that the entire token sequence reduces to a single grouping whose symbol is
 805 the grammar's start symbol.  If we use a grammar for C, the entire input
 806 must be a `sequence of definitions and declarations'.  If not, the parser
 807 reports a syntax error.
 808
 809 @node Grammar in Bison
 810 @section From Formal Rules to Bison Input
 811 @cindex Bison grammar
 812 @cindex grammar, Bison
 813 @cindex formal grammar
 814
 815 A formal grammar is a mathematical construct.  To define the language
 816 for Bison, you must write a file expressing the grammar in Bison syntax:
 817 a @dfn{Bison grammar} file.  @xref{Grammar File}.
 818
 819 A nonterminal symbol in the formal grammar is represented in Bison input
 820 as an identifier, like an identifier in C@.  By convention, it should be
 821 in lower case, such as @code{expr}, @code{stmt} or @code{declaration}.
 822
 823 The Bison representation for a terminal symbol is also called a @dfn{token
 824 kind}.  Token kinds as well can be represented as C-like identifiers.  By
 825 convention, these identifiers should be upper case to distinguish them from
 826 nonterminals: for example, @code{INTEGER}, @code{IDENTIFIER}, @code{IF} or
 827 @code{RETURN}.  A terminal symbol that stands for a particular keyword in
 828 the language should be named after that keyword converted to upper case.
 829 The terminal symbol @code{error} is reserved for error recovery.
 830 @xref{Symbols}.
 831
 832 A terminal symbol can also be represented as a character literal, just like
 833 a C character constant.  You should do this whenever a token is just a
 834 single character (parenthesis, plus-sign, etc.): use that same character in
 835 a literal as the terminal symbol for that token.
 836
 837 A third way to represent a terminal symbol is with a C string constant
 838 containing several characters.  @xref{Symbols}, for more information.
 839
 840 The grammar rules also have an expression in Bison syntax.  For example,
 841 here is the Bison rule for a C @code{return} statement.  The semicolon in
 842 quotes is a literal character token, representing part of the C syntax for
 843 the statement; the naked semicolon, and the colon, are Bison punctuation
 844 used in every rule.
 845
 846 @example
 847 stmt: RETURN expr ';' ;
 848 @end example
 849
 850 @noindent
 851 @xref{Rules}.
 852
 853 @node Semantic Values
 854 @section Semantic Values
 855 @cindex semantic value
 856 @cindex value, semantic
 857
 858 A formal grammar selects tokens only by their classifications: for example,
 859 if a rule mentions the terminal symbol `integer constant', it means that
 860 @emph{any} integer constant is grammatically valid in that position.  The
 861 precise value of the constant is irrelevant to how to parse the input: if
 862 @samp{x+4} is grammatical then @samp{x+1} or @samp{x+3989} is equally
 863 grammatical.
 864
 865 But the precise value is very important for what the input means once it is
 866 parsed.  A compiler is useless if it fails to distinguish between 4, 1 and
 867 3989 as constants in the program!  Therefore, each token in a Bison grammar
 868 has both a token kind and a @dfn{semantic value}.  @xref{Semantics}, for
 869 details.
 870
 871 The token kind is a terminal symbol defined in the grammar, such as
 872 @code{INTEGER}, @code{IDENTIFIER} or @code{','}.  It tells everything you
 873 need to know to decide where the token may validly appear and how to group
 874 it with other tokens.  The grammar rules know nothing about tokens except
 875 their kinds.
 876
 877 The semantic value has all the rest of the information about the
 878 meaning of the token, such as the value of an integer, or the name of an
 879 identifier.  (A token such as @code{','} which is just punctuation doesn't
 880 need to have any semantic value.)
 881
 882 For example, an input token might be classified as token kind @code{INTEGER}
 883 and have the semantic value 4.  Another input token might have the same
 884 token kind @code{INTEGER} but value 3989.  When a grammar rule says that
 885 @code{INTEGER} is allowed, either of these tokens is acceptable because each
 886 is an @code{INTEGER}.  When the parser accepts the token, it keeps track of
 887 the token's semantic value.
 888
 889 Each grouping can also have a semantic value as well as its nonterminal
 890 symbol.  For example, in a calculator, an expression typically has a
 891 semantic value that is a number.  In a compiler for a programming
 892 language, an expression typically has a semantic value that is a tree
 893 structure describing the meaning of the expression.
 894
 895 @node Semantic Actions
 896 @section Semantic Actions
 897 @cindex semantic actions
 898 @cindex actions, semantic
 899
 900 In order to be useful, a program must do more than parse input; it must
 901 also produce some output based on the input.  In a Bison grammar, a grammar
 902 rule can have an @dfn{action} made up of C statements.  Each time the
 903 parser recognizes a match for that rule, the action is executed.
 904 @xref{Actions}.
 905
 906 Most of the time, the purpose of an action is to compute the semantic value
 907 of the whole construct from the semantic values of its parts.  For example,
 908 suppose we have a rule which says an expression can be the sum of two
 909 expressions.  When the parser recognizes such a sum, each of the
 910 subexpressions has a semantic value which describes how it was built up.
 911 The action for this rule should create a similar sort of value for the
 912 newly recognized larger expression.
 913
 914 For example, here is a rule that says an expression can be the sum of
 915 two subexpressions:
 916
 917 @example
 918 expr: expr '+' expr   @{ $$ = $1 + $3; @} ;
 919 @end example
 920
 921 @noindent
 922 The action says how to produce the semantic value of the sum expression
 923 from the values of the two subexpressions.
 924
 925 @node GLR Parsers
 926 @section Writing GLR Parsers
 927 @cindex GLR parsing
 928 @cindex generalized LR (GLR) parsing
 929 @findex %glr-parser
 930 @cindex conflicts
 931 @cindex shift/reduce conflicts
 932 @cindex reduce/reduce conflicts
 933
 934 In some grammars, Bison's deterministic
 935 LR(1) parsing algorithm cannot decide whether to apply a
 936 certain grammar rule at a given point.  That is, it may not be able to
 937 decide (on the basis of the input read so far) which of two possible
 938 reductions (applications of a grammar rule) applies, or whether to apply
 939 a reduction or read more of the input and apply a reduction later in the
 940 input.  These are known respectively as @dfn{reduce/reduce} conflicts
 941 (@pxref{Reduce/Reduce}), and @dfn{shift/reduce} conflicts
 942 (@pxref{Shift/Reduce}).
 943
 944 To use a grammar that is not easily modified to be LR(1), a more general
 945 parsing algorithm is sometimes necessary.  If you include @code{%glr-parser}
 946 among the Bison declarations in your file (@pxref{Grammar Outline}), the
 947 result is a Generalized LR (GLR) parser.  These parsers handle Bison
 948 grammars that contain no unresolved conflicts (i.e., after applying
 949 precedence declarations) identically to deterministic parsers.  However,
 950 when faced with unresolved shift/reduce and reduce/reduce conflicts, GLR
 951 parsers use the simple expedient of doing both, effectively cloning the
 952 parser to follow both possibilities.  Each of the resulting parsers can
 953 again split, so that at any given time, there can be any number of possible
 954 parses being explored.  The parsers proceed in lockstep; that is, all of
 955 them consume (shift) a given input symbol before any of them proceed to the
 956 next.  Each of the cloned parsers eventually meets one of two possible
 957 fates: either it runs into a parsing error, in which case it simply
 958 vanishes, or it merges with another parser, because the two of them have
 959 reduced the input to an identical set of symbols.
 960
 961 During the time that there are multiple parsers, semantic actions are
 962 recorded, but not performed.  When a parser disappears, its recorded
 963 semantic actions disappear as well, and are never performed.  When a
 964 reduction makes two parsers identical, causing them to merge, Bison records
 965 both sets of semantic actions.  Whenever the last two parsers merge,
 966 reverting to the single-parser case, Bison resolves all the outstanding
 967 actions either by precedences given to the grammar rules involved, or by
 968 performing both actions, and then calling a designated user-defined function
 969 on the resulting values to produce an arbitrary merged result.
 970
 971 @menu
 972 * Simple GLR Parsers::     Using GLR parsers on unambiguous grammars.
 973 * Merging GLR Parses::     Using GLR parsers to resolve ambiguities.
 974 * GLR Semantic Actions::   Considerations for semantic values and deferred actions.
 975 * Semantic Predicates::    Controlling a parse with arbitrary computations.
 976 @end menu
 977
 978 @node Simple GLR Parsers
 979 @subsection Using GLR on Unambiguous Grammars
 980 @cindex GLR parsing, unambiguous grammars
 981 @cindex generalized LR (GLR) parsing, unambiguous grammars
 982 @findex %glr-parser
 983 @findex %expect-rr
 984 @cindex conflicts
 985 @cindex reduce/reduce conflicts
 986 @cindex shift/reduce conflicts
 987
 988 In the simplest cases, you can use the GLR algorithm
 989 to parse grammars that are unambiguous but fail to be LR(1).
 990 Such grammars typically require more than one symbol of lookahead.
 991
 992 Consider a problem that
 993 arises in the declaration of enumerated and subrange types in the
 994 programming language Pascal.  Here are some examples:
 995
 996 @example
 997 type subrange = lo .. hi;
 998 type enum = (a, b, c);
 999 @end example
1000
1001 @noindent
1002 The original language standard allows only numeric literals and constant
1003 identifiers for the subrange bounds (@samp{lo} and @samp{hi}), but Extended
1004 Pascal (ISO/IEC 10206) and many other Pascal implementations allow arbitrary
1005 expressions there.  This gives rise to the following situation, containing a
1006 superfluous pair of parentheses:
1007
1008 @example
1009 type subrange = (a) .. b;
1010 @end example
1011
1012 @noindent
1013 Compare this to the following declaration of an enumerated
1014 type with only one value:
1015
1016 @example
1017 type enum = (a);
1018 @end example
1019
1020 @noindent
1021 (These declarations are contrived, but they are syntactically valid, and
1022 more-complicated cases can come up in practical programs.)
1023
1024 These two declarations look identical until the @samp{..} token.  With
1025 normal LR(1) one-token lookahead it is not possible to decide between the
1026 two forms when the identifier @samp{a} is parsed.  It is, however, desirable
1027 for a parser to decide this, since in the latter case @samp{a} must become a
1028 new identifier to represent the enumeration value, while in the former case
1029 @samp{a} must be evaluated with its current meaning, which may be a constant
1030 or even a function call.
1031
1032 You could parse @samp{(a)} as an ``unspecified identifier in parentheses'',
1033 to be resolved later, but this typically requires substantial contortions in
1034 both semantic actions and large parts of the grammar, where the parentheses
1035 are nested in the recursive rules for expressions.
1036
1037 You might think of using the lexer to distinguish between the two forms by
1038 returning different tokens for currently defined and undefined identifiers.
1039 But if these declarations occur in a local scope, and @samp{a} is defined in
1040 an outer scope, then both forms are possible---either locally redefining
1041 @samp{a}, or using the value of @samp{a} from the outer scope.  So this
1042 approach cannot work.
1043
1044 A simple solution to this problem is to declare the parser to use the GLR
1045 algorithm.  When the GLR parser reaches the critical state, it merely splits
1046 into two branches and pursues both syntax rules simultaneously.  Sooner or
1047 later, one of them runs into a parsing error.  If there is a @samp{..} token
1048 before the next @samp{;}, the rule for enumerated types fails since it
1049 cannot accept @samp{..} anywhere; otherwise, the subrange type rule fails
1050 since it requires a @samp{..} token.  So one of the branches fails silently,
1051 and the other one continues normally, performing all the intermediate
1052 actions that were postponed during the split.
1053
1054 If the input is syntactically incorrect, both branches fail and the parser
1055 reports a syntax error as usual.
1056
1057 The effect of all this is that the parser seems to ``guess'' the correct
1058 branch to take, or in other words, it seems to use more lookahead than the
1059 underlying LR(1) algorithm actually allows for.  In this example, LR(2)
1060 would suffice, but also some cases that are not LR(@math{k}) for any
1061 @math{k} can be handled this way.
1062
1063 In general, a GLR parser can take quadratic or cubic worst-case time, and
1064 the current Bison parser even takes exponential time and space for some
1065 grammars.  In practice, this rarely happens, and for many grammars it is
1066 possible to prove that it cannot happen.  The present example contains only
1067 one conflict between two rules, and the type-declaration context containing
1068 the conflict cannot be nested.  So the number of branches that can exist at
1069 any time is limited by the constant 2, and the parsing time is still linear.
1070
1071 Here is a Bison grammar corresponding to the example above.  It
1072 parses a vastly simplified form of Pascal type declarations.
1073
1074 @example
1075 %token TYPE DOTDOT ID
1076
1077 @group
1078 %left '+' '-'
1079 %left '*' '/'
1080 @end group
1081
1082 %%
1083 type_decl: TYPE ID '=' type ';' ;
1084
1085 @group
1086 type:
1087   '(' id_list ')'
1088 | expr DOTDOT expr
1089 ;
1090 @end group
1091
1092 @group
1093 id_list:
1094   ID
1095 | id_list ',' ID
1096 ;
1097 @end group
1098
1099 @group
1100 expr:
1101   '(' expr ')'
1102 | expr '+' expr
1103 | expr '-' expr
1104 | expr '*' expr
1105 | expr '/' expr
1106 | ID
1107 ;
1108 @end group
1109 @end example
1110
1111 When this is processed as a normal LR(1) grammar, Bison correctly complains
1112 about one reduce/reduce conflict.  In the conflicting situation the
1113 parser chooses one of the alternatives, arbitrarily the one
1114 declared first.  Therefore the following correct input is not
1115 recognized:
1116
1117 @example
1118 type t = (a) .. b;
1119 @end example
1120
1121 The parser can be turned into a GLR parser, while also telling Bison
1122 to be silent about the one known reduce/reduce conflict, by adding
1123 these two declarations to the Bison grammar file (before the first
1124 @samp{%%}):
1125
1126 @example
1127 %glr-parser
1128 %expect-rr 1
1129 @end example
1130
1131 @noindent
1132 No change in the grammar itself is required.  Now the parser recognizes all
1133 valid declarations, according to the limited syntax above, transparently.
1134 In fact, the user does not even notice when the parser splits.
1135
1136 So here we have a case where we can use the benefits of GLR, almost without
1137 disadvantages.  Even in simple cases like this, however, there are at least
1138 two potential problems to beware.  First, always analyze the conflicts
1139 reported by Bison to make sure that GLR splitting is only done where it is
1140 intended.  A GLR parser splitting inadvertently may cause problems less
1141 obvious than an LR parser statically choosing the wrong alternative in a
1142 conflict.  Second, consider interactions with the lexer (@pxref{Semantic
1143 Tokens}) with great care.  Since a split parser consumes tokens without
1144 performing any actions during the split, the lexer cannot obtain information
1145 via parser actions.  Some cases of lexer interactions can be eliminated by
1146 using GLR to shift the complications from the lexer to the parser.  You must
1147 check the remaining cases for correctness.
1148
1149 In our example, it would be safe for the lexer to return tokens based on
1150 their current meanings in some symbol table, because no new symbols are
1151 defined in the middle of a type declaration.  Though it is possible for a
1152 parser to define the enumeration constants as they are parsed, before the
1153 type declaration is completed, it actually makes no difference since they
1154 cannot be used within the same enumerated type declaration.
1155
1156 @node Merging GLR Parses
1157 @subsection Using GLR to Resolve Ambiguities
1158 @cindex GLR parsing, ambiguous grammars
1159 @cindex generalized LR (GLR) parsing, ambiguous grammars
1160 @findex %dprec
1161 @findex %merge
1162 @cindex conflicts
1163 @cindex reduce/reduce conflicts
1164
1165 Let's consider an example, vastly simplified from a C++
1166 grammar.@footnote{The sources of an extended version of this example are
1167 available in C as @file{examples/c/glr}, and in C++ as
1168 @file{examples/c++/glr}.}
1169
1170 @example
1171 %@{
1172   #include <stdio.h>
1173   int yylex (void);
1174   void yyerror (char const *);
1175 %@}
1176
1177 %define api.value.type @{char const *@}
1178
1179 %token TYPENAME ID
1180
1181 %right '='
1182 %left '+'
1183
1184 %glr-parser
1185
1186 %%
1187
1188 prog:
1189   %empty
1190 | prog stmt   @{ printf ("\n"); @}
1191 ;
1192
1193 stmt:
1194   expr ';'  %dprec 1
1195 | decl      %dprec 2
1196 ;
1197
1198 expr:
1199   ID               @{ printf ("%s ", $$); @}
1200 | TYPENAME '(' expr ')'
1201                    @{ printf ("%s <cast> ", $1); @}
1202 | expr '+' expr    @{ printf ("+ "); @}
1203 | expr '=' expr    @{ printf ("= "); @}
1204 ;
1205
1206 decl:
1207   TYPENAME declarator ';'
1208                    @{ printf ("%s <declare> ", $1); @}
1209 | TYPENAME declarator '=' expr ';'
1210                    @{ printf ("%s <init-declare> ", $1); @}
1211 ;
1212
1213 declarator:
1214   ID               @{ printf ("\"%s\" ", $1); @}
1215 | '(' declarator ')'
1216 ;
1217 @end example
1218
1219 @noindent
1220 This models a problematic part of the C++ grammar---the ambiguity between
1221 certain declarations and statements.  For example,
1222
1223 @example
1224 T (x) = y+z;
1225 @end example
1226
1227 @noindent
1228 parses as either an @code{expr} or a @code{decl}
1229 (assuming that @samp{T} is recognized as a @code{TYPENAME} and
1230 @samp{x} as an @code{ID}).
1231 Bison detects this as a reduce/reduce conflict between the rules
1232 @code{expr : ID} and @code{declarator : ID}, which it cannot resolve at the
1233 time it encounters @code{x} in the example above.  Since this is a
1234 GLR parser, it therefore splits the problem into two parses, one for
1235 each choice of resolving the reduce/reduce conflict.
1236 Unlike the example from the previous section (@pxref{Simple GLR Parsers}),
1237 however, neither of these parses ``dies,'' because the grammar as it stands is
1238 ambiguous.  One of the parsers eventually reduces @code{stmt : expr ';'} and
1239 the other reduces @code{stmt : decl}, after which both parsers are in an
1240 identical state: they've seen @samp{prog stmt} and have the same unprocessed
1241 input remaining.  We say that these parses have @dfn{merged.}
1242
1243 At this point, the GLR parser requires a specification in the
1244 grammar of how to choose between the competing parses.
1245 In the example above, the two @code{%dprec}
1246 declarations specify that Bison is to give precedence
1247 to the parse that interprets the example as a
1248 @code{decl}, which implies that @code{x} is a declarator.
1249 The parser therefore prints
1250
1251 @example
1252 "x" y z + T <init-declare>
1253 @end example
1254
1255 The @code{%dprec} declarations only come into play when more than one
1256 parse survives.  Consider a different input string for this parser:
1257
1258 @example
1259 T (x) + y;
1260 @end example
1261
1262 @noindent
1263 This is another example of using GLR to parse an unambiguous
1264 construct, as shown in the previous section (@pxref{Simple GLR Parsers}).
1265 Here, there is no ambiguity (this cannot be parsed as a declaration).
1266 However, at the time the Bison parser encounters @code{x}, it does not
1267 have enough information to resolve the reduce/reduce conflict (again,
1268 between @code{x} as an @code{expr} or a @code{declarator}).  In this
1269 case, no precedence declaration is used.  Again, the parser splits
1270 into two, one assuming that @code{x} is an @code{expr}, and the other
1271 assuming @code{x} is a @code{declarator}.  The second of these parsers
1272 then vanishes when it sees @code{+}, and the parser prints
1273
1274 @example
1275 x T <cast> y +
1276 @end example
1277
1278 Suppose that instead of resolving the ambiguity, you wanted to see all
1279 the possibilities.  For this purpose, you must merge the semantic
1280 actions of the two possible parsers, rather than choosing one over the
1281 other.  To do so, you could change the declaration of @code{stmt} as
1282 follows:
1283
1284 @example
1285 stmt:
1286   expr ';'  %merge <stmt_merge>
1287 | decl      %merge <stmt_merge>
1288 ;
1289 @end example
1290
1291 @noindent
1292 and define the @code{stmt_merge} function as:
1293
1294 @example
1295 static YYSTYPE
1296 stmt_merge (YYSTYPE x0, YYSTYPE x1)
1297 @{
1298   printf ("<OR> ");
1299   return "";
1300 @}
1301 @end example
1302
1303 @noindent
1304 with an accompanying forward declaration
1305 in the C declarations at the beginning of the file:
1306
1307 @example
1308 %@{
1309   static YYSTYPE stmt_merge (YYSTYPE x0, YYSTYPE x1);
1310 %@}
1311 @end example
1312
1313 @noindent
1314 With these declarations, the resulting parser parses the first example
1315 as both an @code{expr} and a @code{decl}, and prints
1316
1317 @example
1318 "x" y z + T <init-declare> x T <cast> y z + = <OR>
1319 @end example
1320
1321 Bison requires that all of the
1322 productions that participate in any particular merge have identical
1323 @samp{%merge} clauses.  Otherwise, the ambiguity is unresolvable,
1324 and the parser reports an error during any parse that results in
1325 the offending merge.
1326
1327 @sp 1
1328
1329 The signature of the merger depends on the type of the symbol.  In the
1330 previous example, the merged-to symbol (@code{stmt}) does not have a
1331 specific type, and the merger is
1332
1333 @example
1334 YYSTYPE stmt_merge (YYSTYPE x0, YYSTYPE x1);
1335 @end example
1336
1337 @noindent
1338 However, if @code{stmt} had a declared type, e.g.,
1339
1340 @example
1341 %type <Node *> stmt;
1342 @end example
1343
1344 @noindent
1345 or
1346
1347 @example
1348 @group
1349 %union @{
1350   Node *node;
1351   ...
1352 @};
1353 @end group
1354 %type <node> stmt;
1355 @end example
1356
1357 @noindent
1358 then the prototype of the merger must be:
1359
1360 @example
1361 Node *stmt_merge (YYSTYPE x0, YYSTYPE x1);
1362 @end example
1363
1364 @noindent
1365 (This signature might be a mistake originally, and maybe it should have been
1366 @samp{Node *stmt_merge (Node *x0, Node *x1)}.  If you have an opinion about
1367 it, please let us know.)
1368
1369 @node GLR Semantic Actions
1370 @subsection GLR Semantic Actions
1371
1372 The nature of GLR parsing and the structure of the generated
1373 parsers give rise to certain restrictions on semantic values and actions.
1374
1375 @subsubsection Deferred semantic actions
1376 @cindex deferred semantic actions
1377 By definition, a deferred semantic action is not performed at the same time as
1378 the associated reduction.
1379 This raises caveats for several Bison features you might use in a semantic
1380 action in a GLR parser.
1381
1382 @vindex yychar
1383 @cindex GLR parsers and @code{yychar}
1384 @vindex yylval
1385 @cindex GLR parsers and @code{yylval}
1386 @vindex yylloc
1387 @cindex GLR parsers and @code{yylloc}
1388 In any semantic action, you can examine @code{yychar} to determine the kind
1389 of the lookahead token present at the time of the associated reduction.
1390 After checking that @code{yychar} is not set to @code{YYEMPTY} or
1391 @code{YYEOF}, you can then examine @code{yylval} and @code{yylloc} to
1392 determine the lookahead token's semantic value and location, if any.  In a
1393 nondeferred semantic action, you can also modify any of these variables to
1394 influence syntax analysis.  @xref{Lookahead}.
1395
1396 @findex yyclearin
1397 @cindex GLR parsers and @code{yyclearin}
1398 In a deferred semantic action, it's too late to influence syntax analysis.
1399 In this case, @code{yychar}, @code{yylval}, and @code{yylloc} are set to
1400 shallow copies of the values they had at the time of the associated reduction.
1401 For this reason alone, modifying them is dangerous.
1402 Moreover, the result of modifying them is undefined and subject to change with
1403 future versions of Bison.
1404 For example, if a semantic action might be deferred, you should never write it
1405 to invoke @code{yyclearin} (@pxref{Action Features}) or to attempt to free
1406 memory referenced by @code{yylval}.
1407
1408 @subsubsection YYERROR
1409 @findex YYERROR
1410 @cindex GLR parsers and @code{YYERROR}
1411 Another Bison feature requiring special consideration is @code{YYERROR}
1412 (@pxref{Action Features}), which you can invoke in a semantic action to
1413 initiate error recovery.
1414 During deterministic GLR operation, the effect of @code{YYERROR} is
1415 the same as its effect in a deterministic parser.
1416 The effect in a deferred action is similar, but the precise point of the
1417 error is undefined; instead, the parser reverts to deterministic operation,
1418 selecting an unspecified stack on which to continue with a syntax error.
1419 In a semantic predicate (@pxref{Semantic Predicates}) during nondeterministic
1420 parsing, @code{YYERROR} silently prunes
1421 the parse that invoked the test.
1422
1423 @subsubsection Restrictions on semantic values and locations
1424 GLR parsers require that you use POD (Plain Old Data) types for
1425 semantic values and location types when using the generated parsers as
1426 C++ code.
1427
1428 @node Semantic Predicates
1429 @subsection Controlling a Parse with Arbitrary Predicates
1430 @findex %?
1431 @cindex Semantic predicates in GLR parsers
1432
1433 In addition to the @code{%dprec} and @code{%merge} directives,
1434 GLR parsers
1435 allow you to reject parses on the basis of arbitrary computations executed
1436 in user code, without having Bison treat this rejection as an error
1437 if there are alternative parses.  For example,
1438
1439 @example
1440 widget:
1441   %?@{  new_syntax @} "widget" id new_args  @{ $$ = f($3, $4); @}
1442 | %?@{ !new_syntax @} "widget" id old_args  @{ $$ = f($3, $4); @}
1443 ;
1444 @end example
1445
1446 @noindent
1447 is one way to allow the same parser to handle two different syntaxes for
1448 widgets.  The clause preceded by @code{%?} is treated like an ordinary
1449 midrule action, except that its text is handled as an expression and is always
1450 evaluated immediately (even when in nondeterministic mode).  If the
1451 expression yields 0 (false), the clause is treated as a syntax error,
1452 which, in a nondeterministic parser, causes the stack in which it is reduced
1453 to die.  In a deterministic parser, it acts like @code{YYERROR}.
1454
1455 As the example shows, predicates otherwise look like semantic actions, and
1456 therefore you must take them into account when determining the numbers
1457 to use for denoting the semantic values of right-hand side symbols.
1458 Predicate actions, however, have no defined value, and may not be given
1459 labels.
1460
1461 There is a subtle difference between semantic predicates and ordinary
1462 actions in nondeterministic mode, since the latter are deferred.
1463 For example, we could try to rewrite the previous example as
1464
1465 @example
1466 widget:
1467   @{ if (!new_syntax) YYERROR; @}
1468     "widget" id new_args  @{ $$ = f($3, $4); @}
1469 |  @{ if (new_syntax) YYERROR; @}
1470     "widget" id old_args  @{ $$ = f($3, $4); @}
1471 ;
1472 @end example
1473
1474 @noindent
1475 (reversing the sense of the predicate tests to cause an error when they are
1476 false).  However, this
1477 does @emph{not} have the same effect if @code{new_args} and @code{old_args}
1478 have overlapping syntax.
1479 Since the midrule actions testing @code{new_syntax} are deferred,
1480 a GLR parser first encounters the unresolved ambiguous reduction
1481 for cases where @code{new_args} and @code{old_args} recognize the same string
1482 @emph{before} performing the tests of @code{new_syntax}.  It therefore
1483 reports an error.
1484
1485 Finally, be careful when writing predicates: deferred actions have not yet
1486 been evaluated, so using their results in a predicate has undefined effects.
1487
1488 @node Locations
1489 @section Locations
1490 @cindex location
1491 @cindex textual location
1492 @cindex location, textual
1493
1494 Many applications, like interpreters or compilers, have to produce verbose
1495 and useful error messages.  To achieve this, one must be able to keep track of
1496 the @dfn{textual location}, or @dfn{location}, of each syntactic construct.
1497 Bison provides a mechanism for handling these locations.
1498
1499 Each token has a semantic value.  In a similar fashion, each token has an
1500 associated location, but the type of locations is the same for all tokens
1501 and groupings.  Moreover, the output parser is equipped with a default data
1502 structure for storing locations (@pxref{Tracking Locations}, for more
1503 details).
1504
1505 Like semantic values, locations can be reached in actions using a dedicated
1506 set of constructs.  In the example above, the location of the whole grouping
1507 is @code{@@$}, while the locations of the subexpressions are @code{@@1} and
1508 @code{@@3}.
1509
1510 When a rule is matched, a default action is used to compute the semantic value
1511 of its left hand side (@pxref{Actions}).  In the same way, another default
1512 action is used for locations.  However, the action for locations is general
1513 enough for most cases, meaning there is usually no need to describe for each
1514 rule how @code{@@$} should be formed.  When building a new location for a given
1515 grouping, the default behavior of the output parser is to take the beginning
1516 of the first symbol, and the end of the last symbol.
1517
1518 @node Bison Parser
1519 @section Bison Output: the Parser Implementation File
1520 @cindex Bison parser
1521 @cindex Bison utility
1522 @cindex lexical analyzer, purpose
1523 @cindex parser
1524
1525 When you run Bison, you give it a Bison grammar file as input.  The
1526 most important output is a C source file that implements a parser for
1527 the language described by the grammar.  This parser is called a
1528 @dfn{Bison parser}, and this file is called a @dfn{Bison parser
1529 implementation file}.  Keep in mind that the Bison utility and the
1530 Bison parser are two distinct programs: the Bison utility is a program
1531 whose output is the Bison parser implementation file that becomes part
1532 of your program.
1533
1534 The job of the Bison parser is to group tokens into groupings according to
1535 the grammar rules---for example, to build identifiers and operators into
1536 expressions.  As it does this, it runs the actions for the grammar rules it
1537 uses.
1538
1539 The tokens come from a function called the @dfn{lexical analyzer} that
1540 you must supply in some fashion (such as by writing it in C).  The Bison
1541 parser calls the lexical analyzer each time it wants a new token.  It
1542 doesn't know what is ``inside'' the tokens (though their semantic values
1543 may reflect this).  Typically the lexical analyzer makes the tokens by
1544 parsing characters of text, but Bison does not depend on this.
1545 @xref{Lexical}.
1546
1547 The Bison parser implementation file is C code which defines a
1548 function named @code{yyparse} which implements that grammar.  This
1549 function does not make a complete C program: you must supply some
1550 additional functions.  One is the lexical analyzer.  Another is an
1551 error-reporting function which the parser calls to report an error.
1552 In addition, a complete C program must start with a function called
1553 @code{main}; you have to provide this, and arrange for it to call
1554 @code{yyparse} or the parser will never run.  @xref{Interface}.
1555
1556 Aside from the token kind names and the symbols in the actions you
1557 write, all symbols defined in the Bison parser implementation file
1558 itself begin with @samp{yy} or @samp{YY}.  This includes interface
1559 functions such as the lexical analyzer function @code{yylex}, the
1560 error reporting function @code{yyerror} and the parser function
1561 @code{yyparse} itself.  This also includes numerous identifiers used
1562 for internal purposes.  Therefore, you should avoid using C
1563 identifiers starting with @samp{yy} or @samp{YY} in the Bison grammar
1564 file except for the ones defined in this manual.  Also, you should
1565 avoid using the C identifiers @samp{malloc} and @samp{free} for
1566 anything other than their usual meanings.
1567
1568 In some cases the Bison parser implementation file includes system
1569 headers, and in those cases your code should respect the identifiers
1570 reserved by those headers.  On some non-GNU hosts, @code{<limits.h>},
1571 @code{<stddef.h>}, @code{<stdint.h>} (if available), and @code{<stdlib.h>}
1572 are included to declare memory allocators and integer types and constants.
1573 @code{<libintl.h>} is included if message translation is in use
1574 (@pxref{Internationalization}).  Other system headers may be included
1575 if you define @code{YYDEBUG} (@pxref{Tracing}) or
1576 @code{YYSTACK_USE_ALLOCA} (@pxref{Table of Symbols}) to a nonzero value.
1577
1578 @node Stages
1579 @section Stages in Using Bison
1580 @cindex stages in using Bison
1581 @cindex using Bison
1582
1583 The actual language-design process using Bison, from grammar specification
1584 to a working compiler or interpreter, has these parts:
1585
1586 @enumerate
1587 @item
1588 Formally specify the grammar in a form recognized by Bison
1589 (@pxref{Grammar File}).  For each grammatical rule
1590 in the language, describe the action that is to be taken when an
1591 instance of that rule is recognized.  The action is described by a
1592 sequence of C statements.
1593
1594 @item
1595 Write a lexical analyzer to process input and pass tokens to the parser.
1596 The lexical analyzer may be written by hand in C (@pxref{Lexical}).  It
1597 could also be produced using Lex, but the use of Lex is not discussed in
1598 this manual.
1599
1600 @item
1601 Write a controlling function that calls the Bison-produced parser.
1602
1603 @item
1604 Write error-reporting routines.
1605 @end enumerate
1606
1607 To turn this source code as written into a runnable program, you
1608 must follow these steps:
1609
1610 @enumerate
1611 @item
1612 Run Bison on the grammar to produce the parser.
1613
1614 @item
1615 Compile the code output by Bison, as well as any other source files.
1616
1617 @item
1618 Link the object files to produce the finished product.
1619 @end enumerate
1620
1621 @node Grammar Layout
1622 @section The Overall Layout of a Bison Grammar
1623 @cindex grammar file
1624 @cindex file format
1625 @cindex format of grammar file
1626 @cindex layout of Bison grammar
1627
1628 The input file for the Bison utility is a @dfn{Bison grammar file}.  The
1629 general form of a Bison grammar file is as follows:
1630
1631 @example
1632 %@{
1633 @var{Prologue}
1634 %@}
1635
1636 @var{Bison declarations}
1637
1638 %%
1639 @var{Grammar rules}
1640 %%
1641 @var{Epilogue}
1642 @end example
1643
1644 @noindent
1645 The @samp{%%}, @samp{%@{} and @samp{%@}} are punctuation that appears
1646 in every Bison grammar file to separate the sections.
1647
1648 The prologue may define types and variables used in the actions.  You can
1649 also use preprocessor commands to define macros used there, and use
1650 @code{#include} to include header files that do any of these things.
1651 You need to declare the lexical analyzer @code{yylex} and the error
1652 printer @code{yyerror} here, along with any other global identifiers
1653 used by the actions in the grammar rules.
1654
1655 The Bison declarations declare the names of the terminal and nonterminal
1656 symbols, and may also describe operator precedence and the data types of
1657 semantic values of various symbols.
1658
1659 The grammar rules define how to construct each nonterminal symbol from its
1660 parts.
1661
1662 The epilogue can contain any code you want to use.  Often the
1663 definitions of functions declared in the prologue go here.  In a
1664 simple program, all the rest of the program can go here.
1665
1666 @node Examples
1667 @chapter Examples
1668 @cindex simple examples
1669 @cindex examples, simple
1670
1671 Now we show and explain several sample programs written using Bison: a
1672 Reverse Polish Notation calculator, an algebraic (infix) notation
1673 calculator --- later extended to track ``locations'' ---
1674 and a multi-function calculator.  All
1675 produce usable, though limited, interactive desk-top calculators.
1676
1677 These examples are simple, but Bison grammars for real programming
1678 languages are written the same way.  You can copy these examples into a
1679 source file to try them.
1680
1681 @sp 1
1682
1683 Bison comes with several examples (including for the different target
1684 languages).  If this package is properly installed, you should find them in
1685 @file{@var{prefix}/share/doc/bison/examples}, where @var{prefix} is the root
1686 of the installation, probably something like @file{/usr/local} or
1687 @file{/usr}.
1688
1689 @menu
1690 * RPN Calc::               Reverse Polish Notation Calculator;
1691                              a first example with no operator precedence.
1692 * Infix Calc::             Infix (algebraic) notation calculator.
1693                              Operator precedence is introduced.
1694 * Simple Error Recovery::  Continuing after syntax errors.
1695 * Location Tracking Calc:: Demonstrating the use of @@@var{n} and @@$.
1696 * Multi-function Calc::    Calculator with memory and trig functions.
1697                              It uses multiple data-types for semantic values.
1698 * Exercises::              Ideas for improving the multi-function calculator.
1699 @end menu
1700
1701 @node RPN Calc
1702 @section Reverse Polish Notation Calculator
1703 @cindex Reverse Polish Notation
1704 @cindex @code{rpcalc}
1705 @cindex calculator, simple
1706
1707 The first example@footnote{The sources of @command{rpcalc} are available as
1708 @file{examples/c/rpcalc}.} is that of a simple double-precision @dfn{Reverse
1709 Polish
1710 Notation} calculator (a calculator using postfix operators).  This example
1711 provides a good starting point, since operator precedence is not an issue.
1712 The second example will illustrate how operator precedence is handled.
1713
1714 The source code for this calculator is named @file{rpcalc.y}.  The
1715 @samp{.y} extension is a convention used for Bison grammar files.
1716
1717 @menu
1718 * Rpcalc Declarations::    Prologue (declarations) for rpcalc.
1719 * Rpcalc Rules::           Grammar Rules for rpcalc, with explanation.
1720 * Rpcalc Lexer::           The lexical analyzer.
1721 * Rpcalc Main::            The controlling function.
1722 * Rpcalc Error::           The error reporting function.
1723 * Rpcalc Generate::        Running Bison on the grammar file.
1724 * Rpcalc Compile::         Run the C compiler on the output code.
1725 @end menu
1726
1727 @node Rpcalc Declarations
1728 @subsection Declarations for @code{rpcalc}
1729
1730 Here are the C and Bison declarations for the Reverse Polish Notation
1731 calculator.  As in C, comments are placed between @samp{/*@dots{}*/} or
1732 after @samp{//}.
1733
1734 @ignore
1735 @comment file: c/rpcalc/rpcalc.y
1736 @example
1737 /* Parser for rpcalc.   -*- C -*-
1738
1739    Copyright (C) 1988-1993, 1995, 1998-2015, 2018-2021 Free Software
1740    Foundation, Inc.
1741
1742    This file is part of Bison, the GNU Compiler Compiler.
1743
1744    This program is free software: you can redistribute it and/or modify
1745    it under the terms of the GNU General Public License as published by
1746    the Free Software Foundation, either version 3 of the License, or
1747    (at your option) any later version.
1748
1749    This program is distributed in the hope that it will be useful,
1750    but WITHOUT ANY WARRANTY; without even the implied warranty of
1751    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
1752    GNU General Public License for more details.
1753
1754    You should have received a copy of the GNU General Public License
1755    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
1756 @end example
1757 @end ignore
1758
1759 @comment file: c/rpcalc/rpcalc.y
1760 @example
1761 /* Reverse Polish Notation calculator. */
1762
1763 @group
1764 %@{
1765   #include <stdio.h>
1766   #include <math.h>
1767   int yylex (void);
1768   void yyerror (char const *);
1769 %@}
1770 @end group
1771
1772 %define api.value.type @{double@}
1773 %token NUM
1774
1775 %% /* Grammar rules and actions follow. */
1776 @end example
1777
1778 The declarations section (@pxref{Prologue}) contains two
1779 preprocessor directives and two forward declarations.
1780
1781 The @code{#include} directives declare the exponentiation function
1782 @code{pow} (@code{<math.h>}) and the output function @code{printf} (@code{<stdio.h>}).
1783
1784 The forward declarations for @code{yylex} and @code{yyerror} are
1785 needed because the C language requires that functions be declared
1786 before they are used.  These functions will be defined in the
1787 epilogue, but the parser calls them so they must be declared in the
1788 prologue.
1789
1790 The second section, Bison declarations, provides information to Bison about
1791 the tokens and their types (@pxref{Bison Declarations}).
1792
1793 The @code{%define} directive defines the variable @code{api.value.type},
1794 thus specifying the C data type for semantic values of both tokens and
1795 groupings (@pxref{Value Type}).  The Bison
1796 parser will use whatever type @code{api.value.type} is defined as; if you
1797 don't define it, @code{int} is the default.  Because we specify
1798 @samp{@{double@}}, each token and each expression has an associated value,
1799 which is a floating point number.  C code can use @code{YYSTYPE} to refer to
1800 the value @code{api.value.type}.
1801
1802 Each terminal symbol that is not a single-character literal must be
1803 declared.  (Single-character literals normally don't need to be declared.)
1804 In this example, all the arithmetic operators are designated by
1805 single-character literals, so the only terminal symbol that needs to be
1806 declared is @code{NUM}, the token kind for numeric constants.
1807
1808 @node Rpcalc Rules
1809 @subsection Grammar Rules for @code{rpcalc}
1810
1811 Here are the grammar rules for the Reverse Polish Notation calculator.
1812
1813 @comment file: c/rpcalc/rpcalc.y
1814 @example
1815 @group
1816 input:
1817   %empty
1818 | input line
1819 ;
1820 @end group
1821
1822 @group
1823 line:
1824   '\n'
1825 | exp '\n'      @{ printf ("%.10g\n", $1); @}
1826 ;
1827 @end group
1828
1829 @group
1830 exp:
1831   NUM
1832 | exp exp '+'   @{ $$ = $1 + $2;      @}
1833 | exp exp '-'   @{ $$ = $1 - $2;      @}
1834 | exp exp '*'   @{ $$ = $1 * $2;      @}
1835 | exp exp '/'   @{ $$ = $1 / $2;      @}
1836 | exp exp '^'   @{ $$ = pow ($1, $2); @}  /* Exponentiation */
1837 | exp 'n'       @{ $$ = -$1;          @}  /* Unary minus   */
1838 ;
1839 @end group
1840 %%
1841 @end example
1842
1843 The groupings of the rpcalc ``language'' defined here are the expression
1844 (given the name @code{exp}), the line of input (@code{line}), and the
1845 complete input transcript (@code{input}).  Each of these nonterminal
1846 symbols has several alternate rules, joined by the vertical bar @samp{|}
1847 which is read as ``or''.  The following sections explain what these rules
1848 mean.
1849
1850 The semantics of the language is determined by the actions taken when a
1851 grouping is recognized.  The actions are the C code that appears inside
1852 braces.  @xref{Actions}.
1853
1854 You must specify these actions in C, but Bison provides the means for
1855 passing semantic values between the rules.  In each action, the
1856 pseudo-variable @code{$$} stands for the semantic value for the grouping
1857 that the rule is going to construct.  Assigning a value to @code{$$} is the
1858 main job of most actions.  The semantic values of the components of the
1859 rule are referred to as @code{$1}, @code{$2}, and so on.
1860
1861 @menu
1862 * Rpcalc Input::            Explanation of the @code{input} nonterminal
1863 * Rpcalc Line::             Explanation of the @code{line} nonterminal
1864 * Rpcalc Exp::              Explanation of the @code{exp} nonterminal
1865 @end menu
1866
1867 @node Rpcalc Input
1868 @subsubsection Explanation of @code{input}
1869
1870 Consider the definition of @code{input}:
1871
1872 @example
1873 input:
1874   %empty
1875 | input line
1876 ;
1877 @end example
1878
1879 This definition reads as follows: ``A complete input is either an empty
1880 string, or a complete input followed by an input line''.  Notice that
1881 ``complete input'' is defined in terms of itself.  This definition is said
1882 to be @dfn{left recursive} since @code{input} appears always as the
1883 leftmost symbol in the sequence.  @xref{Recursion}.
1884
1885 The first alternative is empty because there are no symbols between the
1886 colon and the first @samp{|}; this means that @code{input} can match an
1887 empty string of input (no tokens).  We write the rules this way because it
1888 is legitimate to type @kbd{Ctrl-d} right after you start the calculator.
1889 It's conventional to put an empty alternative first and to use the
1890 (optional) @code{%empty} directive, or to write the comment @samp{/* empty
1891 */} in it (@pxref{Empty Rules}).
1892
1893 The second alternate rule (@code{input line}) handles all nontrivial input.
1894 It means, ``After reading any number of lines, read one more line if
1895 possible.''  The left recursion makes this rule into a loop.  Since the
1896 first alternative matches empty input, the loop can be executed zero or
1897 more times.
1898
1899 The parser function @code{yyparse} continues to process input until a
1900 grammatical error is seen or the lexical analyzer says there are no more
1901 input tokens; we will arrange for the latter to happen at end-of-input.
1902
1903 @node Rpcalc Line
1904 @subsubsection Explanation of @code{line}
1905
1906 Now consider the definition of @code{line}:
1907
1908 @example
1909 line:
1910   '\n'
1911 | exp '\n'  @{ printf ("%.10g\n", $1); @}
1912 ;
1913 @end example
1914
1915 The first alternative is a token which is a newline character; this means
1916 that rpcalc accepts a blank line (and ignores it, since there is no
1917 action).  The second alternative is an expression followed by a newline.
1918 This is the alternative that makes rpcalc useful.  The semantic value of
1919 the @code{exp} grouping is the value of @code{$1} because the @code{exp} in
1920 question is the first symbol in the alternative.  The action prints this
1921 value, which is the result of the computation the user asked for.
1922
1923 This action is unusual because it does not assign a value to @code{$$}.  As
1924 a consequence, the semantic value associated with the @code{line} is
1925 uninitialized (its value will be unpredictable).  This would be a bug if
1926 that value were ever used, but we don't use it: once rpcalc has printed the
1927 value of the user's input line, that value is no longer needed.
1928
1929 @node Rpcalc Exp
1930 @subsubsection Explanation of @code{exp}
1931
1932 The @code{exp} grouping has several rules, one for each kind of expression.
1933 The first rule handles the simplest expressions: those that are just
1934 numbers.  The second handles an addition-expression, which looks like two
1935 expressions followed by a plus-sign.  The third handles subtraction, and so
1936 on.
1937
1938 @example
1939 exp:
1940   NUM
1941 | exp exp '+'     @{ $$ = $1 + $2;    @}
1942 | exp exp '-'     @{ $$ = $1 - $2;    @}
1943 @dots{}
1944 ;
1945 @end example
1946
1947 We have used @samp{|} to join all the rules for @code{exp}, but we could
1948 equally well have written them separately:
1949
1950 @example
1951 exp: NUM;
1952 exp: exp exp '+'  @{ $$ = $1 + $2; @};
1953 exp: exp exp '-'  @{ $$ = $1 - $2; @};
1954 @dots{}
1955 @end example
1956
1957 Most of the rules have actions that compute the value of the expression in
1958 terms of the value of its parts.  For example, in the rule for addition,
1959 @code{$1} refers to the first component @code{exp} and @code{$2} refers to
1960 the second one.  The third component, @code{'+'}, has no meaningful
1961 associated semantic value, but if it had one you could refer to it as
1962 @code{$3}.  The first rule relies on the implicit default action: @samp{@{
1963 $$ = $1; @}}.
1964
1965
1966 When @code{yyparse} recognizes a sum expression using this rule, the sum of
1967 the two subexpressions' values is produced as the value of the entire
1968 expression.  @xref{Actions}.
1969
1970 You don't have to give an action for every rule.  When a rule has no action,
1971 Bison by default copies the value of @code{$1} into @code{$$}.  This is what
1972 happens in the first rule (the one that uses @code{NUM}).
1973
1974 The formatting shown here is the recommended convention, but Bison does not
1975 require it.  You can add or change white space as much as you wish.  For
1976 example, this:
1977
1978 @example
1979 exp: NUM | exp exp '+' @{$$ = $1 + $2; @} | @dots{} ;
1980 @end example
1981
1982 @noindent
1983 means the same thing as this:
1984
1985 @example
1986 exp:
1987   NUM
1988 | exp exp '+'    @{ $$ = $1 + $2; @}
1989 | @dots{}
1990 ;
1991 @end example
1992
1993 @noindent
1994 The latter, however, is much more readable.
1995
1996 @node Rpcalc Lexer
1997 @subsection The @code{rpcalc} Lexical Analyzer
1998 @cindex writing a lexical analyzer
1999 @cindex lexical analyzer, writing
2000
2001 The lexical analyzer's job is low-level parsing: converting characters
2002 or sequences of characters into tokens.  The Bison parser gets its
2003 tokens by calling the lexical analyzer.  @xref{Lexical}.
2004
2005 Only a simple lexical analyzer is needed for the RPN
2006 calculator.  This
2007 lexical analyzer skips blanks and tabs, then reads in numbers as
2008 @code{double} and returns them as @code{NUM} tokens.  Any other character
2009 that isn't part of a number is a separate token.  Note that the token-code
2010 for such a single-character token is the character itself.
2011
2012 The return value of the lexical analyzer function is a numeric code which
2013 represents a token kind.  The same text used in Bison rules to stand for
2014 this token kind is also a C expression for the numeric code of the kind.
2015 This works in two ways.  If the token kind is a character literal, then its
2016 numeric code is that of the character; you can use the same character
2017 literal in the lexical analyzer to express the number.  If the token kind is
2018 an identifier, that identifier is defined by Bison as a C enum whose
2019 definition is the appropriate code.  In this example, therefore, @code{NUM}
2020 becomes an enum for @code{yylex} to use.
2021
2022 The semantic value of the token (if it has one) is stored into the global
2023 variable @code{yylval}, which is where the Bison parser will look for it.
2024 (The C data type of @code{yylval} is @code{YYSTYPE}, whose value was defined
2025 at the beginning of the grammar via @samp{%define api.value.type
2026 @{double@}}; @pxref{Rpcalc Declarations}.)
2027
2028 A token kind code of zero is returned if the end-of-input is encountered.
2029 (Bison recognizes any nonpositive value as indicating end-of-input.)
2030
2031 Here is the code for the lexical analyzer:
2032
2033 @comment file: c/rpcalc/rpcalc.y
2034 @example
2035 @group
2036 /* The lexical analyzer returns a double floating point
2037    number on the stack and the token NUM, or the numeric code
2038    of the character read if not a number.  It skips all blanks
2039    and tabs, and returns 0 for end-of-input. */
2040
2041 #include <ctype.h>
2042 #include <stdlib.h>
2043 @end group
2044
2045 @group
2046 int
2047 yylex (void)
2048 @{
2049   int c = getchar ();
2050   /* Skip white space. */
2051   while (c == ' ' || c == '\t')
2052     c = getchar ();
2053 @end group
2054 @group
2055   /* Process numbers. */
2056   if (c == '.' || isdigit (c))
2057     @{
2058       ungetc (c, stdin);
2059       if (scanf ("%lf", &yylval) != 1)
2060         abort ();
2061       return NUM;
2062     @}
2063 @end group
2064 @group
2065   /* Return end-of-input. */
2066   else if (c == EOF)
2067     return YYEOF;
2068   /* Return a single char. */
2069   else
2070     return c;
2071 @}
2072 @end group
2073 @end example
2074
2075 @node Rpcalc Main
2076 @subsection The Controlling Function
2077 @cindex controlling function
2078 @cindex main function in simple example
2079
2080 In keeping with the spirit of this example, the controlling function is
2081 kept to the bare minimum.  The only requirement is that it call
2082 @code{yyparse} to start the process of parsing.
2083
2084 @comment file: c/rpcalc/rpcalc.y
2085 @example
2086 @group
2087 int
2088 main (void)
2089 @{
2090   return yyparse ();
2091 @}
2092 @end group
2093 @end example
2094
2095 @node Rpcalc Error
2096 @subsection The Error Reporting Routine
2097 @cindex error reporting routine
2098
2099 When @code{yyparse} detects a syntax error, it calls the error reporting
2100 function @code{yyerror} to print an error message (usually but not
2101 always @code{"syntax error"}).  It is up to the programmer to supply
2102 @code{yyerror} (@pxref{Interface}), so
2103 here is the definition we will use:
2104
2105 @comment file: c/rpcalc/rpcalc.y
2106 @example
2107 #include <stdio.h>
2108
2109 @group
2110 /* Called by yyparse on error. */
2111 void
2112 yyerror (char const *s)
2113 @{
2114   fprintf (stderr, "%s\n", s);
2115 @}
2116 @end group
2117 @end example
2118
2119 After @code{yyerror} returns, the Bison parser may recover from the error
2120 and continue parsing if the grammar contains a suitable error rule
2121 (@pxref{Error Recovery}).  Otherwise, @code{yyparse} returns nonzero.  We
2122 have not written any error rules in this example, so any invalid input will
2123 cause the calculator program to exit.  This is not clean behavior for a
2124 real calculator, but it is adequate for the first example.
2125
2126 @node Rpcalc Generate
2127 @subsection Running Bison to Make the Parser
2128 @cindex running Bison (introduction)
2129
2130 Before running Bison to produce a parser, we need to decide how to
2131 arrange all the source code in one or more source files.  For such a
2132 simple example, the easiest thing is to put everything in one file,
2133 the grammar file.  The definitions of @code{yylex}, @code{yyerror} and
2134 @code{main} go at the end, in the epilogue of the grammar file
2135 (@pxref{Grammar Layout}).
2136
2137 For a large project, you would probably have several source files, and use
2138 @code{make} to arrange to recompile them.
2139
2140 With all the source in the grammar file, you use the following command
2141 to convert it into a parser implementation file:
2142
2143 @example
2144 $ @kbd{bison @var{file}.y}
2145 @end example
2146
2147 @noindent
2148 In this example, the grammar file is called @file{rpcalc.y} (for
2149 ``Reverse Polish @sc{calc}ulator'').  Bison produces a parser
2150 implementation file named @file{@var{file}.tab.c}, removing the
2151 @samp{.y} from the grammar file name.  The parser implementation file
2152 contains the source code for @code{yyparse}.  The additional functions
2153 in the grammar file (@code{yylex}, @code{yyerror} and @code{main}) are
2154 copied verbatim to the parser implementation file.
2155
2156 @node Rpcalc Compile
2157 @subsection Compiling the Parser Implementation File
2158 @cindex compiling the parser
2159
2160 Here is how to compile and run the parser implementation file:
2161
2162 @example
2163 @group
2164 # @r{List files in current directory.}
2165 $ @kbd{ls}
2166 rpcalc.tab.c  rpcalc.y
2167 @end group
2168
2169 @group
2170 # @r{Compile the Bison parser.}
2171 # @r{@option{-lm} tells the compiler to search the math library for @code{pow}.}
2172 $ @kbd{cc -o rpcalc rpcalc.tab.c -lm}
2173 @end group
2174
2175 @group
2176 # @r{List files again.}
2177 $ @kbd{ls}
2178 rpcalc  rpcalc.tab.c  rpcalc.y
2179 @end group
2180 @end example
2181
2182 The file @file{rpcalc} now contains the executable code.  Here is an
2183 example session using @code{rpcalc}.
2184
2185 @example
2186 $ @kbd{rpcalc}
2187 @kbd{4 9 +}
2188 @result{} 13
2189 @kbd{3 7 + 3 4 5 *+-}
2190 @result{} -13
2191 @kbd{3 7 + 3 4 5 * + - n}              @r{Note the unary minus, @samp{n}}
2192 @result{} 13
2193 @kbd{5 6 / 4 n +}
2194 @result{} -3.166666667
2195 @kbd{3 4 ^}                            @r{Exponentiation}
2196 @result{} 81
2197 @kbd{^D}                               @r{End-of-file indicator}
2198 $
2199 @end example
2200
2201 @node Infix Calc
2202 @section Infix Notation Calculator: @code{calc}
2203 @cindex infix notation calculator
2204 @cindex @code{calc}
2205 @cindex calculator, infix notation
2206
2207 We now modify rpcalc to handle infix operators instead of
2208 postfix.@footnote{A similar example, but using an unambiguous grammar rather
2209 than precedence and associativity annotations, is available as
2210 @file{examples/c/calc}.}  Infix
2211 notation involves the concept of operator precedence and the need for
2212 parentheses nested to arbitrary depth.  Here is the Bison code for
2213 @file{calc.y}, an infix desk-top calculator.
2214
2215 @example
2216 /* Infix notation calculator. */
2217
2218 @group
2219 %@{
2220   #include <math.h>
2221   #include <stdio.h>
2222   int yylex (void);
2223   void yyerror (char const *);
2224 %@}
2225 @end group
2226
2227 @group
2228 /* Bison declarations. */
2229 %define api.value.type @{double@}
2230 %token NUM
2231 %left '-' '+'
2232 %left '*' '/'
2233 %precedence NEG   /* negation--unary minus */
2234 %right '^'        /* exponentiation */
2235 @end group
2236
2237 %% /* The grammar follows. */
2238 @group
2239 input:
2240   %empty
2241 | input line
2242 ;
2243 @end group
2244
2245 @group
2246 line:
2247   '\n'
2248 | exp '\n'  @{ printf ("\t%.10g\n", $1); @}
2249 ;
2250 @end group
2251
2252 @group
2253 exp:
2254   NUM
2255 | exp '+' exp        @{ $$ = $1 + $3;      @}
2256 | exp '-' exp        @{ $$ = $1 - $3;      @}
2257 | exp '*' exp        @{ $$ = $1 * $3;      @}
2258 | exp '/' exp        @{ $$ = $1 / $3;      @}
2259 | '-' exp  %prec NEG @{ $$ = -$2;          @}
2260 | exp '^' exp        @{ $$ = pow ($1, $3); @}
2261 | '(' exp ')'        @{ $$ = $2;           @}
2262 ;
2263 @end group
2264 %%
2265 @end example
2266
2267 @noindent
2268 The functions @code{yylex}, @code{yyerror} and @code{main} can be the
2269 same as before.
2270
2271 There are two important new features shown in this code.
2272
2273 In the second section (Bison declarations), @code{%left} declares token
2274 kinds and says they are left-associative operators.  The declarations
2275 @code{%left} and @code{%right} (right associativity) take the place of
2276 @code{%token} which is used to declare a token kind name without
2277 associativity/precedence.  (These tokens are single-character literals,
2278 which ordinarily don't need to be declared.  We declare them here to specify
2279 the associativity/precedence.)
2280
2281 Operator precedence is determined by the line ordering of the
2282 declarations; the higher the line number of the declaration (lower on
2283 the page or screen), the higher the precedence.  Hence, exponentiation
2284 has the highest precedence, unary minus (@code{NEG}) is next, followed
2285 by @samp{*} and @samp{/}, and so on.  Unary minus is not associative,
2286 only precedence matters (@code{%precedence}).  @xref{Precedence}.
2287
2288 The other important new feature is the @code{%prec} in the grammar
2289 section for the unary minus operator.  The @code{%prec} simply instructs
2290 Bison that the rule @samp{| '-' exp} has the same precedence as
2291 @code{NEG}---in this case the next-to-highest.  @xref{Contextual
2292 Precedence}.
2293
2294 Here is a sample run of @file{calc.y}:
2295
2296 @need 500
2297 @example
2298 $ @kbd{calc}
2299 @kbd{4 + 4.5 - (34/(8*3+-3))}
2300 6.880952381
2301 @kbd{-56 + 2}
2302 -54
2303 @kbd{3 ^ 2}
2304 9
2305 @end example
2306
2307 @node Simple Error Recovery
2308 @section Simple Error Recovery
2309 @cindex error recovery, simple
2310
2311 Up to this point, this manual has not addressed the issue of @dfn{error
2312 recovery}---how to continue parsing after the parser detects a syntax
2313 error.  All we have handled is error reporting with @code{yyerror}.
2314 Recall that by default @code{yyparse} returns after calling
2315 @code{yyerror}.  This means that an erroneous input line causes the
2316 calculator program to exit.  Now we show how to rectify this deficiency.
2317
2318 The Bison language itself includes the reserved word @code{error}, which
2319 may be included in the grammar rules.  In the example below it has
2320 been added to one of the alternatives for @code{line}:
2321
2322 @example
2323 @group
2324 line:
2325   '\n'
2326 | exp '\n'   @{ printf ("\t%.10g\n", $1); @}
2327 | error '\n' @{ yyerrok;                  @}
2328 ;
2329 @end group
2330 @end example
2331
2332 This addition to the grammar allows for simple error recovery in the
2333 event of a syntax error.  If an expression that cannot be evaluated is
2334 read, the error will be recognized by the third rule for @code{line},
2335 and parsing will continue.  (The @code{yyerror} function is still called
2336 upon to print its message as well.)  The action executes the statement
2337 @code{yyerrok}, a macro defined automatically by Bison; its meaning is
2338 that error recovery is complete (@pxref{Error Recovery}).  Note the
2339 difference between @code{yyerrok} and @code{yyerror}; neither one is a
2340 misprint.
2341
2342 This form of error recovery deals with syntax errors.  There are other
2343 kinds of errors; for example, division by zero, which raises an exception
2344 signal that is normally fatal.  A real calculator program must handle this
2345 signal and use @code{longjmp} to return to @code{main} and resume parsing
2346 input lines; it would also have to discard the rest of the current line of
2347 input.  We won't discuss this issue further because it is not specific to
2348 Bison programs.
2349
2350 @node Location Tracking Calc
2351 @section Location Tracking Calculator: @code{ltcalc}
2352 @cindex location tracking calculator
2353 @cindex @code{ltcalc}
2354 @cindex calculator, location tracking
2355
2356 This example extends the infix notation calculator with location
2357 tracking.  This feature will be used to improve the error messages.  For
2358 the sake of clarity, this example is a simple integer calculator, since
2359 most of the work needed to use locations will be done in the lexical
2360 analyzer.
2361
2362 @menu
2363 * Ltcalc Declarations::    Bison and C declarations for ltcalc.
2364 * Ltcalc Rules::           Grammar rules for ltcalc, with explanations.
2365 * Ltcalc Lexer::           The lexical analyzer.
2366 @end menu
2367
2368 @node Ltcalc Declarations
2369 @subsection Declarations for @code{ltcalc}
2370
2371 The C and Bison declarations for the location tracking calculator are
2372 the same as the declarations for the infix notation calculator.
2373
2374 @example
2375 /* Location tracking calculator. */
2376
2377 %@{
2378   #include <math.h>
2379   int yylex (void);
2380   void yyerror (char const *);
2381 %@}
2382
2383 /* Bison declarations. */
2384 %define api.value.type @{int@}
2385 %token NUM
2386
2387 %left '-' '+'
2388 %left '*' '/'
2389 %precedence NEG
2390 %right '^'
2391
2392 %% /* The grammar follows. */
2393 @end example
2394
2395 @noindent
2396 Note there are no declarations specific to locations.  Defining a data type
2397 for storing locations is not needed: we will use the type provided by
2398 default (@pxref{Location Type}), which is a four member structure with the
2399 following integer fields: @code{first_line}, @code{first_column},
2400 @code{last_line} and @code{last_column}.  By convention, and in accordance
2401 with the GNU Coding Standards and common practice, the line and column counts
2402 both start at 1.
2403
2404 @node Ltcalc Rules
2405 @subsection Grammar Rules for @code{ltcalc}
2406
2407 Whether handling locations or not has no effect on the syntax of your
2408 language.  Therefore, grammar rules for this example will be very close
2409 to those of the previous example: we will only modify them to benefit
2410 from the new information.
2411
2412 Here, we will use locations to report divisions by zero, and locate the
2413 wrong expressions or subexpressions.
2414
2415 @example
2416 @group
2417 input:
2418   %empty
2419 | input line
2420 ;
2421 @end group
2422
2423 @group
2424 line:
2425   '\n'
2426 | exp '\n' @{ printf ("%d\n", $1); @}
2427 ;
2428 @end group
2429
2430 @group
2431 exp:
2432   NUM
2433 | exp '+' exp   @{ $$ = $1 + $3; @}
2434 | exp '-' exp   @{ $$ = $1 - $3; @}
2435 | exp '*' exp   @{ $$ = $1 * $3; @}
2436 @end group
2437 @group
2438 | exp '/' exp
2439     @{
2440       if ($3)
2441         $$ = $1 / $3;
2442       else
2443         @{
2444           $$ = 1;
2445           fprintf (stderr, "%d.%d-%d.%d: division by zero",
2446                    @@3.first_line, @@3.first_column,
2447                    @@3.last_line, @@3.last_column);
2448         @}
2449     @}
2450 @end group
2451 @group
2452 | '-' exp %prec NEG     @{ $$ = -$2; @}
2453 | exp '^' exp           @{ $$ = pow ($1, $3); @}
2454 | '(' exp ')'           @{ $$ = $2; @}
2455 @end group
2456 @end example
2457
2458 This code shows how to reach locations inside of semantic actions, by
2459 using the pseudo-variables @code{@@@var{n}} for rule components, and the
2460 pseudo-variable @code{@@$} for groupings.
2461
2462 We don't need to assign a value to @code{@@$}: the output parser does it
2463 automatically.  By default, before executing the C code of each action,
2464 @code{@@$} is set to range from the beginning of @code{@@1} to the end of
2465 @code{@@@var{n}}, for a rule with @var{n} components.  This behavior can be
2466 redefined (@pxref{Location Default Action}), and for very specific rules,
2467 @code{@@$} can be computed by hand.
2468
2469 @node Ltcalc Lexer
2470 @subsection The @code{ltcalc} Lexical Analyzer
2471
2472 Until now, we relied on Bison's defaults to enable location
2473 tracking.  The next step is to rewrite the lexical analyzer, and make it
2474 able to feed the parser with the token locations, as it already does for
2475 semantic values.
2476
2477 To this end, we must take into account every single character of the
2478 input text, to keep the computed locations from being fuzzy or wrong:
2479
2480 @example
2481 @group
2482 int
2483 yylex (void)
2484 @{
2485   int c;
2486 @end group
2487
2488 @group
2489   /* Skip white space. */
2490   while ((c = getchar ()) == ' ' || c == '\t')
2491     ++yylloc.last_column;
2492 @end group
2493
2494 @group
2495   /* Step. */
2496   yylloc.first_line = yylloc.last_line;
2497   yylloc.first_column = yylloc.last_column;
2498 @end group
2499
2500 @group
2501   /* Process numbers. */
2502   if (isdigit (c))
2503     @{
2504       yylval = c - '0';
2505       ++yylloc.last_column;
2506       while (isdigit (c = getchar ()))
2507         @{
2508           ++yylloc.last_column;
2509           yylval = yylval * 10 + c - '0';
2510         @}
2511       ungetc (c, stdin);
2512       return NUM;
2513     @}
2514 @end group
2515
2516   /* Return end-of-input. */
2517   if (c == EOF)
2518     return YYEOF;
2519
2520 @group
2521   /* Return a single char, and update location. */
2522   if (c == '\n')
2523     @{
2524       ++yylloc.last_line;
2525       yylloc.last_column = 0;
2526     @}
2527   else
2528     ++yylloc.last_column;
2529   return c;
2530 @}
2531 @end group
2532 @end example
2533
2534 Basically, the lexical analyzer performs the same processing as before: it
2535 skips blanks and tabs, and reads numbers or single-character tokens.  In
2536 addition, it updates @code{yylloc}, the global variable (of type
2537 @code{YYLTYPE}) containing the token's location.
2538
2539 Now, each time this function returns a token, the parser has its kind as
2540 well as its semantic value, and its location in the text.  The last needed
2541 change is to initialize @code{yylloc}, for example in the controlling
2542 function:
2543
2544 @example
2545 @group
2546 int
2547 main (void)
2548 @{
2549   yylloc.first_line = yylloc.last_line = 1;
2550   yylloc.first_column = yylloc.last_column = 0;
2551   return yyparse ();
2552 @}
2553 @end group
2554 @end example
2555
2556 Remember that computing locations is not a matter of syntax.  Every
2557 character must be associated with a location update, whether it is in
2558 valid input, in comments, in literal strings, and so on.
2559
2560 @node Multi-function Calc
2561 @section Multi-Function Calculator: @code{mfcalc}
2562 @cindex multi-function calculator
2563 @cindex @code{mfcalc}
2564 @cindex calculator, multi-function
2565
2566 Now that the basics of Bison have been discussed, it is time to move on to a
2567 more advanced problem.@footnote{The sources of @command{mfcalc} are
2568 available as @file{examples/c/mfcalc}.}  The above calculators provided only
2569 five functions, @samp{+}, @samp{-}, @samp{*}, @samp{/} and @samp{^}.  It
2570 would be nice to have a calculator that provides other mathematical
2571 functions such as @code{sin}, @code{cos}, etc.
2572
2573 It is easy to add new operators to the infix calculator as long as they are
2574 only single-character literals.  The lexical analyzer @code{yylex} passes
2575 back all nonnumeric characters as tokens, so new grammar rules suffice for
2576 adding a new operator.  But we want something more flexible: built-in
2577 functions whose syntax has this form:
2578
2579 @example
2580 @var{function_name} (@var{argument})
2581 @end example
2582
2583 @noindent
2584 At the same time, we will add memory to the calculator, by allowing you
2585 to create named variables, store values in them, and use them later.
2586 Here is a sample session with the multi-function calculator:
2587
2588 @example
2589 @group
2590 $ @kbd{mfcalc}
2591 @kbd{pi = 3.141592653589}
2592 @result{} 3.1415926536
2593 @end group
2594 @group
2595 @kbd{sin(pi)}
2596 @result{} 0.0000000000
2597 @end group
2598 @kbd{alpha = beta1 = 2.3}
2599 @result{} 2.3000000000
2600 @kbd{alpha}
2601 @result{} 2.3000000000
2602 @kbd{ln(alpha)}
2603 @result{} 0.8329091229
2604 @kbd{exp(ln(beta1))}
2605 @result{} 2.3000000000
2606 $
2607 @end example
2608
2609 Note that multiple assignment and nested function calls are permitted.
2610
2611 @menu
2612 * Mfcalc Declarations::    Bison declarations for multi-function calculator.
2613 * Mfcalc Rules::           Grammar rules for the calculator.
2614 * Mfcalc Symbol Table::    Symbol table management subroutines.
2615 * Mfcalc Lexer::           The lexical analyzer.
2616 * Mfcalc Main::            The controlling function.
2617 @end menu
2618
2619 @node Mfcalc Declarations
2620 @subsection Declarations for @code{mfcalc}
2621
2622 Here are the C and Bison declarations for the multi-function
2623 calculator.
2624
2625 @ignore
2626 @comment file: c/mfcalc/mfcalc.y
2627 @example
2628 /* Parser for mfcalc.   -*- C -*-
2629
2630    Copyright (C) 1988-1993, 1995, 1998-2015, 2018-2021 Free Software
2631    Foundation, Inc.
2632
2633    This file is part of Bison, the GNU Compiler Compiler.
2634
2635    This program is free software: you can redistribute it and/or modify
2636    it under the terms of the GNU General Public License as published by
2637    the Free Software Foundation, either version 3 of the License, or
2638    (at your option) any later version.
2639
2640    This program is distributed in the hope that it will be useful,
2641    but WITHOUT ANY WARRANTY; without even the implied warranty of
2642    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
2643    GNU General Public License for more details.
2644
2645    You should have received a copy of the GNU General Public License
2646    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
2647 @end example
2648 @end ignore
2649
2650 @comment file: c/mfcalc/mfcalc.y: 1
2651 @example
2652 @group
2653 %@{
2654   #include <stdio.h>  /* For printf, etc. */
2655   #include <math.h>   /* For pow, used in the grammar. */
2656   #include "calc.h"   /* Contains definition of 'symrec'. */
2657   int yylex (void);
2658   void yyerror (char const *);
2659 %@}
2660 @end group
2661
2662 %define api.value.type union /* Generate YYSTYPE from these types: */
2663 %token <double>  NUM     /* Double precision number. */
2664 %token <symrec*> VAR FUN /* Symbol table pointer: variable/function. */
2665 %nterm <double>  exp
2666
2667 @group
2668 %precedence '='
2669 %left '-' '+'
2670 %left '*' '/'
2671 %precedence NEG /* negation--unary minus */
2672 %right '^'      /* exponentiation */
2673 @end group
2674 @end example
2675
2676 The above grammar introduces only two new features of the Bison language.
2677 These features allow semantic values to have various data types
2678 (@pxref{Multiple Types}).
2679
2680 The special @code{union} value assigned to the @code{%define} variable
2681 @code{api.value.type} specifies that the symbols are defined with their data
2682 types.  Bison will generate an appropriate definition of @code{YYSTYPE} to
2683 store these values.
2684
2685 Since values can now have various types, it is necessary to associate a type
2686 with each grammar symbol whose semantic value is used.  These symbols are
2687 @code{NUM}, @code{VAR}, @code{FUN}, and @code{exp}.  Their declarations are
2688 augmented with their data type (placed between angle brackets).  For
2689 instance, values of @code{NUM} are stored in @code{double}.
2690
2691 The Bison construct @code{%nterm} is used for declaring nonterminal symbols,
2692 just as @code{%token} is used for declaring token kinds.  Previously we did
2693 not use @code{%nterm} because nonterminal symbols are normally
2694 declared implicitly by the rules that define them.  But @code{exp} must be
2695 declared explicitly so we can specify its value type.  @xref{Type Decl}.
2696
2697 @node Mfcalc Rules
2698 @subsection Grammar Rules for @code{mfcalc}
2699
2700 Here are the grammar rules for the multi-function calculator.
2701 Most of them are copied directly from @code{calc}; three rules,
2702 those which mention @code{VAR} or @code{FUN}, are new.
2703
2704 @comment file: c/mfcalc/mfcalc.y: 3
2705 @example
2706 %% /* The grammar follows. */
2707 @group
2708 input:
2709   %empty
2710 | input line
2711 ;
2712 @end group
2713
2714 @group
2715 line:
2716   '\n'
2717 | exp '\n'   @{ printf ("%.10g\n", $1); @}
2718 | error '\n' @{ yyerrok;                @}
2719 ;
2720 @end group
2721
2722 @group
2723 exp:
2724   NUM
2725 | VAR                @{ $$ = $1->value.var;              @}
2726 | VAR '=' exp        @{ $$ = $3; $1->value.var = $3;     @}
2727 | FUN '(' exp ')'    @{ $$ = $1->value.fun ($3);         @}
2728 | exp '+' exp        @{ $$ = $1 + $3;                    @}
2729 | exp '-' exp        @{ $$ = $1 - $3;                    @}
2730 | exp '*' exp        @{ $$ = $1 * $3;                    @}
2731 | exp '/' exp        @{ $$ = $1 / $3;                    @}
2732 | '-' exp  %prec NEG @{ $$ = -$2;                        @}
2733 | exp '^' exp        @{ $$ = pow ($1, $3);               @}
2734 | '(' exp ')'        @{ $$ = $2;                         @}
2735 ;
2736 @end group
2737 /* End of grammar. */
2738 %%
2739 @end example
2740
2741 @node Mfcalc Symbol Table
2742 @subsection The @code{mfcalc} Symbol Table
2743 @cindex symbol table example
2744
2745 The multi-function calculator requires a symbol table to keep track of the
2746 names and meanings of variables and functions.  This doesn't affect the
2747 grammar rules (except for the actions) or the Bison declarations, but it
2748 requires some additional C functions for support.
2749
2750 The symbol table itself consists of a linked list of records.  Its
2751 definition, which is kept in the header @file{calc.h}, is as follows.  It
2752 provides for either functions or variables to be placed in the table.
2753
2754 @ignore
2755 @comment file: c/mfcalc/calc.h
2756 @example
2757 /* Functions for mfcalc.   -*- C -*-
2758
2759    Copyright (C) 1988-1993, 1995, 1998-2015, 2018-2021 Free Software
2760    Foundation, Inc.
2761
2762    This file is part of Bison, the GNU Compiler Compiler.
2763
2764    This program is free software: you can redistribute it and/or modify
2765    it under the terms of the GNU General Public License as published by
2766    the Free Software Foundation, either version 3 of the License, or
2767    (at your option) any later version.
2768
2769    This program is distributed in the hope that it will be useful,
2770    but WITHOUT ANY WARRANTY; without even the implied warranty of
2771    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
2772    GNU General Public License for more details.
2773
2774    You should have received a copy of the GNU General Public License
2775    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
2776 @end example
2777 @end ignore
2778
2779 @comment file: c/mfcalc/calc.h
2780 @example
2781 @group
2782 /* Function type. */
2783 typedef double (func_t) (double);
2784 @end group
2785
2786 @group
2787 /* Data type for links in the chain of symbols. */
2788 struct symrec
2789 @{
2790   char *name;  /* name of symbol */
2791   int type;    /* type of symbol: either VAR or FUN */
2792   union
2793   @{
2794     double var;    /* value of a VAR */
2795     func_t *fun;   /* value of a FUN */
2796   @} value;
2797   struct symrec *next;  /* link field */
2798 @};
2799 @end group
2800
2801 @group
2802 typedef struct symrec symrec;
2803
2804 /* The symbol table: a chain of 'struct symrec'. */
2805 extern symrec *sym_table;
2806
2807 symrec *putsym (char const *name, int sym_type);
2808 symrec *getsym (char const *name);
2809 @end group
2810 @end example
2811
2812 The new version of @code{main} will call @code{init_table} to initialize
2813 the symbol table:
2814
2815 @comment file: c/mfcalc/mfcalc.y: 3
2816 @example
2817 @group
2818 struct init
2819 @{
2820   char const *name;
2821   func_t *fun;
2822 @};
2823 @end group
2824
2825 @group
2826 struct init const funs[] =
2827 @{
2828   @{ "atan", atan @},
2829   @{ "cos",  cos  @},
2830   @{ "exp",  exp  @},
2831   @{ "ln",   log  @},
2832   @{ "sin",  sin  @},
2833   @{ "sqrt", sqrt @},
2834   @{ 0, 0 @},
2835 @};
2836 @end group
2837
2838 @group
2839 /* The symbol table: a chain of 'struct symrec'. */
2840 symrec *sym_table;
2841 @end group
2842
2843 @group
2844 /* Put functions in table. */
2845 static void
2846 init_table (void)
2847 @end group
2848 @group
2849 @{
2850   for (int i = 0; funs[i].name; i++)
2851     @{
2852       symrec *ptr = putsym (funs[i].name, FUN);
2853       ptr->value.fun = funs[i].fun;
2854     @}
2855 @}
2856 @end group
2857 @end example
2858
2859 By simply editing the initialization list and adding the necessary include
2860 files, you can add additional functions to the calculator.
2861
2862 Two important functions allow look-up and installation of symbols in the
2863 symbol table.  The function @code{putsym} is passed a name and the kind
2864 (@code{VAR} or @code{FUN}) of the object to be installed.  The object is
2865 linked to the front of the list, and a pointer to the object is returned.
2866 The function @code{getsym} is passed the name of the symbol to look up.  If
2867 found, a pointer to that symbol is returned; otherwise zero is returned.
2868
2869 @comment file: c/mfcalc/mfcalc.y: 3
2870 @example
2871 @group
2872 /* The mfcalc code assumes that malloc and realloc
2873    always succeed, and that integer calculations
2874    never overflow.  Production-quality code should
2875    not make these assumptions.  */
2876 #include <assert.h>
2877 #include <stdlib.h> /* malloc, realloc. */
2878 #include <string.h> /* strlen. */
2879 @end group
2880
2881 @group
2882 symrec *
2883 putsym (char const *name, int sym_type)
2884 @{
2885   symrec *res = (symrec *) malloc (sizeof (symrec));
2886   res->name = strdup (name);
2887   res->type = sym_type;
2888   res->value.var = 0; /* Set value to 0 even if fun. */
2889   res->next = sym_table;
2890   sym_table = res;
2891   return res;
2892 @}
2893 @end group
2894
2895 @group
2896 symrec *
2897 getsym (char const *name)
2898 @{
2899   for (symrec *p = sym_table; p; p = p->next)
2900     if (strcmp (p->name, name) == 0)
2901       return p;
2902   return NULL;
2903 @}
2904 @end group
2905 @end example
2906
2907 @node Mfcalc Lexer
2908 @subsection The @code{mfcalc} Lexer
2909
2910 The function @code{yylex} must now recognize variables, numeric values, and
2911 the single-character arithmetic operators.  Strings of alphanumeric
2912 characters with a leading letter are recognized as either variables or
2913 functions depending on what the symbol table says about them.
2914
2915 The string is passed to @code{getsym} for lookup in the symbol table.  If
2916 the name appears in the table, a pointer to its location and its type
2917 (@code{VAR} or @code{FUN}) are returned to @code{yyparse}.  If it is not
2918 already in the table, then it is installed as a @code{VAR} using
2919 @code{putsym}.  Again, a pointer and its type (which must be @code{VAR}) are
2920 returned to @code{yyparse}.
2921
2922 No change is needed in the handling of numeric values and arithmetic
2923 operators in @code{yylex}.
2924
2925 @comment file: c/mfcalc/mfcalc.y: 3
2926 @example
2927 #include <ctype.h>
2928 #include <stddef.h>
2929
2930 @group
2931 int
2932 yylex (void)
2933 @{
2934   int c = getchar ();
2935
2936   /* Ignore white space, get first nonwhite character. */
2937   while (c == ' ' || c == '\t')
2938     c = getchar ();
2939
2940   if (c == EOF)
2941     return YYEOF;
2942 @end group
2943
2944 @group
2945   /* Char starts a number => parse the number. */
2946   if (c == '.' || isdigit (c))
2947     @{
2948       ungetc (c, stdin);
2949       if (scanf ("%lf", &yylval.NUM) != 1)
2950         abort ();
2951       return NUM;
2952     @}
2953 @end group
2954 @end example
2955
2956 @noindent
2957 Bison generated a definition of @code{YYSTYPE} with a member named
2958 @code{NUM} to store the value of @code{NUM} symbols.
2959
2960 @comment file: c/mfcalc/mfcalc.y: 3
2961 @example
2962 @group
2963   /* Char starts an identifier => read the name. */
2964   if (isalpha (c))
2965     @{
2966       static ptrdiff_t bufsize = 0;
2967       static char *symbuf = 0;
2968 @end group
2969       ptrdiff_t i = 0;
2970       do
2971 @group
2972         @{
2973           /* If buffer is full, make it bigger. */
2974           if (bufsize <= i)
2975             @{
2976               bufsize = 2 * bufsize + 40;
2977               symbuf = realloc (symbuf, (size_t) bufsize);
2978             @}
2979           /* Add this character to the buffer. */
2980           symbuf[i++] = (char) c;
2981           /* Get another character. */
2982           c = getchar ();
2983         @}
2984 @end group
2985 @group
2986       while (isalnum (c));
2987
2988       ungetc (c, stdin);
2989       symbuf[i] = '\0';
2990 @end group
2991
2992 @group
2993       symrec *s = getsym (symbuf);
2994       if (!s)
2995         s = putsym (symbuf, VAR);
2996       yylval.VAR = s; /* or yylval.FUN = s. */
2997       return s->type;
2998     @}
2999
3000   /* Any other character is a token by itself. */
3001   return c;
3002 @}
3003 @end group
3004 @end example
3005
3006 @node Mfcalc Main
3007 @subsection The @code{mfcalc} Main
3008
3009 The error reporting function is unchanged, and the new version of
3010 @code{main} includes a call to @code{init_table} and sets @code{yydebug}
3011 on user demand (@pxref{Tracing}, for details):
3012
3013 @comment file: c/mfcalc/mfcalc.y: 3
3014 @example
3015 @group
3016 /* Called by yyparse on error. */
3017 void yyerror (char const *s)
3018 @{
3019   fprintf (stderr, "%s\n", s);
3020 @}
3021 @end group
3022
3023 @group
3024 int main (int argc, char const* argv[])
3025 @end group
3026 @group
3027 @{
3028   /* Enable parse traces on option -p. */
3029   if (argc == 2 && strcmp(argv[1], "-p") == 0)
3030     yydebug = 1;
3031 @end group
3032 @group
3033   init_table ();
3034   return yyparse ();
3035 @}
3036 @end group
3037 @end example
3038
3039 This program is both powerful and flexible.  You may easily add new
3040 functions, and it is a simple job to modify this code to install
3041 predefined variables such as @code{pi} or @code{e} as well.
3042
3043 @node Exercises
3044 @section Exercises
3045 @cindex exercises
3046
3047 @enumerate
3048 @item
3049 Add some new functions from @file{math.h} to the initialization list.
3050
3051 @item
3052 Add another array that contains constants and their values.  Then modify
3053 @code{init_table} to add these constants to the symbol table.  It will be
3054 easiest to give the constants type @code{VAR}.
3055
3056 @item
3057 Make the program report an error if the user refers to an uninitialized
3058 variable in any way except to store a value in it.
3059 @end enumerate
3060
3061 @node Grammar File
3062 @chapter Bison Grammar Files
3063
3064 Bison takes as input a context-free grammar specification and produces a
3065 C-language function that recognizes correct instances of the grammar.
3066
3067 The Bison grammar file conventionally has a name ending in @samp{.y}.
3068 @xref{Invocation}.
3069
3070 @menu
3071 * Grammar Outline::    Overall layout of the grammar file.
3072 * Symbols::            Terminal and nonterminal symbols.
3073 * Rules::              How to write grammar rules.
3074 * Semantics::          Semantic values and actions.
3075 * Tracking Locations:: Locations and actions.
3076 * Named References::   Using named references in actions.
3077 * Declarations::       All kinds of Bison declarations are described here.
3078 * Multiple Parsers::   Putting more than one Bison parser in one program.
3079 @end menu
3080
3081 @node Grammar Outline
3082 @section Outline of a Bison Grammar
3083 @cindex comment
3084 @findex // @dots{}
3085 @findex /* @dots{} */
3086
3087 A Bison grammar file has four main sections, shown here with the
3088 appropriate delimiters:
3089
3090 @example
3091 %@{
3092   @var{Prologue}
3093 %@}
3094
3095 @var{Bison declarations}
3096
3097 %%
3098 @var{Grammar rules}
3099 %%
3100
3101 @var{Epilogue}
3102 @end example
3103
3104 Comments enclosed in @samp{/* @dots{} */} may appear in any of the sections.
3105 As a GNU extension, @samp{//} introduces a comment that continues until end
3106 of line.
3107
3108 @menu
3109 * Prologue::              Syntax and usage of the prologue.
3110 * Prologue Alternatives:: Syntax and usage of alternatives to the prologue.
3111 * Bison Declarations::    Syntax and usage of the Bison declarations section.
3112 * Grammar Rules::         Syntax and usage of the grammar rules section.
3113 * Epilogue::              Syntax and usage of the epilogue.
3114 @end menu
3115
3116 @node Prologue
3117 @subsection The prologue
3118 @cindex declarations section
3119 @cindex Prologue
3120 @cindex declarations
3121
3122 The @var{Prologue} section contains macro definitions and declarations of
3123 functions and variables that are used in the actions in the grammar rules.
3124 These are copied to the beginning of the parser implementation file so that
3125 they precede the definition of @code{yyparse}.  You can use @samp{#include}
3126 to get the declarations from a header file.  If you don't need any C
3127 declarations, you may omit the @samp{%@{} and @samp{%@}} delimiters that
3128 bracket this section.
3129
3130 The @var{Prologue} section is terminated by the first occurrence of
3131 @samp{%@}} that is outside a comment, a string literal, or a character
3132 constant.
3133
3134 You may have more than one @var{Prologue} section, intermixed with the
3135 @var{Bison declarations}.  This allows you to have C and Bison declarations
3136 that refer to each other.  For example, the @code{%union} declaration may
3137 use types defined in a header file, and you may wish to prototype functions
3138 that take arguments of type @code{YYSTYPE}.  This can be done with two
3139 @var{Prologue} blocks, one before and one after the @code{%union}
3140 declaration.
3141
3142 @example
3143 @group
3144 %@{
3145   #define _GNU_SOURCE
3146   #include <stdio.h>
3147   #include "ptypes.h"
3148 %@}
3149 @end group
3150
3151 @group
3152 %union @{
3153   long n;
3154   tree t;  /* @r{@code{tree} is defined in @file{ptypes.h}.} */
3155 @}
3156 @end group
3157
3158 @group
3159 %@{
3160   static void print_token (yytoken_kind_t token, YYSTYPE val);
3161 %@}
3162 @end group
3163
3164 @dots{}
3165 @end example
3166
3167 When in doubt, it is usually safer to put prologue code before all Bison
3168 declarations, rather than after.  For example, any definitions of feature
3169 test macros like @code{_GNU_SOURCE} or @code{_POSIX_C_SOURCE} should appear
3170 before all Bison declarations, as feature test macros can affect the
3171 behavior of Bison-generated @code{#include} directives.
3172
3173 @node Prologue Alternatives
3174 @subsection Prologue Alternatives
3175 @cindex Prologue Alternatives
3176
3177 @findex %code
3178 @findex %code requires
3179 @findex %code provides
3180 @findex %code top
3181
3182 The functionality of @var{Prologue} sections can often be subtle and
3183 inflexible.  As an alternative, Bison provides a @code{%code} directive with
3184 an explicit qualifier field, which identifies the purpose of the code and
3185 thus the location(s) where Bison should generate it.  For C/C++, the
3186 qualifier can be omitted for the default location, or it can be one of
3187 @code{requires}, @code{provides}, @code{top}.  @xref{%code Summary}.
3188
3189 Look again at the example of the previous section:
3190
3191 @example
3192 @group
3193 %@{
3194   #define _GNU_SOURCE
3195   #include <stdio.h>
3196   #include "ptypes.h"
3197 %@}
3198 @end group
3199
3200 @group
3201 %union @{
3202   long n;
3203   tree t;  /* @r{@code{tree} is defined in @file{ptypes.h}.} */
3204 @}
3205 @end group
3206
3207 @group
3208 %@{
3209   static void print_token (yytoken_kind_t token, YYSTYPE val);
3210 %@}
3211 @end group
3212
3213 @dots{}
3214 @end example
3215
3216 @noindent
3217 Notice that there are two @var{Prologue} sections here, but there's a subtle
3218 distinction between their functionality.  For example, if you decide to
3219 override Bison's default definition for @code{YYLTYPE}, in which
3220 @var{Prologue} section should you write your new
3221 definition?@footnote{However, defining @code{YYLTYPE} via a C macro is not
3222 the recommended way.  @xref{Location Type}.}
3223 You should
3224 write it in the first since Bison will insert that code into the parser
3225 implementation file @emph{before} the default @code{YYLTYPE} definition.  In
3226 which @var{Prologue} section should you prototype an internal function,
3227 @code{trace_token}, that accepts @code{YYLTYPE} and @code{yytoken_kind_t} as
3228 arguments?  You should prototype it in the second since Bison will insert
3229 that code @emph{after} the @code{YYLTYPE} and @code{yytoken_kind_t}
3230 definitions.
3231
3232 This distinction in functionality between the two @var{Prologue} sections is
3233 established by the appearance of the @code{%union} between them.  This
3234 behavior raises a few questions.  First, why should the position of a
3235 @code{%union} affect definitions related to @code{YYLTYPE} and
3236 @code{yytoken_kind_t}?  Second, what if there is no @code{%union}?  In that
3237 case, the second kind of @var{Prologue} section is not available.  This
3238 behavior is not intuitive.
3239
3240 To avoid this subtle @code{%union} dependency, rewrite the example using a
3241 @code{%code top} and an unqualified @code{%code}.  Let's go ahead and add
3242 the new @code{YYLTYPE} definition and the @code{trace_token} prototype at
3243 the same time:
3244
3245 @example
3246 %code top @{
3247   #define _GNU_SOURCE
3248   #include <stdio.h>
3249
3250   /* WARNING: The following code really belongs
3251    * in a '%code requires'; see below. */
3252
3253   #include "ptypes.h"
3254   #define YYLTYPE YYLTYPE
3255   typedef struct YYLTYPE
3256   @{
3257     int first_line;
3258     int first_column;
3259     int last_line;
3260     int last_column;
3261     char *filename;
3262   @} YYLTYPE;
3263 @}
3264
3265 @group
3266 %union @{
3267   long n;
3268   tree t;  /* @r{@code{tree} is defined in @file{ptypes.h}.} */
3269 @}
3270 @end group
3271
3272 @group
3273 %code @{
3274   static void print_token (yytoken_kind_t token, YYSTYPE val);
3275   static void trace_token (yytoken_kind_t token, YYLTYPE loc);
3276 @}
3277 @end group
3278
3279 @dots{}
3280 @end example
3281
3282 @noindent
3283 In this way, @code{%code top} and the unqualified @code{%code} achieve the
3284 same functionality as the two kinds of @var{Prologue} sections, but it's
3285 always explicit which kind you intend.  Moreover, both kinds are always
3286 available even in the absence of @code{%union}.
3287
3288 The @code{%code top} block above logically contains two parts.  The first
3289 two lines before the warning need to appear near the top of the parser
3290 implementation file.  The first line after the warning is required by
3291 @code{YYSTYPE} and thus also needs to appear in the parser implementation
3292 file.  However, if you've instructed Bison to generate a parser header file
3293 (@pxref{Decl Summary}), you probably want that line to appear
3294 before the @code{YYSTYPE} definition in that header file as well.  The
3295 @code{YYLTYPE} definition should also appear in the parser header file to
3296 override the default @code{YYLTYPE} definition there.
3297
3298 In other words, in the @code{%code top} block above, all but the first two
3299 lines are dependency code required by the @code{YYSTYPE} and @code{YYLTYPE}
3300 definitions.
3301 Thus, they belong in one or more @code{%code requires}:
3302
3303 @example
3304 @group
3305 %code top @{
3306   #define _GNU_SOURCE
3307   #include <stdio.h>
3308 @}
3309 @end group
3310
3311 @group
3312 %code requires @{
3313   #include "ptypes.h"
3314 @}
3315 @end group
3316 @group
3317 %union @{
3318   long n;
3319   tree t;  /* @r{@code{tree} is defined in @file{ptypes.h}.} */
3320 @}
3321 @end group
3322
3323 @group
3324 %code requires @{
3325   #define YYLTYPE YYLTYPE
3326   typedef struct YYLTYPE
3327   @{
3328     int first_line;
3329     int first_column;
3330     int last_line;
3331     int last_column;
3332     char *filename;
3333   @} YYLTYPE;
3334 @}
3335 @end group
3336
3337 @group
3338 %code @{
3339   static void print_token (yytoken_kind_t token, YYSTYPE val);
3340   static void trace_token (yytoken_kind_t token, YYLTYPE loc);
3341 @}
3342 @end group
3343
3344 @dots{}
3345 @end example
3346
3347 @noindent
3348 Now Bison will insert @code{#include "ptypes.h"} and the new @code{YYLTYPE}
3349 definition before the Bison-generated @code{YYSTYPE} and @code{YYLTYPE}
3350 definitions in both the parser implementation file and the parser header
3351 file.  (By the same reasoning, @code{%code requires} would also be the
3352 appropriate place to write your own definition for @code{YYSTYPE}.)
3353
3354 When you are writing dependency code for @code{YYSTYPE} and @code{YYLTYPE},
3355 you should prefer @code{%code requires} over @code{%code top} regardless of
3356 whether you instruct Bison to generate a parser header file.  When you are
3357 writing code that you need Bison to insert only into the parser
3358 implementation file and that has no special need to appear at the top of
3359 that file, you should prefer the unqualified @code{%code} over @code{%code
3360 top}.  These practices will make the purpose of each block of your code
3361 explicit to Bison and to other developers reading your grammar file.
3362 Following these practices, we expect the unqualified @code{%code} and
3363 @code{%code requires} to be the most important of the four @var{Prologue}
3364 alternatives.
3365
3366 At some point while developing your parser, you might decide to provide
3367 @code{trace_token} to modules that are external to your parser.  Thus, you
3368 might wish for Bison to insert the prototype into both the parser header
3369 file and the parser implementation file.  Since this function is not a
3370 dependency required by @code{YYSTYPE} or @code{YYLTYPE}, it doesn't make
3371 sense to move its prototype to a @code{%code requires}.  More importantly,
3372 since it depends upon @code{YYLTYPE} and @code{yytoken_kind_t}, @code{%code
3373 requires} is not sufficient.  Instead, move its prototype from the
3374 unqualified @code{%code} to a @code{%code provides}:
3375
3376 @example
3377 @group
3378 %code top @{
3379   #define _GNU_SOURCE
3380   #include <stdio.h>
3381 @}
3382 @end group
3383
3384 @group
3385 %code requires @{
3386   #include "ptypes.h"
3387 @}
3388 @end group
3389 @group
3390 %union @{
3391   long n;
3392   tree t;  /* @r{@code{tree} is defined in @file{ptypes.h}.} */
3393 @}
3394 @end group
3395
3396 @group
3397 %code requires @{
3398   #define YYLTYPE YYLTYPE
3399   typedef struct YYLTYPE
3400   @{
3401     int first_line;
3402     int first_column;
3403     int last_line;
3404     int last_column;
3405     char *filename;
3406   @} YYLTYPE;
3407 @}
3408 @end group
3409
3410 @group
3411 %code provides @{
3412   void trace_token (yytoken_kind_t token, YYLTYPE loc);
3413 @}
3414 @end group
3415
3416 @group
3417 %code @{
3418   static void print_token (yytoken_kind_t token, YYSTYPE val);
3419 @}
3420 @end group
3421
3422 @dots{}
3423 @end example
3424
3425 @noindent
3426 Bison will insert the @code{trace_token} prototype into both the parser
3427 header file and the parser implementation file after the definitions for
3428 @code{yytoken_kind_t}, @code{YYLTYPE}, and @code{YYSTYPE}.
3429
3430 The above examples are careful to write directives in an order that reflects
3431 the layout of the generated parser implementation and header files:
3432 @code{%code top}, @code{%code requires}, @code{%code provides}, and then
3433 @code{%code}.  While your grammar files may generally be easier to read if
3434 you also follow this order, Bison does not require it.  Instead, Bison lets
3435 you choose an organization that makes sense to you.
3436
3437 You may declare any of these directives multiple times in the grammar file.
3438 In that case, Bison concatenates the contained code in declaration order.
3439 This is the only way in which the position of one of these directives within
3440 the grammar file affects its functionality.
3441
3442 The result of the previous two properties is greater flexibility in how you may
3443 organize your grammar file.
3444 For example, you may organize semantic-type-related directives by semantic
3445 type:
3446
3447 @example
3448 @group
3449 %code requires @{ #include "type1.h" @}
3450 %union @{ type1 field1; @}
3451 %destructor @{ type1_free ($$); @} <field1>
3452 %printer @{ type1_print (yyo, $$); @} <field1>
3453 @end group
3454
3455 @group
3456 %code requires @{ #include "type2.h" @}
3457 %union @{ type2 field2; @}
3458 %destructor @{ type2_free ($$); @} <field2>
3459 %printer @{ type2_print (yyo, $$); @} <field2>
3460 @end group
3461 @end example
3462
3463 @noindent
3464 You could even place each of the above directive groups in the rules section of
3465 the grammar file next to the set of rules that uses the associated semantic
3466 type.
3467 (In the rules section, you must terminate each of those directives with a
3468 semicolon.)
3469 And you don't have to worry that some directive (like a @code{%union}) in the
3470 definitions section is going to adversely affect their functionality in some
3471 counter-intuitive manner just because it comes first.
3472 Such an organization is not possible using @var{Prologue} sections.
3473
3474 This section has been concerned with explaining the advantages of the four
3475 @var{Prologue} alternatives over the original Yacc @var{Prologue}.
3476 However, in most cases when using these directives, you shouldn't need to
3477 think about all the low-level ordering issues discussed here.
3478 Instead, you should simply use these directives to label each block of your
3479 code according to its purpose and let Bison handle the ordering.
3480 @code{%code} is the most generic label.
3481 Move code to @code{%code requires}, @code{%code provides}, or @code{%code top}
3482 as needed.
3483
3484 @node Bison Declarations
3485 @subsection The Bison Declarations Section
3486 @cindex Bison declarations (introduction)
3487 @cindex declarations, Bison (introduction)
3488
3489 The @var{Bison declarations} section contains declarations that define
3490 terminal and nonterminal symbols, specify precedence, and so on.
3491 In some simple grammars you may not need any declarations.
3492 @xref{Declarations}.
3493
3494 @node Grammar Rules
3495 @subsection The Grammar Rules Section
3496 @cindex grammar rules section
3497 @cindex rules section for grammar
3498
3499 The @dfn{grammar rules} section contains one or more Bison grammar
3500 rules, and nothing else.  @xref{Rules}.
3501
3502 There must always be at least one grammar rule, and the first
3503 @samp{%%} (which precedes the grammar rules) may never be omitted even
3504 if it is the first thing in the file.
3505
3506 @node Epilogue
3507 @subsection The Epilogue
3508 @cindex additional C code section
3509 @cindex epilogue
3510 @cindex C code, section for additional
3511
3512 The @var{Epilogue} is copied verbatim to the end of the parser
3513 implementation file, just as the @var{Prologue} is copied to the
3514 beginning.  This is the most convenient place to put anything that you
3515 want to have in the parser implementation file but which need not come
3516 before the definition of @code{yyparse}.  For example, the definitions
3517 of @code{yylex} and @code{yyerror} often go here.  Because C requires
3518 functions to be declared before being used, you often need to declare
3519 functions like @code{yylex} and @code{yyerror} in the Prologue, even
3520 if you define them in the Epilogue.  @xref{Interface}.
3521
3522 If the last section is empty, you may omit the @samp{%%} that separates it
3523 from the grammar rules.
3524
3525 The Bison parser itself contains many macros and identifiers whose names
3526 start with @samp{yy} or @samp{YY}, so it is a good idea to avoid using
3527 any such names (except those documented in this manual) in the epilogue
3528 of the grammar file.
3529
3530 @node Symbols
3531 @section Symbols, Terminal and Nonterminal
3532 @cindex nonterminal symbol
3533 @cindex terminal symbol
3534 @cindex token kind
3535 @cindex symbol
3536
3537 @dfn{Symbols} in Bison grammars represent the grammatical classifications
3538 of the language.
3539
3540 A @dfn{terminal symbol} (also known as a @dfn{token kind}) represents a
3541 class of syntactically equivalent tokens.  You use the symbol in grammar
3542 rules to mean that a token in that class is allowed.  The symbol is
3543 represented in the Bison parser by a numeric code, and the @code{yylex}
3544 function returns a token kind code to indicate what kind of token has been
3545 read.  You don't need to know what the code value is; you can use the symbol
3546 to stand for it.
3547
3548 A @dfn{nonterminal symbol} stands for a class of syntactically
3549 equivalent groupings.  The symbol name is used in writing grammar rules.
3550 By convention, it should be all lower case.
3551
3552 Symbol names can contain letters, underscores, periods, and non-initial
3553 digits and dashes.  Dashes in symbol names are a GNU extension, incompatible
3554 with POSIX Yacc.  Periods and dashes make symbol names less convenient to
3555 use with named references, which require brackets around such names
3556 (@pxref{Named References}).  Terminal symbols that contain periods or dashes
3557 make little sense: since they are not valid symbols (in most programming
3558 languages) they are not exported as token names.
3559
3560 There are three ways of writing terminal symbols in the grammar:
3561
3562 @itemize @bullet
3563 @item
3564 A @dfn{named token kind} is written with an identifier, like an identifier
3565 in C@.  By convention, it should be all upper case.  Each such name must be
3566 defined with a Bison declaration such as @code{%token}.  @xref{Token Decl}.
3567
3568 @item
3569 @cindex character token
3570 @cindex literal token
3571 @cindex single-character literal
3572 A @dfn{character token kind} (or @dfn{literal character token}) is written
3573 in the grammar using the same syntax used in C for character constants; for
3574 example, @code{'+'} is a character token kind.  A character token kind
3575 doesn't need to be declared unless you need to specify its semantic value
3576 data type (@pxref{Value Type}), associativity, or precedence
3577 (@pxref{Precedence}).
3578
3579 By convention, a character token kind is used only to represent a token that
3580 consists of that particular character.  Thus, the token kind @code{'+'} is
3581 used to represent the character @samp{+} as a token.  Nothing enforces this
3582 convention, but if you depart from it, your program will confuse other
3583 readers.
3584
3585 All the usual escape sequences used in character literals in C can be used
3586 in Bison as well, but you must not use the null character as a character
3587 literal because its numeric code, zero, signifies end-of-input
3588 (@pxref{Calling Convention}).  Also, unlike Standard C, trigraphs have no
3589 special meaning in Bison character literals, nor is backslash-newline
3590 allowed.
3591
3592 @item
3593 @cindex string token
3594 @cindex literal string token
3595 @cindex multicharacter literal
3596 A @dfn{literal string token} is written like a C string constant; for
3597 example, @code{"<="} is a literal string token.  A literal string token
3598 doesn't need to be declared unless you need to specify its semantic
3599 value data type (@pxref{Value Type}), associativity, or precedence
3600 (@pxref{Precedence}).
3601
3602 You can associate the literal string token with a symbolic name as an alias,
3603 using the @code{%token} declaration (@pxref{Token Decl}).  If you don't do
3604 that, the lexical analyzer has to retrieve the token code for the literal
3605 string token from the @code{yytname} table (@pxref{Calling Convention}).
3606
3607 @strong{Warning}: literal string tokens do not work in Yacc.
3608
3609 By convention, a literal string token is used only to represent a token
3610 that consists of that particular string.  Thus, you should use the token
3611 kind @code{"<="} to represent the string @samp{<=} as a token.  Bison
3612 does not enforce this convention, but if you depart from it, people who
3613 read your program will be confused.
3614
3615 All the escape sequences used in string literals in C can be used in
3616 Bison as well, except that you must not use a null character within a
3617 string literal.  Also, unlike Standard C, trigraphs have no special
3618 meaning in Bison string literals, nor is backslash-newline allowed.  A
3619 literal string token must contain two or more characters; for a token
3620 containing just one character, use a character token (see above).
3621 @end itemize
3622
3623 How you choose to write a terminal symbol has no effect on its
3624 grammatical meaning.  That depends only on where it appears in rules and
3625 on when the lexical analyzer function returns that symbol.
3626
3627 The value returned by @code{yylex} is always one of the terminal
3628 symbols, except that a zero or negative value signifies end-of-input.
3629 Whichever way you write the token kind in the grammar rules, you write
3630 it the same way in the definition of @code{yylex}.  The numeric code
3631 for a character token kind is simply the positive numeric code of the
3632 character, so @code{yylex} can use the identical value to generate the
3633 requisite code, though you may need to convert it to @code{unsigned
3634 char} to avoid sign-extension on hosts where @code{char} is signed.
3635 Each named token kind becomes a C macro in the parser implementation
3636 file, so @code{yylex} can use the name to stand for the code.  (This
3637 is why periods don't make sense in terminal symbols.)  @xref{Calling
3638 Convention}.
3639
3640 If @code{yylex} is defined in a separate file, you need to arrange for the
3641 token-kind definitions to be available there.  Use the @option{-d} option
3642 when you run Bison, so that it will write these definitions into a separate
3643 header file @file{@var{name}.tab.h} which you can include in the other
3644 source files that need it.  @xref{Invocation}.
3645
3646 If you want to write a grammar that is portable to any Standard C
3647 host, you must use only nonnull character tokens taken from the basic
3648 execution character set of Standard C@.  This set consists of the ten
3649 digits, the 52 lower- and upper-case English letters, and the
3650 characters in the following C-language string:
3651
3652 @example
3653 "\a\b\t\n\v\f\r !\"#%&'()*+,-./:;<=>?[\\]^_@{|@}~"
3654 @end example
3655
3656 The @code{yylex} function and Bison must use a consistent character set
3657 and encoding for character tokens.  For example, if you run Bison in an
3658 ASCII environment, but then compile and run the resulting
3659 program in an environment that uses an incompatible character set like
3660 EBCDIC, the resulting program may not work because the tables
3661 generated by Bison will assume ASCII numeric values for
3662 character tokens.  It is standard practice for software distributions to
3663 contain C source files that were generated by Bison in an
3664 ASCII environment, so installers on platforms that are
3665 incompatible with ASCII must rebuild those files before
3666 compiling them.
3667
3668 The symbol @code{error} is a terminal symbol reserved for error recovery
3669 (@pxref{Error Recovery}); you shouldn't use it for any other purpose.
3670 In particular, @code{yylex} should never return this value.  The default
3671 value of the error token is 256, unless you explicitly assigned 256 to
3672 one of your tokens with a @code{%token} declaration.
3673
3674 @node Rules
3675 @section Grammar Rules
3676
3677 A Bison grammar is a list of rules.
3678
3679 @menu
3680 * Rules Syntax::   Syntax of the rules.
3681 * Empty Rules::    Symbols that can match the empty string.
3682 * Recursion::      Writing recursive rules.
3683 @end menu
3684
3685 @node Rules Syntax
3686 @subsection Syntax of Grammar Rules
3687 @cindex rule syntax
3688 @cindex grammar rule syntax
3689 @cindex syntax of grammar rules
3690
3691 A Bison grammar rule has the following general form:
3692
3693 @example
3694 @var{result}: @var{components}@dots{};
3695 @end example
3696
3697 @noindent
3698 where @var{result} is the nonterminal symbol that this rule describes,
3699 and @var{components} are various terminal and nonterminal symbols that
3700 are put together by this rule (@pxref{Symbols}).
3701
3702 For example,
3703
3704 @example
3705 exp: exp '+' exp;
3706 @end example
3707
3708 @noindent
3709 says that two groupings of type @code{exp}, with a @samp{+} token in between,
3710 can be combined into a larger grouping of type @code{exp}.
3711
3712 White space in rules is significant only to separate symbols.  You can add
3713 extra white space as you wish.
3714
3715 Scattered among the components can be @var{actions} that determine
3716 the semantics of the rule.  An action looks like this:
3717
3718 @example
3719 @{@var{C statements}@}
3720 @end example
3721
3722 @noindent
3723 @cindex braced code
3724 This is an example of @dfn{braced code}, that is, C code surrounded by
3725 braces, much like a compound statement in C@.  Braced code can contain
3726 any sequence of C tokens, so long as its braces are balanced.  Bison
3727 does not check the braced code for correctness directly; it merely
3728 copies the code to the parser implementation file, where the C
3729 compiler can check it.
3730
3731 Within braced code, the balanced-brace count is not affected by braces
3732 within comments, string literals, or character constants, but it is
3733 affected by the C digraphs @samp{<%} and @samp{%>} that represent
3734 braces.  At the top level braced code must be terminated by @samp{@}}
3735 and not by a digraph.  Bison does not look for trigraphs, so if braced
3736 code uses trigraphs you should ensure that they do not affect the
3737 nesting of braces or the boundaries of comments, string literals, or
3738 character constants.
3739
3740 Usually there is only one action and it follows the components.
3741 @xref{Actions}.
3742
3743 @findex |
3744 Multiple rules for the same @var{result} can be written separately or can
3745 be joined with the vertical-bar character @samp{|} as follows:
3746
3747 @example
3748 @group
3749 @var{result}:
3750   @var{rule1-components}@dots{}
3751 | @var{rule2-components}@dots{}
3752 @dots{}
3753 ;
3754 @end group
3755 @end example
3756
3757 @noindent
3758 They are still considered distinct rules even when joined in this way.
3759
3760 @node Empty Rules
3761 @subsection Empty Rules
3762 @cindex empty rule
3763 @cindex rule, empty
3764 @findex %empty
3765
3766 A rule is said to be @dfn{empty} if its right-hand side (@var{components})
3767 is empty.  It means that @var{result} in the previous example can match the
3768 empty string.  As another example, here is how to define an optional
3769 semicolon:
3770
3771 @example
3772 semicolon.opt: | ";";
3773 @end example
3774
3775 @noindent
3776 It is easy to overlook an empty rule, especially when @code{|} is used.  The
3777 @code{%empty} directive lets you make explicit that a rule is empty on
3778 purpose:
3779
3780 @example
3781 @group
3782 semicolon.opt:
3783   %empty
3784 | ";"
3785 ;
3786 @end group
3787 @end example
3788
3789 Flagging a non-empty rule with @code{%empty} is an error.  If run with
3790 @option{-Wempty-rule}, @command{bison} will report empty rules without
3791 @code{%empty}.  Using @code{%empty} enables this warning, unless
3792 @option{-Wno-empty-rule} was specified.
3793
3794 The @code{%empty} directive is a Bison extension; it does not work with
3795 Yacc.  To remain compatible with POSIX Yacc, it is customary to write a
3796 comment @samp{/* empty */} in each rule with no components:
3797
3798 @example
3799 @group
3800 semicolon.opt:
3801   /* empty */
3802 | ";"
3803 ;
3804 @end group
3805 @end example
3806
3807
3808 @node Recursion
3809 @subsection Recursive Rules
3810 @cindex recursive rule
3811 @cindex rule, recursive
3812
3813 A rule is called @dfn{recursive} when its @var{result} nonterminal
3814 appears also on its right hand side.  Nearly all Bison grammars need to
3815 use recursion, because that is the only way to define a sequence of any
3816 number of a particular thing.  Consider this recursive definition of a
3817 comma-separated sequence of one or more expressions:
3818
3819 @example
3820 @group
3821 expseq1:
3822   exp
3823 | expseq1 ',' exp
3824 ;
3825 @end group
3826 @end example
3827
3828 @cindex left recursion
3829 @cindex right recursion
3830 @noindent
3831 Since the recursive use of @code{expseq1} is the leftmost symbol in the
3832 right hand side, we call this @dfn{left recursion}.  By contrast, here
3833 the same construct is defined using @dfn{right recursion}:
3834
3835 @example
3836 @group
3837 expseq1:
3838   exp
3839 | exp ',' expseq1
3840 ;
3841 @end group
3842 @end example
3843
3844 @noindent
3845 Any kind of sequence can be defined using either left recursion or right
3846 recursion, but you should always use left recursion, because it can
3847 parse a sequence of any number of elements with bounded stack space.
3848 Right recursion uses up space on the Bison stack in proportion to the
3849 number of elements in the sequence, because all the elements must be
3850 shifted onto the stack before the rule can be applied even once.
3851 @xref{Algorithm}, for further explanation
3852 of this.
3853
3854 @cindex mutual recursion
3855 @dfn{Indirect} or @dfn{mutual} recursion occurs when the result of the
3856 rule does not appear directly on its right hand side, but does appear
3857 in rules for other nonterminals which do appear on its right hand
3858 side.
3859
3860 For example:
3861
3862 @example
3863 @group
3864 expr:
3865   primary
3866 | primary '+' primary
3867 ;
3868 @end group
3869
3870 @group
3871 primary:
3872   constant
3873 | '(' expr ')'
3874 ;
3875 @end group
3876 @end example
3877
3878 @noindent
3879 defines two mutually-recursive nonterminals, since each refers to the
3880 other.
3881
3882 @node Semantics
3883 @section Defining Language Semantics
3884 @cindex defining language semantics
3885 @cindex language semantics, defining
3886
3887 The grammar rules for a language determine only the syntax.  The semantics
3888 are determined by the semantic values associated with various tokens and
3889 groupings, and by the actions taken when various groupings are recognized.
3890
3891 For example, the calculator calculates properly because the value
3892 associated with each expression is the proper number; it adds properly
3893 because the action for the grouping @w{@samp{@var{x} + @var{y}}} is to add
3894 the numbers associated with @var{x} and @var{y}.
3895
3896 @menu
3897 * Value Type::        Specifying one data type for all semantic values.
3898 * Multiple Types::    Specifying several alternative data types.
3899 * Type Generation::   Generating the semantic value type.
3900 * Union Decl::        Declaring the set of all semantic value types.
3901 * Structured Value Type::  Providing a structured semantic value type.
3902 * Actions::           An action is the semantic definition of a grammar rule.
3903 * Action Types::      Specifying data types for actions to operate on.
3904 * Midrule Actions::   Most actions go at the end of a rule.
3905                       This says when, why and how to use the exceptional
3906                         action in the middle of a rule.
3907 @end menu
3908
3909 @node Value Type
3910 @subsection Data Types of Semantic Values
3911 @cindex semantic value type
3912 @cindex value type, semantic
3913 @cindex data types of semantic values
3914 @cindex default data type
3915
3916 In a simple program it may be sufficient to use the same data type for
3917 the semantic values of all language constructs.  This was true in the
3918 RPN and infix calculator examples (@pxref{RPN Calc}).
3919
3920 Bison normally uses the type @code{int} for semantic values if your program
3921 uses the same data type for all language constructs.  To specify some other
3922 type, define the @code{%define} variable @code{api.value.type} like this:
3923
3924 @example
3925 %define api.value.type @{double@}
3926 @end example
3927
3928 @noindent
3929 or
3930
3931 @example
3932 %define api.value.type @{struct semantic_value_type@}
3933 @end example
3934
3935 The value of @code{api.value.type} should be a type name that does not
3936 contain parentheses or square brackets.
3937
3938 Alternatively in C, instead of relying on Bison's @code{%define} support,
3939 you may rely on the C preprocessor and define @code{YYSTYPE} as a macro:
3940
3941 @example
3942 #define YYSTYPE double
3943 @end example
3944
3945 @noindent
3946 This macro definition must go in the prologue of the grammar file
3947 (@pxref{Grammar Outline}).  If compatibility with POSIX Yacc matters to you,
3948 use this.  Note however that Bison cannot know @code{YYSTYPE}'s value, not
3949 even whether it is defined, so there are services it cannot provide.
3950 Besides, this works only for C@.
3951
3952 @node Multiple Types
3953 @subsection More Than One Value Type
3954
3955 In most programs, you will need different data types for different kinds
3956 of tokens and groupings.  For example, a numeric constant may need type
3957 @code{int} or @code{long}, while a string constant needs type
3958 @code{char *}, and an identifier might need a pointer to an entry in the
3959 symbol table.
3960
3961 To use more than one data type for semantic values in one parser, Bison
3962 requires you to do two things:
3963
3964 @itemize @bullet
3965 @item
3966 Specify the entire collection of possible data types.  There are several
3967 options:
3968 @itemize @bullet
3969 @item
3970 let Bison compute the union type from the tags you assign to symbols;
3971
3972 @item
3973 use the @code{%union} Bison declaration (@pxref{Union Decl});
3974
3975 @item
3976 define the @code{%define} variable @code{api.value.type} to be a union type
3977 whose members are the type tags (@pxref{Structured Value Type});
3978
3979 @item
3980 use a @code{typedef} or a @code{#define} to define @code{YYSTYPE} to be a
3981 union type whose member names are the type tags.
3982 @end itemize
3983
3984 @item
3985 Choose one of those types for each symbol (terminal or nonterminal) for
3986 which semantic values are used.  This is done for tokens with the
3987 @code{%token} Bison declaration (@pxref{Token Decl}) and
3988 for groupings with the @code{%nterm}/@code{%type} Bison declarations
3989 (@pxref{Type Decl}).
3990 @end itemize
3991
3992 @node Type Generation
3993 @subsection Generating the Semantic Value Type
3994 @cindex declaring value types
3995 @cindex value types, declaring
3996 @findex %define api.value.type union
3997
3998 The special value @code{union} of the @code{%define} variable
3999 @code{api.value.type} instructs Bison that the type tags (used with the
4000 @code{%token}, @code{%nterm} and @code{%type} directives) are genuine types,
4001 not names of members of @code{YYSTYPE}.
4002
4003 For example:
4004
4005 @example
4006 %define api.value.type union
4007 %token <int> INT "integer"
4008 %token <int> 'n'
4009 %nterm <int> expr
4010 %token <char const *> ID "identifier"
4011 @end example
4012
4013 @noindent
4014 generates an appropriate value of @code{YYSTYPE} to support each symbol
4015 type.  The name of the member of @code{YYSTYPE} for tokens that have a
4016 declared identifier @var{id} (such as @code{INT} and @code{ID} above, but
4017 not @code{'n'}) is @code{@var{id}}.  The other symbols have unspecified
4018 names on which you should not depend; instead, rely on C casts to access
4019 the semantic value with the appropriate type:
4020
4021 @example
4022 /* For an "integer". */
4023 yylval.INT = 42;
4024 return INT;
4025
4026 /* For an 'n', also declared as int. */
4027 *((int*)&yylval) = 42;
4028 return 'n';
4029
4030 /* For an "identifier". */
4031 yylval.ID = "42";
4032 return ID;
4033 @end example
4034
4035 If the @code{%define} variable @code{api.token.prefix} is defined
4036 (@pxref{%define Summary}), then it is also used to prefix
4037 the union member names.  For instance, with @samp{%define api.token.prefix
4038 @{TOK_@}}:
4039
4040 @example
4041 /* For an "integer". */
4042 yylval.TOK_INT = 42;
4043 return TOK_INT;
4044 @end example
4045
4046 This Bison extension cannot work if @code{%yacc} (or
4047 @option{-y}/@option{--yacc}) is enabled, as POSIX mandates that Yacc
4048 generate tokens as macros (e.g., @samp{#define INT 258}, or @samp{#define
4049 TOK_INT 258}).
4050
4051 A similar feature is provided for C++ that in addition overcomes C++
4052 limitations (that forbid non-trivial objects to be part of a @code{union}):
4053 @samp{%define api.value.type variant}, see @ref{C++ Variants}.
4054
4055 @node Union Decl
4056 @subsection The Union Declaration
4057 @cindex declaring value types
4058 @cindex value types, declaring
4059 @findex %union
4060
4061 The @code{%union} declaration specifies the entire collection of possible
4062 data types for semantic values.  The keyword @code{%union} is followed by
4063 braced code containing the same thing that goes inside a @code{union} in C@.
4064
4065 For example:
4066
4067 @example
4068 @group
4069 %union @{
4070   double val;
4071   symrec *tptr;
4072 @}
4073 @end group
4074 @end example
4075
4076 @noindent
4077 This says that the two alternative types are @code{double} and @code{symrec
4078 *}.  They are given names @code{val} and @code{tptr}; these names are used
4079 in the @code{%token}, @code{%nterm} and @code{%type} declarations to pick
4080 one of the types for a terminal or nonterminal symbol (@pxref{Type Decl}).
4081
4082 As an extension to POSIX, a tag is allowed after the @code{%union}.  For
4083 example:
4084
4085 @example
4086 @group
4087 %union value @{
4088   double val;
4089   symrec *tptr;
4090 @}
4091 @end group
4092 @end example
4093
4094 @noindent
4095 specifies the union tag @code{value}, so the corresponding C type is
4096 @code{union value}.  If you do not specify a tag, it defaults to
4097 @code{YYSTYPE} (@pxref{%define Summary}).
4098
4099 As another extension to POSIX, you may specify multiple @code{%union}
4100 declarations; their contents are concatenated.  However, only the first
4101 @code{%union} declaration can specify a tag.
4102
4103 Note that, unlike making a @code{union} declaration in C, you need not write
4104 a semicolon after the closing brace.
4105
4106 @node Structured Value Type
4107 @subsection Providing a Structured Semantic Value Type
4108 @cindex declaring value types
4109 @cindex value types, declaring
4110 @findex %union
4111
4112 Instead of @code{%union}, you can define and use your own union type
4113 @code{YYSTYPE} if your grammar contains at least one @samp{<@var{type}>}
4114 tag.  For example, you can put the following into a header file
4115 @file{parser.h}:
4116
4117 @example
4118 @group
4119 union YYSTYPE @{
4120   double val;
4121   symrec *tptr;
4122 @};
4123 @end group
4124 @end example
4125
4126 @noindent
4127 and then your grammar can use the following instead of @code{%union}:
4128
4129 @example
4130 @group
4131 %@{
4132 #include "parser.h"
4133 %@}
4134 %define api.value.type @{union YYSTYPE@}
4135 %nterm <val> expr
4136 %token <tptr> ID
4137 @end group
4138 @end example
4139
4140 Actually, you may also provide a @code{struct} rather than a @code{union},
4141 which may be handy if you want to track information for every symbol (such
4142 as preceding comments).
4143
4144 The type you provide may even be structured and include pointers, in which
4145 case the type tags you provide may be composite, with @samp{.} and @samp{->}
4146 operators.
4147
4148 @node Actions
4149 @subsection Actions
4150 @cindex action
4151 @vindex $$
4152 @vindex $@var{n}
4153 @vindex $@var{name}
4154 @vindex $[@var{name}]
4155
4156 An action accompanies a syntactic rule and contains C code to be executed
4157 each time an instance of that rule is recognized.  The task of most actions
4158 is to compute a semantic value for the grouping built by the rule from the
4159 semantic values associated with tokens or smaller groupings.
4160
4161 An action consists of braced code containing C statements, and can be
4162 placed at any position in the rule;
4163 it is executed at that position.  Most rules have just one action at the
4164 end of the rule, following all the components.  Actions in the middle of
4165 a rule are tricky and used only for special purposes (@pxref{Midrule
4166 Actions}).
4167
4168 The C code in an action can refer to the semantic values of the
4169 components matched by the rule with the construct @code{$@var{n}},
4170 which stands for the value of the @var{n}th component.  The semantic
4171 value for the grouping being constructed is @code{$$}.  In addition,
4172 the semantic values of symbols can be accessed with the named
4173 references construct @code{$@var{name}} or @code{$[@var{name}]}.
4174 Bison translates both of these constructs into expressions of the
4175 appropriate type when it copies the actions into the parser
4176 implementation file.  @code{$$} (or @code{$@var{name}}, when it stands
4177 for the current grouping) is translated to a modifiable lvalue, so it
4178 can be assigned to.
4179
4180 Here is a typical example:
4181
4182 @example
4183 @group
4184 exp:
4185 @dots{}
4186 | exp '+' exp     @{ $$ = $1 + $3; @}
4187 @end group
4188 @end example
4189
4190 Or, in terms of named references:
4191
4192 @example
4193 @group
4194 exp[result]:
4195 @dots{}
4196 | exp[left] '+' exp[right]  @{ $result = $left + $right; @}
4197 @end group
4198 @end example
4199
4200 @noindent
4201 This rule constructs an @code{exp} from two smaller @code{exp} groupings
4202 connected by a plus-sign token.  In the action, @code{$1} and @code{$3}
4203 (@code{$left} and @code{$right})
4204 refer to the semantic values of the two component @code{exp} groupings,
4205 which are the first and third symbols on the right hand side of the rule.
4206 The sum is stored into @code{$$} (@code{$result}) so that it becomes the
4207 semantic value of
4208 the addition-expression just recognized by the rule.  If there were a
4209 useful semantic value associated with the @samp{+} token, it could be
4210 referred to as @code{$2}.
4211
4212 @xref{Named References}, for more information about using the named
4213 references construct.
4214
4215 Note that the vertical-bar character @samp{|} is really a rule
4216 separator, and actions are attached to a single rule.  This is a
4217 difference with tools like Flex, for which @samp{|} stands for either
4218 ``or'', or ``the same action as that of the next rule''.  In the
4219 following example, the action is triggered only when @samp{b} is found:
4220
4221 @example
4222 a-or-b: 'a'|'b'   @{ a_or_b_found = 1; @};
4223 @end example
4224
4225 @cindex default action
4226 If you don't specify an action for a rule, Bison supplies a default:
4227 @w{@code{$$ = $1}.}  Thus, the value of the first symbol in the rule
4228 becomes the value of the whole rule.  Of course, the default action is
4229 valid only if the two data types match.  There is no meaningful default
4230 action for an empty rule; every empty rule must have an explicit action
4231 unless the rule's value does not matter.
4232
4233 @code{$@var{n}} with @var{n} zero or negative is allowed for reference
4234 to tokens and groupings on the stack @emph{before} those that match the
4235 current rule.  This is a very risky practice, and to use it reliably
4236 you must be certain of the context in which the rule is applied.  Here
4237 is a case in which you can use this reliably:
4238
4239 @example
4240 @group
4241 foo:
4242   expr bar '+' expr  @{ @dots{} @}
4243 | expr bar '-' expr  @{ @dots{} @}
4244 ;
4245 @end group
4246
4247 @group
4248 bar:
4249   %empty    @{ previous_expr = $0; @}
4250 ;
4251 @end group
4252 @end example
4253
4254 As long as @code{bar} is used only in the fashion shown here, @code{$0}
4255 always refers to the @code{expr} which precedes @code{bar} in the
4256 definition of @code{foo}.
4257
4258 @vindex yylval
4259 It is also possible to access the semantic value of the lookahead token, if
4260 any, from a semantic action.
4261 This semantic value is stored in @code{yylval}.
4262 @xref{Action Features}.
4263
4264 @node Action Types
4265 @subsection Data Types of Values in Actions
4266 @cindex action data types
4267 @cindex data types in actions
4268
4269 If you have chosen a single data type for semantic values, the @code{$$}
4270 and @code{$@var{n}} constructs always have that data type.
4271
4272 If you have used @code{%union} to specify a variety of data types, then you
4273 must declare a choice among these types for each terminal or nonterminal
4274 symbol that can have a semantic value.  Then each time you use @code{$$} or
4275 @code{$@var{n}}, its data type is determined by which symbol it refers to
4276 in the rule.  In this example,
4277
4278 @example
4279 @group
4280 exp:
4281   @dots{}
4282 | exp '+' exp    @{ $$ = $1 + $3; @}
4283 @end group
4284 @end example
4285
4286 @noindent
4287 @code{$1} and @code{$3} refer to instances of @code{exp}, so they all
4288 have the data type declared for the nonterminal symbol @code{exp}.  If
4289 @code{$2} were used, it would have the data type declared for the
4290 terminal symbol @code{'+'}, whatever that might be.
4291
4292 Alternatively, you can specify the data type when you refer to the value,
4293 by inserting @samp{<@var{type}>} after the @samp{$} at the beginning of the
4294 reference.  For example, if you have defined types as shown here:
4295
4296 @example
4297 @group
4298 %union @{
4299   int itype;
4300   double dtype;
4301 @}
4302 @end group
4303 @end example
4304
4305 @noindent
4306 then you can write @code{$<itype>1} to refer to the first subunit of the
4307 rule as an integer, or @code{$<dtype>1} to refer to it as a double.
4308
4309 @node Midrule Actions
4310 @subsection Actions in Midrule
4311 @cindex actions in midrule
4312 @cindex midrule actions
4313
4314 Occasionally it is useful to put an action in the middle of a rule.
4315 These actions are written just like usual end-of-rule actions, but they
4316 are executed before the parser even recognizes the following components.
4317
4318 @menu
4319 * Using Midrule Actions::       Putting an action in the middle of a rule.
4320 * Typed Midrule Actions::       Specifying the semantic type of their values.
4321 * Midrule Action Translation::  How midrule actions are actually processed.
4322 * Midrule Conflicts::           Midrule actions can cause conflicts.
4323 @end menu
4324
4325 @node Using Midrule Actions
4326 @subsubsection Using Midrule Actions
4327
4328 A midrule action may refer to the components preceding it using
4329 @code{$@var{n}}, but it may not refer to subsequent components because
4330 it is run before they are parsed.
4331
4332 The midrule action itself counts as one of the components of the rule.
4333 This makes a difference when there is another action later in the same rule
4334 (and usually there is another at the end): you have to count the actions
4335 along with the symbols when working out which number @var{n} to use in
4336 @code{$@var{n}}.
4337
4338 The midrule action can also have a semantic value.  The action can set
4339 its value with an assignment to @code{$$}, and actions later in the rule
4340 can refer to the value using @code{$@var{n}}.  Since there is no symbol
4341 to name the action, there is no way to declare a data type for the value
4342 in advance, so you must use the @samp{$<@dots{}>@var{n}} construct to
4343 specify a data type each time you refer to this value.
4344
4345 There is no way to set the value of the entire rule with a midrule
4346 action, because assignments to @code{$$} do not have that effect.  The
4347 only way to set the value for the entire rule is with an ordinary action
4348 at the end of the rule.
4349
4350 Here is an example from a hypothetical compiler, handling a @code{let}
4351 statement that looks like @samp{let (@var{variable}) @var{statement}} and
4352 serves to create a variable named @var{variable} temporarily for the
4353 duration of @var{statement}.  To parse this construct, we must put
4354 @var{variable} into the symbol table while @var{statement} is parsed, then
4355 remove it afterward.  Here is how it is done:
4356
4357 @example
4358 @group
4359 stmt:
4360   "let" '(' var ')'
4361     @{
4362       $<context>$ = push_context ();
4363       declare_variable ($3);
4364     @}
4365   stmt
4366     @{
4367       $$ = $6;
4368       pop_context ($<context>5);
4369     @}
4370 @end group
4371 @end example
4372
4373 @noindent
4374 As soon as @samp{let (@var{variable})} has been recognized, the first
4375 action is run.  It saves a copy of the current semantic context (the
4376 list of accessible variables) as its semantic value, using alternative
4377 @code{context} in the data-type union.  Then it calls
4378 @code{declare_variable} to add the new variable to that list.  Once the
4379 first action is finished, the embedded statement @code{stmt} can be
4380 parsed.
4381
4382 Note that the midrule action is component number 5, so the @samp{stmt} is
4383 component number 6.  Named references can be used to improve the readability
4384 and maintainability (@pxref{Named References}):
4385
4386 @example
4387 @group
4388 stmt:
4389   "let" '(' var ')'
4390     @{
4391       $<context>let = push_context ();
4392       declare_variable ($3);
4393     @}[let]
4394   stmt
4395     @{
4396       $$ = $6;
4397       pop_context ($<context>let);
4398     @}
4399 @end group
4400 @end example
4401
4402 After the embedded statement is parsed, its semantic value becomes the
4403 value of the entire @code{let}-statement.  Then the semantic value from the
4404 earlier action is used to restore the prior list of variables.  This
4405 removes the temporary @code{let}-variable from the list so that it won't
4406 appear to exist while the rest of the program is parsed.
4407
4408 Because the types of the semantic values of midrule actions are unknown to
4409 Bison, type-based features (e.g., @samp{%printer}, @samp{%destructor}) do
4410 not work, which could result in memory leaks.  They also forbid the use of
4411 the @code{variant} implementation of the @code{api.value.type} in C++
4412 (@pxref{C++ Variants}).
4413
4414 @xref{Typed Midrule Actions}, for one way to address this issue, and
4415 @ref{Midrule Action Translation}, for another: turning midrule actions
4416 into regular actions.
4417
4418
4419 @node Typed Midrule Actions
4420 @subsubsection Typed Midrule Actions
4421
4422 @findex %destructor
4423 @cindex discarded symbols, midrule actions
4424 @cindex error recovery, midrule actions
4425 In the above example, if the parser initiates error recovery (@pxref{Error
4426 Recovery}) while parsing the tokens in the embedded statement @code{stmt},
4427 it might discard the previous semantic context @code{$<context>5} without
4428 restoring it.  Thus, @code{$<context>5} needs a destructor
4429 (@pxref{Destructor Decl}), and Bison needs the
4430 type of the semantic value (@code{context}) to select the right destructor.
4431
4432 As an extension to Yacc's midrule actions, Bison offers a means to type
4433 their semantic value: specify its type tag (@samp{<...>}) before the midrule
4434 action.
4435
4436 Consider the previous example, with an untyped midrule action:
4437
4438 @example
4439 @group
4440 stmt:
4441   "let" '(' var ')'
4442     @{
4443       $<context>$ = push_context (); // ***
4444       declare_variable ($3);
4445     @}
4446   stmt
4447     @{
4448       $$ = $6;
4449       pop_context ($<context>5);     // ***
4450     @}
4451 @end group
4452 @end example
4453
4454 @noindent
4455 If instead you write:
4456
4457 @example
4458 @group
4459 stmt:
4460   "let" '(' var ')'
4461     <context>@{                       // ***
4462       $$ = push_context ();          // ***
4463       declare_variable ($3);
4464     @}
4465   stmt
4466     @{
4467       $$ = $6;
4468       pop_context ($5);              // ***
4469     @}
4470 @end group
4471 @end example
4472
4473 @noindent
4474 then @code{%printer} and @code{%destructor} work properly (no more leaks!),
4475 C++ @code{variant}s can be used, and redundancy is reduced (@code{<context>}
4476 is specified once).
4477
4478
4479 @node Midrule Action Translation
4480 @subsubsection Midrule Action Translation
4481 @vindex $@@@var{n}
4482 @vindex @@@var{n}
4483
4484 Midrule actions are actually transformed into regular rules and actions.
4485 The various reports generated by Bison (textual, graphical, etc., see
4486 @ref{Understanding}) reveal this translation,
4487 best explained by means of an example.  The following rule:
4488
4489 @example
4490 exp: @{ a(); @} "b" @{ c(); @} @{ d(); @} "e" @{ f(); @};
4491 @end example
4492
4493 @noindent
4494 is translated into:
4495
4496 @example
4497 $@@1: %empty @{ a(); @};
4498 $@@2: %empty @{ c(); @};
4499 $@@3: %empty @{ d(); @};
4500 exp: $@@1 "b" $@@2 $@@3 "e" @{ f(); @};
4501 @end example
4502
4503 @noindent
4504 with new nonterminal symbols @code{$@@@var{n}}, where @var{n} is a number.
4505
4506 A midrule action is expected to generate a value if it uses @code{$$}, or
4507 the (final) action uses @code{$@var{n}} where @var{n} denotes the midrule
4508 action.  In that case its nonterminal is rather named @code{@@@var{n}}:
4509
4510 @example
4511 exp: @{ a(); @} "b" @{ $$ = c(); @} @{ d(); @} "e" @{ f = $1; @};
4512 @end example
4513
4514 @noindent
4515 is translated into
4516
4517 @example
4518 @@1: %empty @{ a(); @};
4519 @@2: %empty @{ $$ = c(); @};
4520 $@@3: %empty @{ d(); @};
4521 exp: @@1 "b" @@2 $@@3 "e" @{ f = $1; @}
4522 @end example
4523
4524 There are probably two errors in the above example: the first midrule action
4525 does not generate a value (it does not use @code{$$} although the final
4526 action uses it), and the value of the second one is not used (the final
4527 action does not use @code{$3}).  Bison reports these errors when the
4528 @code{midrule-value} warnings are enabled (@pxref{Invocation}):
4529
4530 @example
4531 $ @kbd{bison -Wmidrule-value mid.y}
4532 @group
4533 mid.y:2.6-13: @dwarning{warning}: unset value: $$
4534     2 | exp: @dwarning{@{ a(); @}} "b" @{ $$ = c(); @} @{ d(); @} "e" @{ f = $1; @};
4535       |      @dwarning{^~~~~~~~}
4536 @end group
4537 @group
4538 mid.y:2.19-31: @dwarning{warning}: unused value: $3
4539     2 | exp: @{ a(); @} "b" @dwarning{@{ $$ = c(); @}} @{ d(); @} "e" @{ f = $1; @};
4540       |                   @dwarning{^~~~~~~~~~~~~}
4541 @end group
4542 @end example
4543
4544 @sp 1
4545
4546 It is sometimes useful to turn midrule actions into regular actions, e.g.,
4547 to factor them, or to escape from their limitations.  For instance, as an
4548 alternative to @emph{typed} midrule action, you may bury the midrule action
4549 inside a nonterminal symbol and declare a printer and a destructor for
4550 that symbol:
4551
4552 @example
4553 @group
4554 %nterm <context> let
4555 %destructor @{ pop_context ($$); @} let
4556 %printer @{ print_context (yyo, $$); @} let
4557 @end group
4558
4559 %%
4560
4561 @group
4562 stmt:
4563   let stmt
4564     @{
4565       $$ = $2;
4566       pop_context ($let);
4567     @};
4568 @end group
4569
4570 @group
4571 let:
4572   "let" '(' var ')'
4573     @{
4574       $let = push_context ();
4575       declare_variable ($var);
4576     @};
4577
4578 @end group
4579 @end example
4580
4581
4582
4583
4584 @node Midrule Conflicts
4585 @subsubsection Conflicts due to Midrule Actions
4586 Taking action before a rule is completely recognized often leads to
4587 conflicts since the parser must commit to a parse in order to execute the
4588 action.  For example, the following two rules, without midrule actions,
4589 can coexist in a working parser because the parser can shift the open-brace
4590 token and look at what follows before deciding whether there is a
4591 declaration or not:
4592
4593 @example
4594 @group
4595 compound:
4596   '@{' declarations statements '@}'
4597 | '@{' statements '@}'
4598 ;
4599 @end group
4600 @end example
4601
4602 @noindent
4603 But when we add a midrule action as follows, the rules become nonfunctional:
4604
4605 @example
4606 @group
4607 compound:
4608   @{ prepare_for_local_variables (); @}
4609      '@{' declarations statements '@}'
4610 @end group
4611 @group
4612 |    '@{' statements '@}'
4613 ;
4614 @end group
4615 @end example
4616
4617 @noindent
4618 Now the parser is forced to decide whether to run the midrule action
4619 when it has read no farther than the open-brace.  In other words, it
4620 must commit to using one rule or the other, without sufficient
4621 information to do it correctly.  (The open-brace token is what is called
4622 the @dfn{lookahead} token at this time, since the parser is still
4623 deciding what to do about it.  @xref{Lookahead}.)
4624
4625 You might think that you could correct the problem by putting identical
4626 actions into the two rules, like this:
4627
4628 @example
4629 @group
4630 compound:
4631   @{ prepare_for_local_variables (); @}
4632     '@{' declarations statements '@}'
4633 | @{ prepare_for_local_variables (); @}
4634     '@{' statements '@}'
4635 ;
4636 @end group
4637 @end example
4638
4639 @noindent
4640 But this does not help, because Bison does not realize that the two actions
4641 are identical.  (Bison never tries to understand the C code in an action.)
4642
4643 If the grammar is such that a declaration can be distinguished from a
4644 statement by the first token (which is true in C), then one solution which
4645 does work is to put the action after the open-brace, like this:
4646
4647 @example
4648 @group
4649 compound:
4650   '@{' @{ prepare_for_local_variables (); @}
4651     declarations statements '@}'
4652 | '@{' statements '@}'
4653 ;
4654 @end group
4655 @end example
4656
4657 @noindent
4658 Now the first token of the following declaration or statement,
4659 which would in any case tell Bison which rule to use, can still do so.
4660
4661 Another solution is to bury the action inside a nonterminal symbol which
4662 serves as a subroutine:
4663
4664 @example
4665 @group
4666 subroutine:
4667   %empty  @{ prepare_for_local_variables (); @}
4668 ;
4669 @end group
4670
4671 @group
4672 compound:
4673   subroutine '@{' declarations statements '@}'
4674 | subroutine '@{' statements '@}'
4675 ;
4676 @end group
4677 @end example
4678
4679 @noindent
4680 Now Bison can execute the action in the rule for @code{subroutine} without
4681 deciding which rule for @code{compound} it will eventually use.
4682
4683
4684 @node Tracking Locations
4685 @section Tracking Locations
4686 @cindex location
4687 @cindex textual location
4688 @cindex location, textual
4689
4690 Though grammar rules and semantic actions are enough to write a fully
4691 functional parser, it can be useful to process some additional information,
4692 especially symbol locations.
4693
4694 The way locations are handled is defined by providing a data type, and
4695 actions to take when rules are matched.
4696
4697 @menu
4698 * Location Type::               Specifying a data type for locations.
4699 * Actions and Locations::       Using locations in actions.
4700 * Printing Locations::          Defining how locations are printed.
4701 * Location Default Action::     Defining a general way to compute locations.
4702 @end menu
4703
4704 @node Location Type
4705 @subsection Data Type of Locations
4706 @cindex data type of locations
4707 @cindex default location type
4708
4709 Defining a data type for locations is much simpler than for semantic values,
4710 since all tokens and groupings always use the same type.  The location type
4711 is specified using @samp{%define api.location.type}:
4712
4713 @example
4714 %define api.location.type @{location_t@}
4715 @end example
4716
4717 This defines, in the C generated code, the @code{YYLTYPE} type name.  When
4718 @code{YYLTYPE} is not defined, Bison uses a default structure type with four
4719 members:
4720
4721 @example
4722 typedef struct YYLTYPE
4723 @{
4724   int first_line;
4725   int first_column;
4726   int last_line;
4727   int last_column;
4728 @} YYLTYPE;
4729 @end example
4730
4731 In C, you may also specify the type of locations by defining a macro called
4732 @code{YYLTYPE}, just as you can specify the semantic value type by defining
4733 a @code{YYSTYPE} macro (@pxref{Value Type}).  However, rather than using
4734 macros, we recommend the @code{api.value.type} and @code{api.location.type}
4735 @code{%define} variables.
4736
4737 Default locations represent a range in the source file(s), but this is not a
4738 requirement.  It could be a single point or just a line number, or even more
4739 complex structures.
4740
4741 When the default location type is used, Bison initializes all these fields
4742 to 1 for @code{yylloc} at the beginning of the parsing.  To initialize
4743 @code{yylloc} with a custom location type (or to choose a different
4744 initialization), use the @code{%initial-action} directive.  @xref{Initial
4745 Action Decl}.
4746
4747
4748 @node Actions and Locations
4749 @subsection Actions and Locations
4750 @cindex location actions
4751 @cindex actions, location
4752 @vindex @@$
4753 @vindex @@@var{n}
4754 @vindex @@@var{name}
4755 @vindex @@[@var{name}]
4756
4757 Actions are not only useful for defining language semantics, but also for
4758 describing the behavior of the output parser with locations.
4759
4760 The most obvious way for building locations of syntactic groupings is very
4761 similar to the way semantic values are computed.  In a given rule, several
4762 constructs can be used to access the locations of the elements being matched.
4763 The location of the @var{n}th component of the right hand side is
4764 @code{@@@var{n}}, while the location of the left hand side grouping is
4765 @code{@@$}.
4766
4767 In addition, the named references construct @code{@@@var{name}} and
4768 @code{@@[@var{name}]} may also be used to address the symbol locations.
4769 @xref{Named References}, for more information about using the named
4770 references construct.
4771
4772 Here is a basic example using the default data type for locations:
4773
4774 @example
4775 @group
4776 exp:
4777   @dots{}
4778 | exp '/' exp
4779     @{
4780       @@$.first_column = @@1.first_column;
4781       @@$.first_line = @@1.first_line;
4782       @@$.last_column = @@3.last_column;
4783       @@$.last_line = @@3.last_line;
4784       if ($3)
4785         $$ = $1 / $3;
4786       else
4787         @{
4788           $$ = 1;
4789           fprintf (stderr, "%d.%d-%d.%d: division by zero",
4790                    @@3.first_line, @@3.first_column,
4791                    @@3.last_line, @@3.last_column);
4792         @}
4793     @}
4794 @end group
4795 @end example
4796
4797 As for semantic values, there is a default action for locations that is
4798 run each time a rule is matched.  It sets the beginning of @code{@@$} to the
4799 beginning of the first symbol, and the end of @code{@@$} to the end of the
4800 last symbol.
4801
4802 With this default action, the location tracking can be fully automatic.  The
4803 example above can simply be rewritten this way:
4804
4805 @example
4806 @group
4807 exp:
4808   @dots{}
4809 | exp '/' exp
4810     @{
4811       if ($3)
4812         $$ = $1 / $3;
4813       else
4814         @{
4815           $$ = 1;
4816           fprintf (stderr, "%d.%d-%d.%d: division by zero",
4817                    @@3.first_line, @@3.first_column,
4818                    @@3.last_line, @@3.last_column);
4819         @}
4820     @}
4821 @end group
4822 @end example
4823
4824 @vindex yylloc
4825 It is also possible to access the location of the lookahead token, if any,
4826 from a semantic action.
4827 This location is stored in @code{yylloc}.
4828 @xref{Action Features}.
4829
4830 @node Printing Locations
4831 @subsection Printing Locations
4832 @vindex YYLOCATION_PRINT
4833
4834 When using the default location type, the debug traces report the symbols'
4835 location.  The generated parser does so using the @code{YYLOCATION_PRINT}
4836 macro.
4837
4838 @deffn {Macro} YYLOCATION_PRINT (@var{file}, @var{loc})@code{;}
4839 When traces are enabled, print @var{loc} (of type @samp{YYLTYPE const *}) on
4840 @var{file} (of type @samp{FILE *}).  Do nothing when traces are disabled, or
4841 if the location type is user defined.
4842 @end deffn
4843
4844 To get locations in the debug traces with your user-defined location types,
4845 define the @code{YYLOCATION_PRINT} macro.  For instance:
4846
4847 @example
4848 #define YYLOCATION_PRINT   location_print
4849 @end example
4850
4851
4852
4853 @node Location Default Action
4854 @subsection Default Action for Locations
4855 @vindex YYLLOC_DEFAULT
4856 @cindex GLR parsers and @code{YYLLOC_DEFAULT}
4857
4858 Actually, actions are not the best place to compute locations.  Since
4859 locations are much more general than semantic values, there is room in
4860 the output parser to redefine the default action to take for each
4861 rule.  The @code{YYLLOC_DEFAULT} macro is invoked each time a rule is
4862 matched, before the associated action is run.  It is also invoked
4863 while processing a syntax error, to compute the error's location.
4864 Before reporting an unresolvable syntactic ambiguity, a GLR
4865 parser invokes @code{YYLLOC_DEFAULT} recursively to compute the location
4866 of that ambiguity.
4867
4868 Most of the time, this macro is general enough to eliminate
4869 location-dedicated code from semantic actions.
4870
4871 The @code{YYLLOC_DEFAULT} macro takes three parameters.  The first one is
4872 the location of the grouping (the result of the computation).  When a
4873 rule is matched, the second parameter identifies locations of
4874 all right hand side elements of the rule being matched, and the third
4875 parameter is the size of the rule's right hand side.
4876 When a GLR parser reports an ambiguity, which of multiple candidate
4877 right hand sides it passes to @code{YYLLOC_DEFAULT} is undefined.
4878 When processing a syntax error, the second parameter identifies locations
4879 of the symbols that were discarded during error processing, and the third
4880 parameter is the number of discarded symbols.
4881
4882 By default, @code{YYLLOC_DEFAULT} is defined this way:
4883
4884 @example
4885 @group
4886 # define YYLLOC_DEFAULT(Cur, Rhs, N)                      \
4887 do                                                        \
4888   if (N)                                                  \
4889     @{                                                     \
4890       (Cur).first_line   = YYRHSLOC(Rhs, 1).first_line;   \
4891       (Cur).first_column = YYRHSLOC(Rhs, 1).first_column; \
4892       (Cur).last_line    = YYRHSLOC(Rhs, N).last_line;    \
4893       (Cur).last_column  = YYRHSLOC(Rhs, N).last_column;  \
4894     @}                                                     \
4895   else                                                    \
4896     @{                                                     \
4897       (Cur).first_line   = (Cur).last_line   =            \
4898         YYRHSLOC(Rhs, 0).last_line;                       \
4899       (Cur).first_column = (Cur).last_column =            \
4900         YYRHSLOC(Rhs, 0).last_column;                     \
4901     @}                                                     \
4902 while (0)
4903 @end group
4904 @end example
4905
4906 @noindent
4907 where @code{YYRHSLOC (rhs, k)} is the location of the @var{k}th symbol
4908 in @var{rhs} when @var{k} is positive, and the location of the symbol
4909 just before the reduction when @var{k} and @var{n} are both zero.
4910
4911 When defining @code{YYLLOC_DEFAULT}, you should consider that:
4912
4913 @itemize @bullet
4914 @item
4915 All arguments are free of side-effects.  However, only the first one (the
4916 result) should be modified by @code{YYLLOC_DEFAULT}.
4917
4918 @item
4919 For consistency with semantic actions, valid indexes within the
4920 right hand side range from 1 to @var{n}.  When @var{n} is zero, only 0 is a
4921 valid index, and it refers to the symbol just before the reduction.
4922 During error processing @var{n} is always positive.
4923
4924 @item
4925 Your macro should parenthesize its arguments, if need be, since the
4926 actual arguments may not be surrounded by parentheses.  Also, your
4927 macro should expand to something that can be used as a single
4928 statement when it is followed by a semicolon.
4929 @end itemize
4930
4931 @node Named References
4932 @section Named References
4933 @cindex named references
4934
4935 As described in the preceding sections, the traditional way to refer to any
4936 semantic value or location is a @dfn{positional reference}, which takes the
4937 form @code{$@var{n}}, @code{$$}, @code{@@@var{n}}, and @code{@@$}.  However,
4938 such a reference is not very descriptive.  Moreover, if you later decide to
4939 insert or remove symbols in the right-hand side of a grammar rule, the need
4940 to renumber such references can be tedious and error-prone.
4941
4942 To avoid these issues, you can also refer to a semantic value or location
4943 using a @dfn{named reference}.  First of all, original symbol names may be
4944 used as named references.  For example:
4945
4946 @example
4947 @group
4948 invocation: op '(' args ')'
4949   @{ $invocation = new_invocation ($op, $args, @@invocation); @}
4950 @end group
4951 @end example
4952
4953 @noindent
4954 Positional and named references can be mixed arbitrarily.  For example:
4955
4956 @example
4957 @group
4958 invocation: op '(' args ')'
4959   @{ $$ = new_invocation ($op, $args, @@$); @}
4960 @end group
4961 @end example
4962
4963 @noindent
4964 However, sometimes regular symbol names are not sufficient due to
4965 ambiguities:
4966
4967 @example
4968 @group
4969 exp: exp '/' exp
4970   @{ $exp = $exp / $exp; @} // $exp is ambiguous.
4971
4972 exp: exp '/' exp
4973   @{ $$ = $1 / $exp; @} // One usage is ambiguous.
4974
4975 exp: exp '/' exp
4976   @{ $$ = $1 / $3; @} // No error.
4977 @end group
4978 @end example
4979
4980 @noindent
4981 When ambiguity occurs, explicitly declared names may be used for values and
4982 locations.  Explicit names are declared as a bracketed name after a symbol
4983 appearance in rule definitions.  For example:
4984 @example
4985 @group
4986 exp[result]: exp[left] '/' exp[right]
4987   @{ $result = $left / $right; @}
4988 @end group
4989 @end example
4990
4991 @noindent
4992 In order to access a semantic value generated by a midrule action, an
4993 explicit name may also be declared by putting a bracketed name after the
4994 closing brace of the midrule action code:
4995 @example
4996 @group
4997 exp[res]: exp[x] '+' @{$left = $x;@}[left] exp[right]
4998   @{ $res = $left + $right; @}
4999 @end group
5000 @end example
5001
5002 @noindent
5003
5004 In references, in order to specify names containing dots and dashes, an explicit
5005 bracketed syntax @code{$[name]} and @code{@@[name]} must be used:
5006 @example
5007 @group
5008 if-stmt: "if" '(' expr ')' "then" then.stmt ';'
5009   @{ $[if-stmt] = new_if_stmt ($expr, $[then.stmt]); @}
5010 @end group
5011 @end example
5012
5013 It often happens that named references are followed by a dot, dash or other
5014 C punctuation marks and operators.  By default, Bison will read
5015 @samp{$name.suffix} as a reference to symbol value @code{$name} followed by
5016 @samp{.suffix}, i.e., an access to the @code{suffix} field of the semantic
5017 value.  In order to force Bison to recognize @samp{name.suffix} in its
5018 entirety as the name of a semantic value, the bracketed syntax
5019 @samp{$[name.suffix]} must be used.
5020
5021 @node Declarations
5022 @section Bison Declarations
5023 @cindex declarations, Bison
5024 @cindex Bison declarations
5025
5026 The @dfn{Bison declarations} section of a Bison grammar defines the symbols
5027 used in formulating the grammar and the data types of semantic values.
5028 @xref{Symbols}.
5029
5030 All token kind names (but not single-character literal tokens such as
5031 @code{'+'} and @code{'*'}) must be declared.  Nonterminal symbols must be
5032 declared if you need to specify which data type to use for the semantic
5033 value (@pxref{Multiple Types}).
5034
5035 The first rule in the grammar file also specifies the start symbol, by
5036 default.  If you want some other symbol to be the start symbol, you
5037 must declare it explicitly (@pxref{Language and Grammar}).
5038
5039 @menu
5040 * Require Decl::      Requiring a Bison version.
5041 * Token Decl::        Declaring terminal symbols.
5042 * Precedence Decl::   Declaring terminals with precedence and associativity.
5043 * Type Decl::         Declaring the choice of type for a nonterminal symbol.
5044 * Symbol Decls::      Summary of the Syntax of Symbol Declarations.
5045 * Initial Action Decl::  Code run before parsing starts.
5046 * Destructor Decl::   Declaring how symbols are freed.
5047 * Printer Decl::      Declaring how symbol values are displayed.
5048 * Expect Decl::       Suppressing warnings about parsing conflicts.
5049 * Start Decl::        Specifying the start symbol.
5050 * Pure Decl::         Requesting a reentrant parser.
5051 * Push Decl::         Requesting a push parser.
5052 * Decl Summary::      Table of all Bison declarations.
5053 * %define Summary::   Defining variables to adjust Bison's behavior.
5054 * %code Summary::     Inserting code into the parser source.
5055 @end menu
5056
5057 @node Require Decl
5058 @subsection Require a Version of Bison
5059 @cindex version requirement
5060 @cindex requiring a version of Bison
5061 @findex %require
5062
5063 You may require the minimum version of Bison to process the grammar.  If
5064 the requirement is not met, @command{bison} exits with an error (exit
5065 status 63).
5066
5067 @example
5068 %require "@var{version}"
5069 @end example
5070
5071 Some deprecated behaviors are disabled for some required @var{version}:
5072 @table @asis
5073 @item @code{"3.2"} (or better)
5074 The C++ deprecated files @file{position.hh} and @file{stack.hh} are no
5075 longer generated.
5076 @end table
5077
5078
5079 @node Token Decl
5080 @subsection Token Kind Names
5081 @cindex declaring token kind names
5082 @cindex token kind names, declaring
5083 @cindex declaring literal string tokens
5084 @findex %token
5085
5086 The basic way to declare a token kind name (terminal symbol) is as follows:
5087
5088 @example
5089 %token @var{name}
5090 @end example
5091
5092 Bison will convert this into a definition in the parser, so that the
5093 function @code{yylex} (if it is in this file) can use the name @var{name} to
5094 stand for this token kind's code.
5095
5096 Alternatively, you can use @code{%left}, @code{%right}, @code{%precedence},
5097 or @code{%nonassoc} instead of @code{%token}, if you wish to specify
5098 associativity and precedence.  @xref{Precedence Decl}.  However, for
5099 clarity, we recommend using these directives only to declare associativity
5100 and precedence, and not to add string aliases, semantic types, etc.
5101
5102 You can explicitly specify the numeric code for a token kind by appending a
5103 nonnegative decimal or hexadecimal integer value in the field immediately
5104 following the token name:
5105
5106 @example
5107 %token NUM 300
5108 %token XNUM 0x12d // a GNU extension
5109 @end example
5110
5111 @noindent
5112 It is generally best, however, to let Bison choose the numeric codes for all
5113 token kinds.  Bison will automatically select codes that don't conflict with
5114 each other or with normal characters.
5115
5116 In the event that the stack type is a union, you must augment the
5117 @code{%token} or other token declaration to include the data type
5118 alternative delimited by angle-brackets (@pxref{Multiple Types}).
5119
5120 For example:
5121
5122 @example
5123 @group
5124 %union @{              /* define stack type */
5125   double val;
5126   symrec *tptr;
5127 @}
5128 %token <val> NUM      /* define token NUM and its type */
5129 @end group
5130 @end example
5131
5132 You can associate a literal string token with a token kind name by writing
5133 the literal string at the end of a @code{%token} declaration which declares
5134 the name.  For example:
5135
5136 @example
5137 %token ARROW "=>"
5138 @end example
5139
5140 @noindent
5141 For example, a grammar for the C language might specify these names with
5142 equivalent literal string tokens:
5143
5144 @example
5145 %token  <operator>  OR      "||"
5146 %token  <operator>  LE 134  "<="
5147 %left  OR  "<="
5148 @end example
5149
5150 @noindent
5151 Once you equate the literal string and the token kind name, you can use them
5152 interchangeably in further declarations or the grammar rules.  The
5153 @code{yylex} function can use the token name or the literal string to obtain
5154 the token kind code (@pxref{Calling Convention}).
5155
5156 String aliases allow for better error messages using the literal strings
5157 instead of the token names, such as @samp{syntax error, unexpected ||,
5158 expecting number or (} rather than @samp{syntax error, unexpected OR,
5159 expecting NUM or LPAREN}.
5160
5161 String aliases may also be marked for internationalization (@pxref{Token
5162 I18n}):
5163
5164 @example
5165 %token
5166     OR     "||"
5167     LPAREN "("
5168     RPAREN ")"
5169     '\n'   _("end of line")
5170   <double>
5171     NUM    _("number")
5172 @end example
5173
5174 @noindent
5175 would produce in French @samp{erreur de syntaxe, || inattendu, attendait
5176 nombre ou (} rather than @samp{erreur de syntaxe, || inattendu, attendait
5177 number ou (}.
5178
5179 @node Precedence Decl
5180 @subsection Operator Precedence
5181 @cindex precedence declarations
5182 @cindex declaring operator precedence
5183 @cindex operator precedence, declaring
5184
5185 Use the @code{%left}, @code{%right}, @code{%nonassoc}, or @code{%precedence}
5186 declaration to declare a token and specify its precedence and associativity,
5187 all at once.  These are called @dfn{precedence declarations}.
5188 @xref{Precedence}, for general information on operator
5189 precedence.
5190
5191 The syntax of a precedence declaration is nearly the same as that of
5192 @code{%token}: either
5193
5194 @example
5195 %left @var{symbols}@dots{}
5196 @end example
5197
5198 @noindent
5199 or
5200
5201 @example
5202 %left <@var{type}> @var{symbols}@dots{}
5203 @end example
5204
5205 And indeed any of these declarations serves the purposes of @code{%token}.
5206 But in addition, they specify the associativity and relative precedence for
5207 all the @var{symbols}:
5208
5209 @itemize @bullet
5210 @item
5211 The associativity of an operator @var{op} determines how repeated uses of
5212 the operator nest: whether @samp{@var{x} @var{op} @var{y} @var{op} @var{z}}
5213 is parsed by grouping @var{x} with @var{y} first or by grouping @var{y} with
5214 @var{z} first.  @code{%left} specifies left-associativity (grouping @var{x}
5215 with @var{y} first) and @code{%right} specifies right-associativity
5216 (grouping @var{y} with @var{z} first).  @code{%nonassoc} specifies no
5217 associativity, which means that @samp{@var{x} @var{op} @var{y} @var{op}
5218 @var{z}} is considered a syntax error.
5219
5220 @code{%precedence} gives only precedence to the @var{symbols}, and defines
5221 no associativity at all.  Use this to define precedence only, and leave any
5222 potential conflict due to associativity enabled.
5223
5224 @item
5225 The precedence of an operator determines how it nests with other operators.
5226 All the tokens declared in a single precedence declaration have equal
5227 precedence and nest together according to their associativity.  When two
5228 tokens declared in different precedence declarations associate, the one
5229 declared later has the higher precedence and is grouped first.
5230 @end itemize
5231
5232 For backward compatibility, there is a confusing difference between the
5233 argument lists of @code{%token} and precedence declarations.  Only a
5234 @code{%token} can associate a literal string with a token kind name.  A
5235 precedence declaration always interprets a literal string as a reference to
5236 a separate token.  For example:
5237
5238 @example
5239 %left  OR "<="         // Does not declare an alias.
5240 %left  OR 134 "<=" 135 // Declares 134 for OR and 135 for "<=".
5241 @end example
5242
5243 @node Type Decl
5244 @subsection Nonterminal Symbols
5245 @cindex declaring value types, nonterminals
5246 @cindex value types, nonterminals, declaring
5247 @findex %nterm
5248 @findex %type
5249
5250 @noindent
5251 When you use @code{%union} to specify multiple value types, you must
5252 declare the value type of each nonterminal symbol for which values are
5253 used.  This is done with a @code{%type} declaration, like this:
5254
5255 @example
5256 %type <@var{type}> @var{nonterminal}@dots{}
5257 @end example
5258
5259 @noindent
5260 Here @var{nonterminal} is the name of a nonterminal symbol, and @var{type}
5261 is the name given in the @code{%union} to the alternative that you want
5262 (@pxref{Union Decl}).  You can give any number of nonterminal symbols in the
5263 same @code{%type} declaration, if they have the same value type.  Use spaces
5264 to separate the symbol names.
5265
5266 While POSIX Yacc allows @code{%type} only for nonterminals, Bison accepts
5267 that this directive be also applied to terminal symbols.  To declare
5268 exclusively nonterminal symbols, use the safer @code{%nterm}:
5269
5270 @example
5271 %nterm <@var{type}> @var{nonterminal}@dots{}
5272 @end example
5273
5274
5275 @node Symbol Decls
5276 @subsection Syntax of Symbol Declarations
5277 @findex %left
5278 @findex %nterm
5279 @findex %token
5280 @findex %type
5281
5282 The syntax of the various directives to declare symbols is as follows.
5283
5284 @example
5285 %token @var{tag}? ( @var{id} @var{number}? @var{string}? )+ ( @var{tag} ( @var{id} @var{number}? @var{string}? )+ )*
5286 %left  @var{tag}? ( @var{id} @var{number}?)+ ( @var{tag} ( @var{id} @var{number}? )+ )*
5287 %type  @var{tag}? ( @var{id} | @var{char} | @var{string} )+ ( @var{tag} ( @var{id} | @var{char} | @var{string} )+ )*
5288 %nterm @var{tag}? @var{id}+ ( @var{tag} @var{id}+ )*
5289 @end example
5290
5291 @noindent
5292 where @var{tag} denotes a type tag such as @samp{<ival>}, @var{id} denotes
5293 an identifier such as @samp{NUM}, @var{number} a decimal or hexadecimal
5294 integer such as @samp{300} or @samp{0x12d}, @var{char} a character literal
5295 such as @samp{'+'}, and @var{string} a string literal such as
5296 @samp{"number"}.  The postfix quantifiers are @samp{?} (zero or one),
5297 @samp{*} (zero or more) and @samp{+} (one or more).
5298
5299 The directives @code{%precedence}, @code{%right} and @code{%nonassoc} behave
5300 like @code{%left}.
5301
5302 @node Initial Action Decl
5303 @subsection Performing Actions before Parsing
5304 @findex %initial-action
5305
5306 Sometimes your parser needs to perform some initializations before parsing.
5307 The @code{%initial-action} directive allows for such arbitrary code.
5308
5309 @deffn {Directive} %initial-action @{ @var{code} @}
5310 @findex %initial-action
5311 Declare that the braced @var{code} must be invoked before parsing each time
5312 @code{yyparse} is called.  The @var{code} may use @code{$$} (or
5313 @code{$<@var{tag}>$}) and @code{@@$} --- initial value and location of the
5314 lookahead --- and the @code{%parse-param}.
5315 @end deffn
5316
5317 For instance, if your locations use a file name, you may use
5318
5319 @example
5320 %parse-param @{ char const *file_name @};
5321 %initial-action
5322 @{
5323   @@$.initialize (file_name);
5324 @};
5325 @end example
5326
5327
5328 @node Destructor Decl
5329 @subsection Freeing Discarded Symbols
5330 @cindex freeing discarded symbols
5331 @findex %destructor
5332 @findex <*>
5333 @findex <>
5334 During error recovery (@pxref{Error Recovery}), symbols already pushed on
5335 the stack and tokens coming from the rest of the file are discarded until
5336 the parser falls on its feet.  If the parser runs out of memory, or if it
5337 returns via @code{YYABORT}, @code{YYACCEPT} or @code{YYNOMEM}, all the
5338 symbols on the stack must be discarded.  Even if the parser succeeds, it
5339 must discard the start symbol.
5340
5341 When discarded symbols convey heap-based information, this memory is
5342 lost.  While this behavior can be tolerable for batch parsers, such as
5343 in traditional compilers, it is unacceptable for programs like shells or
5344 protocol implementations that may parse and execute indefinitely.
5345
5346 The @code{%destructor} directive defines code that is called when a
5347 symbol is automatically discarded.
5348
5349 @deffn {Directive} %destructor @{ @var{code} @} @var{symbols}
5350 @findex %destructor
5351 Invoke the braced @var{code} whenever the parser discards one of the
5352 @var{symbols}.  Within @var{code}, @code{$$} (or @code{$<@var{tag}>$})
5353 designates the semantic value associated with the discarded symbol, and
5354 @code{@@$} designates its location.  The additional parser parameters are
5355 also available (@pxref{Parser Function}).
5356
5357 When a symbol is listed among @var{symbols}, its @code{%destructor} is called a
5358 per-symbol @code{%destructor}.
5359 You may also define a per-type @code{%destructor} by listing a semantic type
5360 tag among @var{symbols}.
5361 In that case, the parser will invoke this @var{code} whenever it discards any
5362 grammar symbol that has that semantic type tag unless that symbol has its own
5363 per-symbol @code{%destructor}.
5364
5365 Finally, you can define two different kinds of default @code{%destructor}s.
5366 You can place each of @code{<*>} and @code{<>} in the @var{symbols} list of
5367 exactly one @code{%destructor} declaration in your grammar file.
5368 The parser will invoke the @var{code} associated with one of these whenever it
5369 discards any user-defined grammar symbol that has no per-symbol and no per-type
5370 @code{%destructor}.
5371 The parser uses the @var{code} for @code{<*>} in the case of such a grammar
5372 symbol for which you have formally declared a semantic type tag (@code{%token},
5373 @code{%nterm}, and @code{%type}
5374 count as such a declaration, but @code{$<tag>$} does not).
5375 The parser uses the @var{code} for @code{<>} in the case of such a grammar
5376 symbol that has no declared semantic type tag.
5377 @end deffn
5378
5379 @noindent
5380 For example:
5381
5382 @example
5383 %union @{ char *string; @}
5384 %token <string> STRING1 STRING2
5385 %nterm <string> string1 string2
5386 %union @{ char character; @}
5387 %token <character> CHR
5388 %nterm <character> chr
5389 %token TAGLESS
5390
5391 %destructor @{ @} <character>
5392 %destructor @{ free ($$); @} <*>
5393 %destructor @{ free ($$); printf ("%d", @@$.first_line); @} STRING1 string1
5394 %destructor @{ printf ("Discarding tagless symbol.\n"); @} <>
5395 @end example
5396
5397 @noindent
5398 guarantees that, when the parser discards any user-defined symbol that has a
5399 semantic type tag other than @code{<character>}, it passes its semantic value
5400 to @code{free} by default.
5401 However, when the parser discards a @code{STRING1} or a @code{string1},
5402 it uses the third @code{%destructor}, which frees it and
5403 prints its line number to @code{stdout} (@code{free} is invoked only once).
5404 Finally, the parser merely prints a message whenever it discards any symbol,
5405 such as @code{TAGLESS}, that has no semantic type tag.
5406
5407 A Bison-generated parser invokes the default @code{%destructor}s only for
5408 user-defined as opposed to Bison-defined symbols.
5409 For example, the parser will not invoke either kind of default
5410 @code{%destructor} for the special Bison-defined symbols @code{$accept},
5411 @code{$undefined}, or @code{$end} (@pxref{Table of Symbols}),
5412 none of which you can reference in your grammar.
5413 It also will not invoke either for the @code{error} token (@pxref{Table of
5414 Symbols}), which is always defined by Bison regardless of whether you
5415 reference it in your grammar.
5416 However, it may invoke one of them for the end token (token 0) if you
5417 redefine it from @code{$end} to, for example, @code{END}:
5418
5419 @example
5420 %token END 0
5421 @end example
5422
5423 @cindex actions in midrule
5424 @cindex midrule actions
5425 Finally, Bison will never invoke a @code{%destructor} for an unreferenced
5426 midrule semantic value (@pxref{Midrule Actions}).
5427 That is, Bison does not consider a midrule to have a semantic value if you
5428 do not reference @code{$$} in the midrule's action or @code{$@var{n}}
5429 (where @var{n} is the right-hand side symbol position of the midrule) in
5430 any later action in that rule.  However, if you do reference either, the
5431 Bison-generated parser will invoke the @code{<>} @code{%destructor} whenever
5432 it discards the midrule symbol.
5433
5434 @ignore
5435 @noindent
5436 In the future, it may be possible to redefine the @code{error} token as a
5437 nonterminal that captures the discarded symbols.
5438 In that case, the parser will invoke the default destructor for it as well.
5439 @end ignore
5440
5441 @sp 1
5442
5443 @cindex discarded symbols
5444 @dfn{Discarded symbols} are the following:
5445
5446 @itemize
5447 @item
5448 stacked symbols popped during the first phase of error recovery,
5449 @item
5450 incoming terminals during the second phase of error recovery,
5451 @item
5452 the current lookahead and the entire stack (except the current
5453 right-hand side symbols) when the parser returns immediately,
5454 @item
5455 the current lookahead and the entire stack (including the current right-hand
5456 side symbols) when the C++ parser (@file{lalr1.cc}) catches an exception in
5457 @code{parse}, and
5458 @item
5459 the start symbol, when the parser succeeds.
5460 @end itemize
5461
5462 The parser can @dfn{return immediately} because of an explicit call to
5463 @code{YYABORT}, @code{YYACCEPT} or @code{YYNOMEM}, or failed error recovery,
5464 or memory exhaustion.
5465
5466 Right-hand side symbols of a rule that explicitly triggers a syntax
5467 error via @code{YYERROR} are not discarded automatically.  As a rule
5468 of thumb, destructors are invoked only when user actions cannot manage
5469 the memory.
5470
5471 @node Printer Decl
5472 @subsection Printing Semantic Values
5473 @cindex printing semantic values
5474 @findex %printer
5475 @findex <*>
5476 @findex <>
5477 When run-time traces are enabled (@pxref{Tracing}),
5478 the parser reports its actions, such as reductions.  When a symbol involved
5479 in an action is reported, only its kind is displayed, as the parser cannot
5480 know how semantic values should be formatted.
5481
5482 The @code{%printer} directive defines code that is called when a symbol is
5483 reported.  Its syntax is the same as @code{%destructor} (@pxref{Destructor
5484 Decl}).
5485
5486 @deffn {Directive} %printer @{ @var{code} @} @var{symbols}
5487 @findex %printer
5488 @vindex yyo
5489 @c This is the same text as for %destructor.
5490 Invoke the braced @var{code} whenever the parser displays one of the
5491 @var{symbols}.  Within @var{code}, @code{yyo} denotes the output stream (a
5492 @code{FILE*} in C, an @code{std::ostream&} in C++, and @code{stdout} in D), @code{$$} (or
5493 @code{$<@var{tag}>$}) designates the semantic value associated with the
5494 symbol, and @code{@@$} its location.  The additional parser parameters are
5495 also available (@pxref{Parser Function}).
5496
5497 The @var{symbols} are defined as for @code{%destructor} (@pxref{Destructor
5498 Decl}): they can be per-type (e.g.,
5499 @samp{<ival>}), per-symbol (e.g., @samp{exp}, @samp{NUM}, @samp{"float"}),
5500 typed per-default (i.e., @samp{<*>}), or untyped per-default (i.e.,
5501 @samp{<>}).
5502 @end deffn
5503
5504 @noindent
5505 For example:
5506
5507 @example
5508 %union @{ char *string; @}
5509 %token <string> STRING1 STRING2
5510 %nterm <string> string1 string2
5511 %union @{ char character; @}
5512 %token <character> CHR
5513 %nterm <character> chr
5514 %token TAGLESS
5515
5516 %printer @{ fprintf (yyo, "'%c'", $$); @} <character>
5517 %printer @{ fprintf (yyo, "&%p", $$); @} <*>
5518 %printer @{ fprintf (yyo, "\"%s\"", $$); @} STRING1 string1
5519 %printer @{ fprintf (yyo, "<>"); @} <>
5520 @end example
5521
5522 @noindent
5523 guarantees that, when the parser prints any symbol that has a semantic type
5524 tag other than @code{<character>}, it displays the address of the semantic
5525 value by default.  However, when the parser displays a @code{STRING1} or a
5526 @code{string1}, it formats it as a string in double quotes.  It performs
5527 only the third @code{%printer} in this case, so it prints only once.
5528 Finally, the parser prints @samp{<>} for any symbol, such as @code{TAGLESS},
5529 that has no semantic type tag.  @xref{Mfcalc Traces}, for a complete example.
5530
5531
5532
5533 @node Expect Decl
5534 @subsection Suppressing Conflict Warnings
5535 @cindex suppressing conflict warnings
5536 @cindex preventing warnings about conflicts
5537 @cindex warnings, preventing
5538 @cindex conflicts, suppressing warnings of
5539 @findex %expect
5540 @findex %expect-rr
5541
5542 Bison normally warns if there are any conflicts in the grammar
5543 (@pxref{Shift/Reduce}), but most real grammars
5544 have harmless shift/reduce conflicts which are resolved in a predictable
5545 way and would be difficult to eliminate.  It is desirable to suppress
5546 the warning about these conflicts unless the number of conflicts
5547 changes.  You can do this with the @code{%expect} declaration.
5548
5549 The declaration looks like this:
5550
5551 @example
5552 %expect @var{n}
5553 @end example
5554
5555 Here @var{n} is a decimal integer.  The declaration says there should
5556 be @var{n} shift/reduce conflicts and no reduce/reduce conflicts.
5557 Bison reports an error if the number of shift/reduce conflicts differs
5558 from @var{n}, or if there are any reduce/reduce conflicts.
5559
5560 For deterministic parsers, reduce/reduce conflicts are more
5561 serious, and should be eliminated entirely.  Bison will always report
5562 reduce/reduce conflicts for these parsers.  With GLR
5563 parsers, however, both kinds of conflicts are routine; otherwise,
5564 there would be no need to use GLR parsing.  Therefore, it is
5565 also possible to specify an expected number of reduce/reduce conflicts
5566 in GLR parsers, using the declaration:
5567
5568 @example
5569 %expect-rr @var{n}
5570 @end example
5571
5572 You may wish to be more specific in your
5573 specification of expected conflicts.  To this end, you can also attach
5574 @code{%expect} and @code{%expect-rr} modifiers to individual rules.
5575 The interpretation of these modifiers differs from their use as
5576 declarations.  When attached to rules, they indicate the number of states
5577 in which the rule is involved in a conflict.  You will need to consult the
5578 output resulting from @option{-v} to determine appropriate numbers to use.
5579 For example, for the following grammar fragment, the first rule for
5580 @code{empty_dims} appears in two states in which the @samp{[} token is a
5581 lookahead.  Having determined that, you can document this fact with an
5582 @code{%expect} modifier as follows:
5583
5584 @example
5585 dims:
5586   empty_dims
5587 | '[' expr ']' dims
5588 ;
5589
5590 empty_dims:
5591   %empty   %expect 2
5592 | empty_dims '[' ']'
5593 ;
5594 @end example
5595
5596 Midrule actions generate implicit rules that are also subject to conflicts
5597 (@pxref{Midrule Conflicts}).  To attach
5598 an @code{%expect} or @code{%expect-rr} annotation to an implicit
5599 midrule action's rule, put it before the action.  For example,
5600
5601 @example
5602 %glr-parser
5603 %expect-rr 1
5604
5605 %%
5606
5607 clause:
5608   "condition" %expect-rr 1 @{ value_mode(); @} '(' exprs ')'
5609 | "condition" %expect-rr 1 @{ class_mode(); @} '(' types ')'
5610 ;
5611 @end example
5612
5613 @noindent
5614 Here, the appropriate midrule action will not be determined until after
5615 the @samp{(} token is shifted.  Thus,
5616 the two actions will clash with each other, and we should expect one
5617 reduce/reduce conflict for each.
5618
5619 In general, using @code{%expect} involves these steps:
5620
5621 @itemize @bullet
5622 @item
5623 Compile your grammar without @code{%expect}.  Use the @option{-v} option
5624 to get a verbose list of where the conflicts occur.  Bison will also
5625 print the number of conflicts.
5626
5627 @item
5628 Check each of the conflicts to make sure that Bison's default
5629 resolution is what you really want.  If not, rewrite the grammar and
5630 go back to the beginning.
5631
5632 @item
5633 Add an @code{%expect} declaration, copying the number @var{n} from the
5634 number that Bison printed.  With GLR parsers, add an
5635 @code{%expect-rr} declaration as well.
5636
5637 @item
5638 Optionally, count up the number of states in which one or more
5639 conflicted reductions for particular rules appear and add these numbers
5640 to the affected rules as @code{%expect-rr} or @code{%expect} modifiers
5641 as appropriate.  Rules that are in conflict appear in the output listing
5642 surrounded by square brackets or, in the case of reduce/reduce conflicts,
5643 as reductions having the same lookahead symbol as a square-bracketed
5644 reduction in the same state.
5645 @end itemize
5646
5647 Now Bison will report an error if you introduce an unexpected conflict,
5648 but will keep silent otherwise.
5649
5650 @node Start Decl
5651 @subsection The Start-Symbol
5652 @cindex declaring the start symbol
5653 @cindex start symbol, declaring
5654 @cindex default start symbol
5655 @findex %start
5656
5657 Bison assumes by default that the start symbol for the grammar is the first
5658 nonterminal specified in the grammar specification section.  The programmer
5659 may override this restriction with the @code{%start} declaration as follows:
5660
5661 @example
5662 %start @var{symbol}
5663 @end example
5664
5665 @node Pure Decl
5666 @subsection A Pure (Reentrant) Parser
5667 @cindex reentrant parser
5668 @cindex pure parser
5669 @findex %define api.pure
5670
5671 A @dfn{reentrant} program is one which does not alter in the course of
5672 execution; in other words, it consists entirely of @dfn{pure} (read-only)
5673 code.  Reentrancy is important whenever asynchronous execution is possible;
5674 for example, a nonreentrant program may not be safe to call from a signal
5675 handler.  In systems with multiple threads of control, a nonreentrant
5676 program must be called only within interlocks.
5677
5678 Normally, Bison generates a parser which is not reentrant.  This is
5679 suitable for most uses, and it permits compatibility with Yacc.  (The
5680 standard Yacc interfaces are inherently nonreentrant, because they use
5681 statically allocated variables for communication with @code{yylex},
5682 including @code{yylval} and @code{yylloc}.)
5683
5684 Alternatively, you can generate a pure, reentrant parser.  The Bison
5685 declaration @samp{%define api.pure} says that you want the parser to be
5686 reentrant.  It looks like this:
5687
5688 @example
5689 %define api.pure full
5690 @end example
5691
5692 The result is that the communication variables @code{yylval} and
5693 @code{yylloc} become local variables in @code{yyparse}, and a different
5694 calling convention is used for the lexical analyzer function @code{yylex}.
5695 @xref{Pure Calling}, for the details of this.  The variable @code{yynerrs}
5696 becomes local in @code{yyparse} in pull mode but it becomes a member of
5697 @code{yypstate} in push mode (@pxref{Error Reporting Function}).  The
5698 convention for calling @code{yyparse} itself is unchanged.
5699
5700 Whether the parser is pure has nothing to do with the grammar rules.
5701 You can generate either a pure parser or a nonreentrant parser from any
5702 valid grammar.
5703
5704 @node Push Decl
5705 @subsection A Push Parser
5706 @cindex push parser
5707 @cindex push parser
5708 @findex %define api.push-pull
5709
5710 A pull parser is called once and it takes control until all its input
5711 is completely parsed.  A push parser, on the other hand, is called
5712 each time a new token is made available.
5713
5714 A push parser is typically useful when the parser is part of a
5715 main event loop in the client's application.  This is typically
5716 a requirement of a GUI, when the main event loop needs to be triggered
5717 within a certain time period.
5718
5719 Normally, Bison generates a pull parser.
5720 The following Bison declaration says that you want the parser to be a push
5721 parser (@pxref{%define Summary}):
5722
5723 @example
5724 %define api.push-pull push
5725 @end example
5726
5727 In almost all cases, you want to ensure that your push parser is also
5728 a pure parser (@pxref{Pure Decl}).  The only
5729 time you should create an impure push parser is to have backwards
5730 compatibility with the impure Yacc pull mode interface.  Unless you know
5731 what you are doing, your declarations should look like this:
5732
5733 @example
5734 %define api.pure full
5735 %define api.push-pull push
5736 @end example
5737
5738 There is a major functional difference between the pure push parser
5739 and the impure push parser.  It is acceptable for a pure push parser to have
5740 many parser instances, of the same type of parser, in memory at the same time.
5741 An impure push parser should only use one parser at a time.
5742
5743 When a push parser is selected, Bison will generate some new symbols in
5744 the generated parser.  @code{yypstate} is a structure that the generated
5745 parser uses to store the parser's state.  @code{yypstate_new} is the
5746 function that will create a new parser instance.  @code{yypstate_delete}
5747 will free the resources associated with the corresponding parser instance.
5748 Finally, @code{yypush_parse} is the function that should be called whenever a
5749 token is available to provide to the parser.  A trivial example
5750 of using a pure push parser would look like this:
5751
5752 @example
5753 int status;
5754 yypstate *ps = yypstate_new ();
5755 do @{
5756   status = yypush_parse (ps, yylex (), NULL);
5757 @} while (status == YYPUSH_MORE);
5758 yypstate_delete (ps);
5759 @end example
5760
5761 If the user decided to use an impure push parser, a few things about the
5762 generated parser will change.  The @code{yychar} variable becomes a global
5763 variable instead of a local one in the @code{yypush_parse} function.  For
5764 this reason, the signature of the @code{yypush_parse} function is changed to
5765 remove the token as a parameter.  A nonreentrant push parser example would
5766 thus look like this:
5767
5768 @example
5769 extern int yychar;
5770 int status;
5771 yypstate *ps = yypstate_new ();
5772 do @{
5773   yychar = yylex ();
5774   status = yypush_parse (ps);
5775 @} while (status == YYPUSH_MORE);
5776 yypstate_delete (ps);
5777 @end example
5778
5779 That's it. Notice the next token is put into the global variable @code{yychar}
5780 for use by the next invocation of the @code{yypush_parse} function.
5781
5782 Bison also supports both the push parser interface and the pull parser
5783 interface in the same generated parser.  In order to get this functionality,
5784 you should replace the @samp{%define api.push-pull push} declaration with the
5785 @samp{%define api.push-pull both} declaration.  Doing this will create all of
5786 the symbols mentioned earlier along with the two extra symbols, @code{yyparse}
5787 and @code{yypull_parse}.  @code{yyparse} can be used exactly as it normally
5788 would be used.  However, the user should note that it is implemented in the
5789 generated parser by calling @code{yypull_parse}.
5790 This makes the @code{yyparse} function that is generated with the
5791 @samp{%define api.push-pull both} declaration slower than the normal
5792 @code{yyparse} function.  If the user
5793 calls the @code{yypull_parse} function it will parse the rest of the input
5794 stream.  It is possible to @code{yypush_parse} tokens to select a subgrammar
5795 and then @code{yypull_parse} the rest of the input stream.  If you would like
5796 to switch back and forth between parsing styles, you would have to
5797 write your own @code{yypull_parse} function that knows when to quit looking
5798 for input.  An example of using the @code{yypull_parse} function would look
5799 like this:
5800
5801 @example
5802 yypstate *ps = yypstate_new ();
5803 yypull_parse (ps); /* Will call the lexer */
5804 yypstate_delete (ps);
5805 @end example
5806
5807 Adding the @samp{%define api.pure} declaration does exactly the same thing to
5808 the generated parser with @samp{%define api.push-pull both} as it did for
5809 @samp{%define api.push-pull push}.
5810
5811 @node Decl Summary
5812 @subsection Bison Declaration Summary
5813 @cindex Bison declaration summary
5814 @cindex declaration summary
5815 @cindex summary, Bison declaration
5816
5817 Here is a summary of the declarations used to define a grammar:
5818
5819 @deffn {Directive} %union
5820 Declare the collection of data types that semantic values may have
5821 (@pxref{Union Decl}).
5822 @end deffn
5823
5824 @deffn {Directive} %token
5825 Declare a terminal symbol (token kind name) with no precedence
5826 or associativity specified (@pxref{Token Decl}).
5827 @end deffn
5828
5829 @deffn {Directive} %right
5830 Declare a terminal symbol (token kind name) that is right-associative
5831 (@pxref{Precedence Decl}).
5832 @end deffn
5833
5834 @deffn {Directive} %left
5835 Declare a terminal symbol (token kind name) that is left-associative
5836 (@pxref{Precedence Decl}).
5837 @end deffn
5838
5839 @deffn {Directive} %nonassoc
5840 Declare a terminal symbol (token kind name) that is nonassociative
5841 (@pxref{Precedence Decl}).
5842 Using it in a way that would be associative is a syntax error.
5843 @end deffn
5844
5845 @ifset defaultprec
5846 @deffn {Directive} %default-prec
5847 Assign a precedence to rules lacking an explicit @code{%prec} modifier
5848 (@pxref{Contextual Precedence}).
5849 @end deffn
5850 @end ifset
5851
5852 @deffn {Directive} %nterm
5853 Declare the type of semantic values for a nonterminal symbol (@pxref{Type
5854 Decl}).
5855 @end deffn
5856
5857 @deffn {Directive} %type
5858 Declare the type of semantic values for a symbol (@pxref{Type Decl}).
5859 @end deffn
5860
5861 @deffn {Directive} %start
5862 Specify the grammar's start symbol (@pxref{Start Decl}).
5863 @end deffn
5864
5865 @deffn {Directive} %expect
5866 Declare the expected number of shift/reduce conflicts, either overall or
5867 for a given rule
5868 (@pxref{Expect Decl}).
5869 @end deffn
5870
5871 @deffn {Directive} %expect-rr
5872 Declare the expected number of reduce/reduce conflicts, either overall or
5873 for a given rule
5874 (@pxref{Expect Decl}).
5875 @end deffn
5876
5877
5878 @sp 1
5879 @noindent
5880 In order to change the behavior of @command{bison}, use the following
5881 directives:
5882
5883 @deffn {Directive} %code @{@var{code}@}
5884 @deffnx {Directive} %code @var{qualifier} @{@var{code}@}
5885 @findex %code
5886 Insert @var{code} verbatim into the output parser source at the
5887 default location or at the location specified by @var{qualifier}.
5888 @xref{%code Summary}.
5889 @end deffn
5890
5891 @deffn {Directive} %debug
5892 Instrument the parser for traces.  Obsoleted by @samp{%define
5893 parse.trace}.
5894 @xref{Tracing}.
5895 @end deffn
5896
5897 @deffn {Directive} %define @var{variable}
5898 @deffnx {Directive} %define @var{variable} @var{value}
5899 @deffnx {Directive} %define @var{variable} @{@var{value}@}
5900 @deffnx {Directive} %define @var{variable} "@var{value}"
5901 Define a variable to adjust Bison's behavior.  @xref{%define Summary}.
5902 @end deffn
5903
5904 @deffn {Directive} %defines
5905 @deffnx {Directive} %defines @var{defines-file}
5906 Historical name for @code{%header}.  @xref{%header,,@code{%header}}.
5907 @end deffn
5908
5909 @deffn {Directive} %destructor
5910 Specify how the parser should reclaim the memory associated with
5911 discarded symbols.  @xref{Destructor Decl}.
5912 @end deffn
5913
5914 @deffn {Directive} %file-prefix "@var{prefix}"
5915 Specify a prefix to use for all Bison output file names.  The names
5916 are chosen as if the grammar file were named @file{@var{prefix}.y}.
5917 @end deffn
5918
5919 @anchor{%header}
5920 @deffn {Directive} %header
5921 Write a parser header file containing definitions for the token kind names
5922 defined in the grammar as well as a few other declarations.  If the parser
5923 implementation file is named @file{@var{name}.c} then the parser header file
5924 is named @file{@var{name}.h}.
5925
5926 For C parsers, the parser header file declares @code{YYSTYPE} unless
5927 @code{YYSTYPE} is already defined as a macro or you have used a
5928 @code{<@var{type}>} tag without using @code{%union}.  Therefore, if you are
5929 using a @code{%union} (@pxref{Multiple Types}) with components that require
5930 other definitions, or if you have defined a @code{YYSTYPE} macro or type
5931 definition (@pxref{Value Type}), you need to arrange for these definitions
5932 to be propagated to all modules, e.g., by putting them in a prerequisite
5933 header that is included both by your parser and by any other module that
5934 needs @code{YYSTYPE}.
5935
5936 Unless your parser is pure, the parser header file declares
5937 @code{yylval} as an external variable.  @xref{Pure Decl}.
5938
5939 If you have also used locations, the parser header file declares
5940 @code{YYLTYPE} and @code{yylloc} using a protocol similar to that of the
5941 @code{YYSTYPE} macro and @code{yylval}.  @xref{Tracking Locations}.
5942
5943 This parser header file is normally essential if you wish to put the
5944 definition of @code{yylex} in a separate source file, because
5945 @code{yylex} typically needs to be able to refer to the
5946 above-mentioned declarations and to the token kind codes.  @xref{Token
5947 Values}.
5948
5949 @findex %code requires
5950 @findex %code provides
5951 If you have declared @code{%code requires} or @code{%code provides}, the output
5952 header also contains their code.
5953 @xref{%code Summary}.
5954
5955 @cindex Header guard
5956 The generated header is protected against multiple inclusions with a C
5957 preprocessor guard: @samp{YY_@var{PREFIX}_@var{FILE}_INCLUDED}, where
5958 @var{PREFIX} and @var{FILE} are the prefix (@pxref{Multiple Parsers}) and
5959 generated file name turned uppercase, with each series of non alphanumerical
5960 characters converted to a single underscore.
5961
5962 For instance with @samp{%define api.prefix @{calc@}} and @samp{%header
5963 "lib/parse.h"}, the header will be guarded as follows.
5964 @example
5965 #ifndef YY_CALC_LIB_PARSE_H_INCLUDED
5966 # define YY_CALC_LIB_PARSE_H_INCLUDED
5967 ...
5968 #endif /* ! YY_CALC_LIB_PARSE_H_INCLUDED */
5969 @end example
5970
5971 Introduced in Bison 3.8.
5972 @end deffn
5973
5974 @deffn {Directive} %header @var{header-file}
5975 Same as above, but save in the file @file{@var{header-file}}.
5976 @end deffn
5977
5978 @deffn {Directive} %language "@var{language}"
5979 Specify the programming language for the generated parser.  Currently
5980 supported languages include C, C++, D and Java.  @var{language} is
5981 case-insensitive.
5982 @end deffn
5983
5984 @deffn {Directive} %locations
5985 Generate the code processing the locations (@pxref{Action Features}).  This
5986 mode is enabled as soon as the grammar uses the special @samp{@@@var{n}}
5987 tokens, but if your grammar does not use them, using @samp{%locations} allows
5988 for more accurate syntax error messages.
5989 @end deffn
5990
5991 @deffn {Directive} %name-prefix "@var{prefix}"
5992 Obsoleted by @samp{%define api.prefix @{@var{prefix}@}}.  @xref{Multiple
5993 Parsers}.  For C++ parsers, see the
5994 @samp{%define api.namespace} documentation in this section.
5995
5996 Rename the external symbols used in the parser so that they start with
5997 @var{prefix} instead of @samp{yy}.  The precise list of symbols renamed in C
5998 parsers is @code{yyparse}, @code{yylex}, @code{yyerror}, @code{yynerrs},
5999 @code{yylval}, @code{yychar}, @code{yydebug}, and (if locations are used)
6000 @code{yylloc}.  If you use a push parser, @code{yypush_parse},
6001 @code{yypull_parse}, @code{yypstate}, @code{yypstate_new} and
6002 @code{yypstate_delete} will also be renamed.  For example, if you use
6003 @samp{%name-prefix "c_"}, the names become @code{c_parse}, @code{c_lex}, and
6004 so on.
6005
6006 Contrary to defining @code{api.prefix}, some symbols are @emph{not} renamed
6007 by @code{%name-prefix}, for instance @code{YYDEBUG}, @code{YYTOKENTYPE},
6008 @code{yytoken_kind_t}, @code{YYSTYPE}, @code{YYLTYPE}.
6009 @end deffn
6010
6011 @ifset defaultprec
6012 @deffn {Directive} %no-default-prec
6013 Do not assign a precedence to rules lacking an explicit @code{%prec}
6014 modifier (@pxref{Contextual Precedence}).
6015 @end deffn
6016 @end ifset
6017
6018 @deffn {Directive} %no-lines
6019 Don't generate any @code{#line} preprocessor commands in the parser
6020 implementation file.  Ordinarily Bison writes these commands in the parser
6021 implementation file so that the C compiler and debuggers will associate
6022 errors and object code with your source file (the grammar file).  This
6023 directive causes them to associate errors with the parser implementation
6024 file, treating it as an independent source file in its own right.
6025 @end deffn
6026
6027 @deffn {Directive} %output "@var{file}"
6028 Generate the parser implementation in @file{@var{file}}.
6029 @end deffn
6030
6031 @deffn {Directive} %pure-parser
6032 Deprecated version of @samp{%define api.pure} (@pxref{%define
6033 Summary}), for which Bison is more careful to warn about
6034 unreasonable usage.
6035 @end deffn
6036
6037 @deffn {Directive} %require "@var{version}"
6038 Require version @var{version} or higher of Bison.  @xref{Require Decl}.
6039 @end deffn
6040
6041 @deffn {Directive} %skeleton "@var{file}"
6042 Specify the skeleton to use.
6043
6044 @c You probably don't need this option unless you are developing Bison.
6045 @c You should use @code{%language} if you want to specify the skeleton for a
6046 @c different language, because it is clearer and because it will always choose the
6047 @c correct skeleton for non-deterministic or push parsers.
6048
6049 If @var{file} does not contain a @code{/}, @var{file} is the name of a skeleton
6050 file in the Bison installation directory.
6051 If it does, @var{file} is an absolute file name or a file name relative to the
6052 directory of the grammar file.
6053 This is similar to how most shells resolve commands.
6054 @end deffn
6055
6056 @deffn {Directive} %token-table
6057 This feature is obsolescent; avoid it in new projects.
6058
6059 Generate an array of token names in the parser implementation file.  The
6060 name of the array is @code{yytname}; @code{yytname[@var{i}]} is the name of
6061 the token whose internal Bison token code is @var{i}.  The first three
6062 elements of @code{yytname} correspond to the predefined tokens
6063 @code{"$end"}, @code{"error"}, and @code{"$undefined"}; after these come the
6064 symbols defined in the grammar file.
6065
6066 The name in the table includes all the characters needed to represent the
6067 token in Bison.  For single-character literals and literal strings, this
6068 includes the surrounding quoting characters and any escape sequences.  For
6069 example, the Bison single-character literal @code{'+'} corresponds to a
6070 three-character name, represented in C as @code{"'+'"}; and the Bison
6071 two-character literal string @code{"\\/"} corresponds to a five-character
6072 name, represented in C as @code{"\"\\\\/\""}.
6073
6074 When you specify @code{%token-table}, Bison also generates macro definitions
6075 for macros @code{YYNTOKENS}, @code{YYNNTS}, @code{YYNRULES}, and
6076 @code{YYNSTATES}:
6077
6078 @table @code
6079 @item YYNTOKENS
6080 The number of terminal symbols, i.e., the highest token code, plus one.
6081 @item YYNNTS
6082 The number of nonterminal symbols.
6083 @item YYNRULES
6084 The number of grammar rules.
6085 @item YYNSTATES
6086 The number of parser states (@pxref{Parser States}).
6087 @end table
6088
6089 Here's code for looking up a multicharacter token in @code{yytname},
6090 assuming that the characters of the token are stored in @code{token_buffer},
6091 and assuming that the token does not contain any characters like @samp{"}
6092 that require escaping.
6093
6094 @example
6095 for (int i = 0; i < YYNTOKENS; i++)
6096   if (yytname[i]
6097       && yytname[i][0] == '"'
6098       && ! strncmp (yytname[i] + 1, token_buffer,
6099                     strlen (token_buffer))
6100       && yytname[i][strlen (token_buffer) + 1] == '"'
6101       && yytname[i][strlen (token_buffer) + 2] == 0)
6102     break;
6103 @end example
6104
6105 This method is discouraged: the primary purpose of string aliases is forging
6106 good error messages, not describing the spelling of keywords.  In addition,
6107 looking for the token kind at runtime incurs a (small but noticeable) cost.
6108
6109 Finally, @code{%token-table} is incompatible with the @code{custom} and
6110 @code{detailed} values of the @code{parse.error} @code{%define} variable.
6111 @end deffn
6112
6113 @deffn {Directive} %verbose
6114 Write an extra output file containing verbose descriptions of the parser
6115 states and what is done for each type of lookahead token in that state.
6116 @xref{Understanding}, for more information.
6117 @end deffn
6118
6119 @deffn {Directive} %yacc
6120 Pretend the option @option{--yacc} was given
6121 (@pxref{option-yacc,,@option{--yacc}}), i.e., imitate Yacc, including its
6122 naming conventions.  Only makes sense with the @file{yacc.c}
6123 skeleton. @xref{Tuning the Parser}, for more.
6124
6125 Of course, being a Bison extension, @code{%yacc} is somewhat
6126 self-contradictory@dots{}
6127 @end deffn
6128
6129
6130 @node %define Summary
6131 @subsection %define Summary
6132
6133 There are many features of Bison's behavior that can be controlled by
6134 assigning the feature a single value.  For historical reasons, some such
6135 features are assigned values by dedicated directives, such as @code{%start},
6136 which assigns the start symbol.  However, newer such features are associated
6137 with variables, which are assigned by the @code{%define} directive:
6138
6139 @deffn {Directive} %define @var{variable}
6140 @deffnx {Directive} %define @var{variable} @var{value}
6141 @deffnx {Directive} %define @var{variable} @{@var{value}@}
6142 @deffnx {Directive} %define @var{variable} "@var{value}"
6143 Define @var{variable} to @var{value}.
6144
6145 The type of the values depends on the syntax.  Braces denote a value in the
6146 target language (e.g., a namespace, a type, etc.).  Keyword values (no
6147 delimiters) denote finite choice (e.g., a variation of a feature).  String
6148 values denote remaining cases (e.g., a file name).
6149
6150 It is an error if a @var{variable} is defined by @code{%define} multiple
6151 times, but see @ref{Tuning the Parser,,@option{-D @var{name}[=@var{value}]}}.
6152 @end deffn
6153
6154 The rest of this section summarizes variables and values that @code{%define}
6155 accepts.
6156
6157 Some @var{variable}s take Boolean values.  In this case, Bison will complain
6158 if the variable definition does not meet one of the following four
6159 conditions:
6160
6161 @enumerate
6162 @item @code{@var{value}} is @code{true}.
6163
6164 @item @code{@var{value}} is omitted (or @code{""} is specified).
6165 This is equivalent to @code{true}.
6166
6167 @item @code{@var{value}} is @code{false}.
6168
6169 @item @var{variable} is never defined.
6170 In this case, Bison selects a default value.
6171 @end enumerate
6172
6173 What @var{variable}s are accepted, as well as their meanings and default
6174 values, depend on the selected target language and/or the parser skeleton
6175 (@pxref{Decl Summary}).
6176 Unaccepted @var{variable}s produce an error.  Some of the accepted
6177 @var{variable}s are described below.
6178
6179
6180 @c ================================================== api.filename.type
6181 @anchor{api-filename-type}
6182 @deffn {Directive} {%define api.filename.type} @{@var{type}@}
6183
6184 @itemize @bullet
6185 @item Language(s): C++
6186
6187 @item Purpose:
6188 Define the type of file names in Bison's default location and position
6189 types. @xref{Exposing the Location Classes}.
6190
6191 @item Accepted Values:
6192 Any type that is printable (via streams) and comparable (with @code{==} and
6193 @code{!=}).
6194
6195 @item Default Value: @code{const std::string}.
6196
6197 @item History:
6198 Introduced in Bison 2.0 as @code{filename_type} (with @code{std::string} as
6199 default), renamed as @code{api.filename.type} in Bison 3.7 (with @code{const
6200 std::string} as default).
6201 @end itemize
6202 @end deffn
6203
6204
6205 @c ================================================== api.header.include
6206 @deffn Directive {%define api.header.include} @{"header.h"@}
6207 @deffnx Directive {%define api.header.include} @{<header.h>@}
6208 @itemize
6209 @item Language(s): C (@file{yacc.c})
6210
6211 @item Purpose: Specify how the generated parser should include the generated header.
6212
6213 Historically, when option @option{-d} or @option{--header} was used,
6214 @command{bison} generated a header and pasted an exact copy of it into the
6215 generated parser implementation file.  Since Bison 3.6, it is
6216 @code{#include}d as @samp{"@var{basename}.h"}, instead of duplicated, unless
6217 @var{basename} is @samp{y.tab}, see below.
6218
6219 The @code{api.header.include} variable lets you control how the generated
6220 parser @code{#include}s the generated header.  For instance:
6221
6222 @example
6223 %define api.header.include @{"parse.h"@}
6224 @end example
6225
6226 @noindent
6227 or
6228
6229 @example
6230 %define api.header.include @{<parser/parse.h>@}
6231 @end example
6232
6233 Using @code{api.header.include} does not change the name of the generated
6234 header, only how it is included.
6235
6236 To work around limitations of Automake's @command{ylwrap} (which runs
6237 @command{bison} with @option{--yacc}), @code{api.header.include} is
6238 @emph{not} predefined when the output file is @file{y.tab.c}.  Define it to
6239 avoid the duplication.
6240
6241 @item Accepted Values:
6242 An argument for @code{#include}.
6243
6244 @item Default Value:
6245 @samp{"@var{header-basename}"}, unless the header file is @file{y.tab.h},
6246 where @var{header-basename} is the name of the generated header, without
6247 directory part.  For instance with @samp{bison -d calc/parse.y},
6248 @code{api.header.include} defaults to @samp{"parse.h"}, not
6249 @samp{"calc/parse.h"}.
6250
6251 @item History:
6252 Introduced in Bison 3.4.  Defaults to @samp{"@var{basename}.h"} since Bison
6253 3.7, unless the header file is @file{y.tab.h}.
6254 @end itemize
6255 @end deffn
6256 @c api.header.include
6257
6258
6259 @c ================================================== api.location.file
6260 @deffn {Directive} {%define api.location.file} "@var{file}"
6261 @deffnx {Directive} {%define api.location.file} @code{none}
6262
6263 @itemize @bullet
6264 @item Language(s): C++
6265
6266 @item Purpose:
6267 Define the name of the file in which Bison's default location and position
6268 types are generated. @xref{Exposing the Location Classes}.
6269
6270 @item Accepted Values:
6271 @table @asis
6272 @item @code{none}
6273 If locations are enabled, generate the definition of the @code{position} and
6274 @code{location} classes in the header file if @code{%header}, otherwise in
6275 the parser implementation.
6276
6277 @item "@var{file}"
6278 Generate the definition of the @code{position} and @code{location} classes
6279 in @var{file}.  This file name can be relative (to where the parser file is
6280 output) or absolute.
6281 @end table
6282
6283 @item Default Value:
6284 Not applicable if locations are not enabled, or if a user location type is
6285 specified (see @code{api.location.type}).  Otherwise, Bison's
6286 @code{location} is generated in @file{location.hh} (@pxref{C++ location}).
6287
6288 @item History:
6289 Introduced in Bison 3.2.
6290 @end itemize
6291 @end deffn
6292
6293
6294 @c ================================================== api.location.include
6295 @deffn {Directive} {%define api.location.include} @{"@var{file}"@}
6296 @deffnx {Directive} {%define api.location.include} @{<@var{file}>@}
6297
6298 @itemize @bullet
6299 @item Language(s): C++
6300
6301 @item Purpose:
6302 Specify how the generated file that defines the @code{position} and
6303 @code{location} classes is included.  This makes sense when the
6304 @code{location} class is exposed to the rest of your application/library in
6305 another directory.  @xref{Exposing the Location Classes}.
6306
6307 @item Accepted Values: Argument for @code{#include}.
6308
6309 @item Default Value:
6310 @samp{"@var{dir}/location.hh"} where @var{dir} is the directory part of the
6311 output.  For instance @file{src/parse} if
6312 @option{--output=src/parse/parser.cc} was given.
6313
6314 @item History:
6315 Introduced in Bison 3.2.
6316 @end itemize
6317
6318 @end deffn
6319
6320
6321 @c ================================================== api.location.type
6322 @deffn {Directive} {%define api.location.type} @{@var{type}@}
6323
6324 @itemize @bullet
6325 @item Language(s): C, C++, Java
6326
6327 @item Purpose: Define the location type.
6328 @xref{Location Type}, and @ref{User Defined Location Type}.
6329
6330 @item Accepted Values: String
6331
6332 @item Default Value: none
6333
6334 @item History:
6335 Introduced in Bison 2.7 for C++ and Java, in Bison 3.4 for C.  Was
6336 originally named @code{location_type} in Bison 2.5 and 2.6.
6337 @end itemize
6338 @end deffn
6339
6340
6341 @c ================================================== api.namespace
6342 @deffn Directive {%define api.namespace} @{@var{namespace}@}
6343 @itemize
6344 @item Language(s): C++
6345
6346 @item Purpose: Specify the namespace for the parser class.
6347 For example, if you specify:
6348
6349 @example
6350 %define api.namespace @{foo::bar@}
6351 @end example
6352
6353 Bison uses @code{foo::bar} verbatim in references such as:
6354
6355 @example
6356 foo::bar::parser::value_type
6357 @end example
6358
6359 However, to open a namespace, Bison removes any leading @code{::} and then
6360 splits on any remaining occurrences:
6361
6362 @example
6363 namespace foo @{ namespace bar @{
6364   class position;
6365   class location;
6366 @} @}
6367 @end example
6368
6369 @item Accepted Values:
6370 Any absolute or relative C++ namespace reference without a trailing
6371 @code{"::"}.  For example, @code{"foo"} or @code{"::foo::bar"}.
6372
6373 @item Default Value:
6374 @code{yy}, unless you used the obsolete @samp{%name-prefix "@var{prefix}"}
6375 directive.
6376 @end itemize
6377 @end deffn
6378 @c api.namespace
6379
6380
6381 @c ================================================== api.parser.class
6382 @deffn Directive {%define api.parser.class} @{@var{name}@}
6383 @itemize @bullet
6384 @item Language(s):
6385 C++, Java, D
6386
6387 @item Purpose:
6388 The name of the parser class.
6389
6390 @item Accepted Values:
6391 Any valid identifier.
6392
6393 @item Default Value:
6394 In C++, @code{parser}.  In D and Java, @code{YYParser} or
6395 @code{@var{api.prefix}Parser} (@pxref{Java Bison Interface}).
6396
6397 @item History:
6398 Introduced in Bison 3.3 to replace @code{parser_class_name}.
6399 @end itemize
6400 @end deffn
6401 @c api.parser.class
6402
6403
6404 @c ================================================== api.prefix
6405 @deffn {Directive} {%define api.prefix} @{@var{prefix}@}
6406
6407 @itemize @bullet
6408 @item Language(s): C, C++, Java
6409
6410 @item Purpose: Rename exported symbols.
6411 @xref{Multiple Parsers}.
6412
6413 @item Accepted Values: String
6414
6415 @item Default Value: @code{YY} for Java, @code{yy} otherwise.
6416
6417 @item History:
6418 Introduced in Bison 2.6, with its argument in double quotes.  Uses braces
6419 since Bison 3.0 (double quotes are still supported for backward
6420 compatibility).
6421 @end itemize
6422 @end deffn
6423
6424
6425 @c ================================================== api.pure
6426 @deffn Directive {%define api.pure} @var{purity}
6427
6428 @itemize @bullet
6429 @item Language(s): C
6430
6431 @item Purpose: Request a pure (reentrant) parser program.
6432 @xref{Pure Decl}.
6433
6434 @item Accepted Values: @code{true}, @code{false}, @code{full}
6435
6436 The value may be omitted: this is equivalent to specifying @code{true}, as is
6437 the case for Boolean values.
6438
6439 When @code{%define api.pure full} is used, the parser is made reentrant. This
6440 changes the signature for @code{yylex} (@pxref{Pure Calling}), and also that of
6441 @code{yyerror} when the tracking of locations has been activated, as shown
6442 below.
6443
6444 The @code{true} value is very similar to the @code{full} value; the only
6445 difference is in the signature of @code{yyerror} on Yacc parsers without
6446 @code{%parse-param}, for historical reasons.
6447
6448 I.e., if @samp{%locations %define api.pure} is passed then the prototypes for
6449 @code{yyerror} are:
6450
6451 @example
6452 void yyerror (char const *msg);                 // Yacc parsers.
6453 void yyerror (YYLTYPE *locp, char const *msg);  // GLR parsers.
6454 @end example
6455
6456 But if @samp{%locations %define api.pure %parse-param @{int *nastiness@}} is
6457 used, then both parsers have the same signature:
6458
6459 @example
6460 void yyerror (YYLTYPE *llocp, int *nastiness, char const *msg);
6461 @end example
6462
6463 (@pxref{Error Reporting Function})
6464
6465 @item Default Value: @code{false}
6466
6467 @item History:
6468 The @code{full} value was introduced in Bison 2.7.
6469 @end itemize
6470 @end deffn
6471 @c api.pure
6472
6473
6474
6475 @c ================================================== api.push-pull
6476 @deffn Directive {%define api.push-pull} @var{kind}
6477
6478 @itemize @bullet
6479 @item Language(s): C (deterministic parsers only), D, Java
6480
6481 @item Purpose: Request a pull parser, a push parser, or both.
6482 @xref{Push Decl}.
6483
6484 @item Accepted Values: @code{pull}, @code{push}, @code{both}
6485
6486 @item Default Value: @code{pull}
6487 @end itemize
6488 @end deffn
6489 @c api.push-pull
6490
6491
6492
6493 @c ================================================== api.symbol.prefix
6494 @deffn Directive {%define api.symbol.prefix} @{@var{prefix}@}
6495
6496 @itemize
6497 @item Language(s): all
6498
6499 @item Purpose:
6500 Add a prefix to the name of the symbol kinds.  For instance
6501
6502 @example
6503 %define api.symbol.prefix @{S_@}
6504 %token FILE for ERROR
6505 %%
6506 start: FILE for ERROR;
6507 @end example
6508
6509 @noindent
6510 generates this definition in C:
6511
6512 @example
6513 /* Symbol kind.  */
6514 enum yysymbol_kind_t
6515 @{
6516   S_YYEMPTY = -2,   /* No symbol.  */
6517   S_YYEOF = 0,      /* $end  */
6518   S_YYERROR = 1,    /* error  */
6519   S_YYUNDEF = 2,    /* $undefined  */
6520   S_FILE = 3,       /* FILE  */
6521   S_for = 4,        /* for  */
6522   S_ERROR = 5,      /* ERROR  */
6523   S_YYACCEPT = 6,   /* $accept  */
6524   S_start = 7       /* start  */
6525 @};
6526 @end example
6527
6528 @item Accepted Values:
6529 Any non empty string.  Must be a valid identifier in the target language
6530 (typically a non empty sequence of letters, underscores, and ---not at the
6531 beginning--- digits).
6532
6533 The empty prefix is (generally) invalid:
6534 @itemize
6535 @item
6536 in C it would create collision with the @code{YYERROR} macro, and
6537 potentially token kind definitions and symbol kind definitions would
6538 collide;
6539 @item
6540 unnamed symbols (such as @samp{'+'}) have a name which starts with a digit;
6541 @item
6542 even in languages with scoped enumerations such as Java, an empty prefix is
6543 dangerous: symbol names may collide with the target language keywords, or
6544 with other members of the @code{SymbolKind} class.
6545 @end itemize
6546
6547
6548 @item Default Value:
6549 @code{YYSYMBOL_} in C, @code{S_} in C++ and Java, empty in D.
6550 @item History:
6551 introduced in Bison 3.6.
6552 @end itemize
6553 @end deffn
6554 @c api.symbol.prefix
6555
6556
6557 @c ================================================== api.token.constructor
6558 @deffn Directive {%define api.token.constructor}
6559
6560 @itemize @bullet
6561 @item Language(s):
6562 C++, D
6563
6564 @item Purpose:
6565 Request that symbols be handled as a whole (type, value, and possibly
6566 location) in the scanner. In the case of C++, it works only when
6567 variant-based semantic values are enabled (@pxref{C++ Variants}); see
6568 @ref{Complete Symbols}, for details. In D, token constructors work with both
6569 @samp{%union} and @samp{%define api.value.type union}.
6570
6571 @item Accepted Values:
6572 Boolean.
6573
6574 @item Default Value:
6575 @code{false}
6576 @item History:
6577 introduced in Bison 3.0.
6578 @end itemize
6579 @end deffn
6580 @c api.token.constructor
6581
6582
6583 @c ================================================== api.token.prefix
6584 @anchor{api-token-prefix}
6585 @deffn Directive {%define api.token.prefix} @{@var{prefix}@}
6586 @itemize
6587 @item Language(s): all
6588
6589 @item Purpose:
6590 Add a prefix to the token names when generating their definition in the
6591 target language.  For instance
6592
6593 @example
6594 %define api.token.prefix @{TOK_@}
6595 %token FILE for ERROR
6596 %%
6597 start: FILE for ERROR;
6598 @end example
6599
6600 @noindent
6601 generates the definition of the symbols @code{TOK_FILE}, @code{TOK_for}, and
6602 @code{TOK_ERROR} in the generated source files.  In particular, the scanner
6603 must use these prefixed token names, while the grammar itself may still use
6604 the short names (as in the sample rule given above).  The generated
6605 informational files (@file{*.output}, @file{*.xml}, @file{*.gv}) are not
6606 modified by this prefix.
6607
6608 Bison also prefixes the generated member names of the semantic value union.
6609 @xref{Type Generation}, for more
6610 details.
6611
6612 See @ref{Calc++ Parser} and @ref{Calc++ Scanner}, for a complete example.
6613
6614 @item Accepted Values:
6615 Any string.  Must be a valid identifier prefix in the target language
6616 (typically, a possibly empty sequence of letters, underscores, and ---not at
6617 the beginning--- digits).
6618
6619 @item Default Value:
6620 empty
6621 @item History:
6622 introduced in Bison 3.0.
6623 @end itemize
6624 @end deffn
6625 @c api.token.prefix
6626
6627
6628 @c ================================================== api.token.raw
6629 @deffn Directive {%define api.token.raw}
6630
6631 @itemize @bullet
6632 @item Language(s):
6633 all
6634
6635 @item Purpose:
6636 The output files normally define the enumeration of the @emph{token kinds}
6637 with Yacc-compatible token codes: sequential numbers starting at 257 except
6638 for single character tokens which stand for themselves (e.g., in ASCII,
6639 @samp{'a'} is numbered 65).  The parser however uses @emph{symbol kinds}
6640 which are assigned numbers sequentially starting at 0.  Therefore each time
6641 the scanner returns an (external) token kind, it must be mapped to the
6642 (internal) symbol kind.
6643
6644 When @code{api.token.raw} is set, the codes of the token kinds are forced to
6645 coincide with the symbol kinds.  This saves one table lookup per token to map
6646 them from the token kind to the symbol kind, and also saves the generation
6647 of the mapping table.  The gain is typically moderate, but in extreme cases
6648 (very simple user actions), a 10% improvement can be observed.
6649
6650 When @code{api.token.raw} is set, the grammar cannot use character literals
6651 (such as @samp{'a'}).
6652
6653 @item Accepted Values: Boolean.
6654
6655 @item Default Value:
6656 @code{true} in D, @code{false} otherwise
6657 @item History:
6658 introduced in Bison 3.5.  Was initially introduced in Bison 1.25 as
6659 @samp{%raw}, but never worked and was removed in Bison 1.29.
6660 @end itemize
6661 @end deffn
6662 @c api.token.raw
6663
6664
6665 @c ================================================== api.value.automove
6666 @deffn Directive {%define api.value.automove}
6667
6668 @itemize @bullet
6669 @item Language(s):
6670 C++
6671
6672 @item Purpose:
6673 Let occurrences of semantic values of the right-hand sides of a rule be
6674 implicitly turned into rvalues.  When enabled, a grammar such as:
6675
6676 @example
6677 exp:
6678   "number"     @{ $$ = make_number ($1); @}
6679 | exp "+" exp  @{ $$ = make_binary (add, $1, $3); @}
6680 | "(" exp ")"  @{ $$ = $2; @}
6681 @end example
6682
6683 @noindent
6684 is actually compiled as if you had written:
6685
6686 @example
6687 exp:
6688   "number"     @{ $$ = make_number (std::move ($1)); @}
6689 | exp "+" exp  @{ $$ = make_binary (add,
6690                                    std::move ($1),
6691                                    std::move ($3)); @}
6692 | "(" exp ")"  @{ $$ = std::move ($2); @}
6693 @end example
6694
6695 Using a value several times with automove enabled is typically an error.
6696 For instance, instead of:
6697
6698 @example
6699 exp: "twice" exp  @{ $$ = make_binary (add, $2, $2); @}
6700 @end example
6701
6702 @noindent
6703 write:
6704
6705 @example
6706 exp: "twice" exp @{ auto v = $2; $$ = make_binary (add, v, v); @}
6707 @end example
6708
6709 @noindent
6710 It is tempting to use @code{std::move} on one of the @code{v}, but the
6711 argument evaluation order in C++ is unspecified.
6712
6713 @item Accepted Values:
6714 Boolean.
6715
6716 @item Default Value:
6717 @code{false}
6718 @item History:
6719 introduced in Bison 3.2.
6720 @end itemize
6721 @end deffn
6722 @c api.value.automove
6723
6724
6725 @c ================================================== api.value.type
6726 @deffn Directive {%define api.value.type} @var{support}
6727 @deffnx Directive {%define api.value.type} @{@var{type}@}
6728 @itemize @bullet
6729 @item Language(s):
6730 all
6731
6732 @item Purpose:
6733 The type for semantic values.
6734
6735 @item Accepted Values:
6736 @table @asis
6737 @item @samp{@{@}}
6738 This grammar has no semantic value at all.  This is not properly supported
6739 yet.
6740 @item @samp{union-directive} (C, C++, D)
6741 The type is defined thanks to the @code{%union} directive.  You don't have
6742 to define @code{api.value.type} in that case, using @code{%union} suffices.
6743 @xref{Union Decl}.
6744 For instance:
6745 @example
6746 %define api.value.type union-directive
6747 %union
6748 @{
6749   int ival;
6750   char *sval;
6751 @}
6752 %token <ival> INT "integer"
6753 %token <sval> STR "string"
6754 @end example
6755
6756 @item @samp{union} (C, C++)
6757 The symbols are defined with type names, from which Bison will generate a
6758 @code{union}.  For instance:
6759 @example
6760 %define api.value.type union
6761 %token <int> INT "integer"
6762 %token <char *> STR "string"
6763 @end example
6764 Most C++ objects cannot be stored in a @code{union}, use @samp{variant}
6765 instead.
6766
6767 @item @samp{variant} (C++)
6768 This is similar to @code{union}, but special storage techniques are used to
6769 allow any kind of C++ object to be used. For instance:
6770 @example
6771 %define api.value.type variant
6772 %token <int> INT "integer"
6773 %token <std::string> STR "string"
6774 @end example
6775 @xref{C++ Variants}.
6776
6777 @item @samp{@{@var{type}@}}
6778 Use this @var{type} as semantic value.
6779 @example
6780 %code requires
6781 @{
6782   struct my_value
6783   @{
6784     enum
6785     @{
6786       is_int, is_str
6787     @} kind;
6788     union
6789     @{
6790       int ival;
6791       char *sval;
6792     @} u;
6793   @};
6794 @}
6795 %define api.value.type @{struct my_value@}
6796 %token <u.ival> INT "integer"
6797 %token <u.sval> STR "string"
6798 @end example
6799 @end table
6800
6801 @item Default Value:
6802 @itemize @minus
6803 @item
6804 @code{union-directive} if @code{%union} is used, otherwise @dots{}
6805 @item
6806 @code{int} if type tags are used (i.e., @samp{%token <@var{type}>@dots{}} or
6807 @samp{%nterm <@var{type}>@dots{}} is used), otherwise @dots{}
6808 @item
6809 undefined.
6810 @end itemize
6811
6812 @item History:
6813 introduced in Bison 3.0.  Was introduced for Java only in 2.3b as
6814 @code{stype}.
6815 @end itemize
6816 @end deffn
6817 @c api.value.type
6818
6819
6820 @c ================================================== api.value.union.name
6821 @deffn Directive {%define api.value.union.name} @var{name}
6822 @itemize @bullet
6823 @item Language(s):
6824 C
6825
6826 @item Purpose:
6827 The tag of the generated @code{union} (@emph{not} the name of the
6828 @code{typedef}).  This variable is set to @code{@var{id}} when @samp{%union
6829 @var{id}} is used.  There is no clear reason to give this union a name.
6830
6831 @item Accepted Values:
6832 Any valid identifier.
6833
6834 @item Default Value:
6835 @code{YYSTYPE}.
6836
6837 @item History:
6838 Introduced in Bison 3.0.3.
6839 @end itemize
6840 @end deffn
6841 @c api.value.union.name
6842
6843
6844 @c ================================================== lr.default-reduction
6845
6846 @deffn Directive {%define lr.default-reduction} @var{when}
6847
6848 @itemize @bullet
6849 @item Language(s): all
6850
6851 @item Purpose: Specify the kind of states that are permitted to
6852 contain default reductions.  @xref{Default Reductions}.
6853
6854 @item Accepted Values: @code{most}, @code{consistent}, @code{accepting}
6855 @item Default Value:
6856 @itemize
6857 @item @code{accepting} if @code{lr.type} is @code{canonical-lr}.
6858 @item @code{most} otherwise.
6859 @end itemize
6860 @item History:
6861 introduced as @code{lr.default-reductions} in 2.5, renamed as
6862 @code{lr.default-reduction} in 3.0.
6863 @end itemize
6864 @end deffn
6865
6866
6867 @c ============================================ lr.keep-unreachable-state
6868
6869 @deffn Directive {%define lr.keep-unreachable-state}
6870
6871 @itemize @bullet
6872 @item Language(s): all
6873 @item Purpose: Request that Bison allow unreachable parser states to
6874 remain in the parser tables.  @xref{Unreachable States}.
6875 @item Accepted Values: Boolean
6876 @item Default Value: @code{false}
6877 @item History:
6878 introduced as @code{lr.keep_unreachable_states} in 2.3b, renamed as
6879 @code{lr.keep-unreachable-states} in 2.5, and as
6880 @code{lr.keep-unreachable-state} in 3.0.
6881 @end itemize
6882 @end deffn
6883 @c lr.keep-unreachable-state
6884
6885
6886 @c ================================================== lr.type
6887
6888 @deffn Directive {%define lr.type} @var{type}
6889
6890 @itemize @bullet
6891 @item Language(s): all
6892
6893 @item Purpose: Specify the type of parser tables within the
6894 LR(1) family.  @xref{LR Table Construction}.
6895
6896 @item Accepted Values: @code{lalr}, @code{ielr}, @code{canonical-lr}
6897
6898 @item Default Value: @code{lalr}
6899 @end itemize
6900 @end deffn
6901
6902
6903 @c ================================================== namespace
6904 @deffn Directive %define namespace @{@var{namespace}@}
6905 Obsoleted by @code{api.namespace}
6906 @end deffn
6907 @c namespace
6908
6909
6910 @c ================================================== parse.assert
6911 @deffn Directive {%define parse.assert}
6912
6913 @itemize
6914 @item Language(s): C, C++
6915
6916 @item Purpose: Issue runtime assertions to catch invalid uses.
6917 In C, some important invariants in the implementation of the parser are
6918 checked when this option is enabled.
6919
6920 In C++, when variants are used (@pxref{C++ Variants}), symbols must be
6921 constructed and destroyed properly.  This option checks these constraints
6922 using runtime type information (RTTI).  Therefore the generated code cannot
6923 be compiled with RTTI disabled (via compiler options such as
6924 @option{-fno-rtti}).
6925
6926 @item Accepted Values: Boolean
6927
6928 @item Default Value: @code{false}
6929 @end itemize
6930 @end deffn
6931 @c parse.assert
6932
6933
6934 @c ================================================== parse.error
6935 @deffn Directive {%define parse.error} @var{verbosity}
6936 @itemize
6937 @item Language(s):
6938 all
6939 @item Purpose:
6940 Control the generation of syntax error messages.  @xref{Error Reporting}.
6941 @item Accepted Values:
6942 @itemize
6943 @item @code{simple}
6944 Error messages passed to @code{yyerror} are simply @w{@code{"syntax
6945 error"}}.
6946
6947 @item @code{detailed}
6948 Error messages report the unexpected token, and possibly the expected ones.
6949 However, this report can often be incorrect when LAC is not enabled
6950 (@pxref{LAC}).  Token name internationalization is supported.
6951
6952 @item @code{verbose}
6953 Similar (but inferior) to @code{detailed}. The D parser does not support this value.
6954
6955 Error messages report the unexpected token, and possibly the expected ones.
6956 However, this report can often be incorrect when LAC is not enabled
6957 (@pxref{LAC}).
6958
6959 Does not support token internationalization.  Using non-ASCII characters in
6960 token aliases is not portable.
6961
6962 @item @code{custom}
6963 The user is in charge of generating the syntax error message by defining the
6964 @code{yyreport_syntax_error} function.  @xref{Syntax Error Reporting
6965 Function}.
6966 @end itemize
6967
6968 @item Default Value:
6969 @code{simple}
6970
6971 @item History:
6972 introduced in 3.0 with support for @code{simple} and @code{verbose}.  Values
6973 @code{custom} and @code{detailed} were introduced in 3.6.
6974 @end itemize
6975 @end deffn
6976 @c parse.error
6977
6978
6979 @c ================================================== parse.lac
6980 @deffn Directive {%define parse.lac} @var{when}
6981
6982 @itemize
6983 @item Language(s): C/C++ (deterministic parsers only), D and Java.
6984
6985 @item Purpose: Enable LAC (lookahead correction) to improve
6986 syntax error handling.  @xref{LAC}.
6987 @item Accepted Values: @code{none}, @code{full}
6988 @item Default Value: @code{none}
6989 @end itemize
6990 @end deffn
6991 @c parse.lac
6992
6993
6994 @c ================================================== parse.trace
6995 @deffn Directive {%define parse.trace}
6996
6997 @itemize
6998 @item Language(s): C, C++, D, Java
6999
7000 @item Purpose: Require parser instrumentation for tracing.
7001 @xref{Tracing}.
7002
7003 In C/C++, define the macro @code{YYDEBUG} (or @code{@var{prefix}DEBUG} with
7004 @samp{%define api.prefix @{@var{prefix}@}}, see @ref{Multiple Parsers}) to
7005 1 (if it is not already defined) so that the debugging facilities are
7006 compiled.
7007
7008 @item Accepted Values: Boolean
7009
7010 @item Default Value: @code{false}
7011 @end itemize
7012 @end deffn
7013 @c parse.trace
7014
7015
7016 @c ================================================== parser_class_name
7017 @deffn Directive %define parser_class_name @{@var{name}@}
7018 Obsoleted by @code{api.parser.class}
7019 @end deffn
7020 @c parser_class_name
7021
7022
7023
7024
7025
7026
7027 @node %code Summary
7028 @subsection %code Summary
7029 @findex %code
7030 @cindex Prologue
7031
7032 The @code{%code} directive inserts code verbatim into the output
7033 parser source at any of a predefined set of locations.  It thus serves
7034 as a flexible and user-friendly alternative to the traditional Yacc
7035 prologue, @code{%@{@var{code}%@}}.  This section summarizes the
7036 functionality of @code{%code} for the various target languages
7037 supported by Bison.  For a detailed discussion of how to use
7038 @code{%code} in place of @code{%@{@var{code}%@}} for C/C++ and why it
7039 is advantageous to do so, see @ref{Prologue Alternatives}.
7040
7041 @deffn {Directive} %code @{@var{code}@}
7042 This is the unqualified form of the @code{%code} directive.  It
7043 inserts @var{code} verbatim at a language-dependent default location
7044 in the parser implementation.
7045
7046 For C/C++, the default location is the parser implementation file
7047 after the usual contents of the parser header file.  Thus, the
7048 unqualified form replaces @code{%@{@var{code}%@}} for most purposes.
7049
7050 For D and Java, the default location is inside the parser class.
7051 @end deffn
7052
7053 @deffn {Directive} %code @var{qualifier} @{@var{code}@}
7054 This is the qualified form of the @code{%code} directive.
7055 @var{qualifier} identifies the purpose of @var{code} and thus the
7056 location(s) where Bison should insert it.  That is, if you need to
7057 specify location-sensitive @var{code} that does not belong at the
7058 default location selected by the unqualified @code{%code} form, use
7059 this form instead.
7060 @end deffn
7061
7062 For any particular qualifier or for the unqualified form, if there are
7063 multiple occurrences of the @code{%code} directive, Bison concatenates
7064 the specified code in the order in which it appears in the grammar
7065 file.
7066
7067 Not all qualifiers are accepted for all target languages.  Unaccepted
7068 qualifiers produce an error.  Some of the accepted qualifiers are:
7069
7070 @table @code
7071 @item requires
7072 @findex %code requires
7073
7074 @itemize @bullet
7075 @item Language(s): C, C++
7076
7077 @item Purpose:
7078 This is the best place to write dependency code required for the value and
7079 location types (@code{YYSTYPE} and @code{YYLTYPE} in C).  In other words,
7080 it's the best place to define types referenced in @code{%union} directives.
7081 In C, if you use @code{#define} to override Bison's default @code{YYSTYPE}
7082 and @code{YYLTYPE} definitions, then it is also the best place.  However you
7083 should rather @code{%define} @code{api.value.type} and
7084 @code{api.location.type}.
7085
7086 @item Location(s):
7087 The parser header file and the parser implementation file before the
7088 Bison-generated definitions of the value and location types (@code{YYSTYPE}
7089 and @code{YYLTYPE} in C).
7090 @end itemize
7091
7092 @item provides
7093 @findex %code provides
7094
7095 @itemize @bullet
7096 @item Language(s): C, C++
7097
7098 @item Purpose: This is the best place to write additional definitions and
7099 declarations that should be provided to other modules.
7100
7101 @item Location(s):
7102 The parser header file and the parser implementation file after the
7103 Bison-generated value and location types (@code{YYSTYPE} and @code{YYLTYPE}
7104 in C), and token definitions.
7105 @end itemize
7106
7107 @item top
7108 @findex %code top
7109
7110 @itemize @bullet
7111 @item Language(s): C, C++
7112
7113 @item Purpose: The unqualified @code{%code} or @code{%code requires}
7114 should usually be more appropriate than @code{%code top}.  However,
7115 occasionally it is necessary to insert code much nearer the top of the
7116 parser implementation file.  For example:
7117
7118 @example
7119 %code top @{
7120   #define _GNU_SOURCE
7121   #include <stdio.h>
7122 @}
7123 @end example
7124
7125 @item Location(s): Near the top of the parser implementation file.
7126 @end itemize
7127
7128 @item imports
7129 @findex %code imports
7130
7131 @itemize @bullet
7132 @item Language(s): D, Java
7133
7134 @item Purpose: This is the best place to write Java import directives. D syntax
7135 allows for import statements all throughout the code.
7136
7137 @item Location(s): The parser Java file after any Java package directive and
7138 before any class definitions. The parser D file before any class definitions.
7139 @end itemize
7140 @end table
7141
7142 Though we say the insertion locations are language-dependent, they are
7143 technically skeleton-dependent.  Writers of non-standard skeletons
7144 however should choose their locations consistently with the behavior
7145 of the standard Bison skeletons.
7146
7147
7148 @node Multiple Parsers
7149 @section Multiple Parsers in the Same Program
7150
7151 Most programs that use Bison parse only one language and therefore contain
7152 only one Bison parser.  But what if you want to parse more than one language
7153 with the same program?  Then you need to avoid name conflicts between
7154 different definitions of functions and variables such as @code{yyparse},
7155 @code{yylval}.  To use different parsers from the same compilation unit, you
7156 also need to avoid conflicts on types and macros (e.g., @code{YYSTYPE})
7157 exported in the generated header.
7158
7159 The easy way to do this is to define the @code{%define} variable
7160 @code{api.prefix}.  With different @code{api.prefix}s it is guaranteed that
7161 headers do not conflict when included together, and that compiled objects
7162 can be linked together too.  Specifying @samp{%define api.prefix
7163 @{@var{prefix}@}} (or passing the option @option{-Dapi.prefix=@{@var{prefix}@}}, see
7164 @ref{Invocation}) renames the interface functions and
7165 variables of the Bison parser to start with @var{prefix} instead of
7166 @samp{yy}, and all the macros to start with @var{PREFIX} (i.e., @var{prefix}
7167 upper-cased) instead of @samp{YY}.
7168
7169 The renamed symbols include @code{yyparse}, @code{yylex}, @code{yyerror},
7170 @code{yynerrs}, @code{yylval}, @code{yylloc}, @code{yychar} and
7171 @code{yydebug}.  If you use a push parser, @code{yypush_parse},
7172 @code{yypull_parse}, @code{yypstate}, @code{yypstate_new} and
7173 @code{yypstate_delete} will also be renamed.  The renamed macros include
7174 @code{YYSTYPE}, @code{YYLTYPE}, and @code{YYDEBUG}, which is treated
7175 specifically --- more about this below.
7176
7177 For example, if you use @samp{%define api.prefix @{c@}}, the names become
7178 @code{cparse}, @code{clex}, @dots{}, @code{CSTYPE}, @code{CLTYPE}, and so
7179 on.
7180
7181 Users of Flex must update the signature of the generated @code{yylex}
7182 function.  Since the Flex scanner usually includes the generated header of
7183 the parser (to get the definitions of the tokens, etc.), the most convenient
7184 way is to insert the declaration of @code{yylex} in the @code{provides}
7185 section:
7186
7187 @example
7188 %define api.prefix @{c@}
7189 // Emitted in the header file, after the definition of YYSTYPE.
7190 %code provides
7191 @{
7192   // Tell Flex the expected prototype of yylex.
7193   #define YY_DECL                             \
7194     int clex (CSTYPE *yylval, CLTYPE *yylloc)
7195
7196   // Declare the scanner.
7197   YY_DECL;
7198 @}
7199 @end example
7200
7201 @sp 1
7202
7203 The @code{%define} variable @code{api.prefix} works in two different ways.
7204 In the implementation file, it works by adding macro definitions to the
7205 beginning of the parser implementation file, defining @code{yyparse} as
7206 @code{@var{prefix}parse}, and so on:
7207
7208 @example
7209 #define YYSTYPE CSTYPE
7210 #define yyparse cparse
7211 #define yylval  clval
7212 ...
7213 YYSTYPE yylval;
7214 int yyparse (void);
7215 @end example
7216
7217 This effectively substitutes one name for the other in the entire parser
7218 implementation file, thus the ``original'' names (@code{yylex},
7219 @code{YYSTYPE}, @dots{}) are also usable in the parser implementation file.
7220
7221 However, in the parser header file, the symbols are defined renamed, for
7222 instance:
7223
7224 @example
7225 extern CSTYPE clval;
7226 int cparse (void);
7227 @end example
7228
7229 The macro @code{YYDEBUG} is commonly used to enable the tracing support in
7230 parsers.  To comply with this tradition, when @code{api.prefix} is used,
7231 @code{YYDEBUG} (not renamed) is used as a default value:
7232
7233 @example
7234 /* Debug traces. */
7235 #ifndef CDEBUG
7236 # if defined YYDEBUG
7237 #  if YYDEBUG
7238 #   define CDEBUG 1
7239 #  else
7240 #   define CDEBUG 0
7241 #  endif
7242 # else
7243 #  define CDEBUG 0
7244 # endif
7245 #endif
7246 #if CDEBUG
7247 extern int cdebug;
7248 #endif
7249 @end example
7250
7251 @sp 2
7252
7253 Prior to Bison 2.6, a feature similar to @code{api.prefix} was provided by
7254 the obsolete directive @code{%name-prefix} (@pxref{Table of Symbols}) and
7255 the option @option{--name-prefix} (@pxref{Output Files}).
7256
7257 @node Interface
7258 @chapter Parser C-Language Interface
7259 @cindex C-language interface
7260 @cindex interface
7261
7262 The Bison parser is actually a C function named @code{yyparse}.  Here we
7263 describe the interface conventions of @code{yyparse} and the other
7264 functions that it needs to use.
7265
7266 Keep in mind that the parser uses many C identifiers starting with
7267 @samp{yy} and @samp{YY} for internal purposes.  If you use such an
7268 identifier (aside from those in this manual) in an action or in the epilogue
7269 in the grammar file, you are likely to run into trouble.
7270
7271 @menu
7272 * Parser Function::         How to call @code{yyparse} and what it returns.
7273 * Push Parser Interface::   How to create, use, and destroy push parsers.
7274 * Lexical::                 You must supply a function @code{yylex}
7275                               which reads tokens.
7276 * Error Reporting::         Passing error messages to the user.
7277 * Action Features::         Special features for use in actions.
7278 * Internationalization::    How to let the parser speak in the user's
7279                               native language.
7280 @end menu
7281
7282 @node Parser Function
7283 @section The Parser Function @code{yyparse}
7284 @findex yyparse
7285
7286 You call the function @code{yyparse} to cause parsing to occur.  This
7287 function reads tokens, executes actions, and ultimately returns when it
7288 encounters end-of-input or an unrecoverable syntax error.  You can also
7289 write an action which directs @code{yyparse} to return immediately
7290 without reading further.
7291
7292
7293 @deftypefun int yyparse (@code{void})
7294 The value returned by @code{yyparse} is 0 if parsing was successful (return
7295 is due to end-of-input).
7296
7297 The value is 1 if parsing failed because of invalid input, i.e., input
7298 that contains a syntax error or that causes @code{YYABORT} to be
7299 invoked.
7300
7301 The value is 2 if parsing failed due to memory exhaustion.
7302 @end deftypefun
7303
7304 In an action, you can cause immediate return from @code{yyparse} by using
7305 these macros:
7306
7307 @defmac YYACCEPT
7308 @findex YYACCEPT
7309 Return immediately with value 0 (to report success).
7310 @end defmac
7311
7312 @defmac YYABORT
7313 @findex YYABORT
7314 Return immediately with value 1 (to report failure).
7315 @end defmac
7316
7317 @defmac YYNOMEM
7318 @findex YYNOMEM
7319 Return immediately with value 2 (to report memory exhaustion).
7320 @end defmac
7321
7322 If you use a reentrant parser, you can optionally pass additional
7323 parameter information to it in a reentrant way.  To do so, use the
7324 declaration @code{%parse-param}:
7325
7326 @deffn {Directive} %parse-param @{@var{argument-declaration}@} @dots{}
7327 @findex %parse-param
7328 Declare that one or more
7329 @var{argument-declaration} are additional @code{yyparse} arguments.
7330 The @var{argument-declaration} is used when declaring
7331 functions or prototypes.  The last identifier in
7332 @var{argument-declaration} must be the argument name.
7333 @end deffn
7334
7335 Here's an example.  Write this in the parser:
7336
7337 @example
7338 %parse-param @{int *nastiness@} @{int *randomness@}
7339 @end example
7340
7341 @noindent
7342 Then call the parser like this:
7343
7344 @example
7345 @{
7346   int nastiness, randomness;
7347   @dots{}  /* @r{Store proper data in @code{nastiness} and @code{randomness}.} */
7348   value = yyparse (&nastiness, &randomness);
7349   @dots{}
7350 @}
7351 @end example
7352
7353 @noindent
7354 In the grammar actions, use expressions like this to refer to the data:
7355
7356 @example
7357 exp: @dots{}    @{ @dots{}; *randomness += 1; @dots{} @}
7358 @end example
7359
7360 @noindent
7361 Using the following:
7362 @example
7363 %parse-param @{int *randomness@}
7364 @end example
7365
7366 Results in these signatures:
7367 @example
7368 void yyerror (int *randomness, const char *msg);
7369 int  yyparse (int *randomness);
7370 @end example
7371
7372 @noindent
7373 Or, if both @code{%define api.pure full} (or just @code{%define api.pure})
7374 and @code{%locations} are used:
7375
7376 @example
7377 void yyerror (YYLTYPE *llocp, int *randomness, const char *msg);
7378 int  yyparse (int *randomness);
7379 @end example
7380
7381 @node Push Parser Interface
7382 @section Push Parser Interface
7383
7384 @findex yypstate_new
7385 You call the function @code{yypstate_new} to create a new parser instance.
7386 This function is available if either the @samp{%define api.push-pull push}
7387 or @samp{%define api.push-pull both} declaration is used.  @xref{Push Decl}.
7388
7389 @anchor{yypstate_new}
7390 @deftypefun {yypstate*} yypstate_new (@code{void})
7391 Return a valid parser instance if there is memory available, 0 otherwise.
7392 In impure mode, it will also return 0 if a parser instance is currently
7393 allocated.
7394 @end deftypefun
7395
7396 @findex yypstate_delete
7397 You call the function @code{yypstate_delete} to delete a parser instance.
7398 This function is available if either the @samp{%define api.push-pull push} or
7399 @samp{%define api.push-pull both} declaration is used.
7400 @xref{Push Decl}.
7401
7402 @anchor{yypstate_delete}
7403 @deftypefun void yypstate_delete (@code{yypstate *}@var{yyps})
7404 Reclaim the memory associated with a parser instance.  After this call, you
7405 should no longer attempt to use the parser instance.
7406 @end deftypefun
7407
7408 @findex yypush_parse
7409 You call the function @code{yypush_parse} to parse a single token.  This
7410 function is available if either the @samp{%define api.push-pull push} or
7411 @samp{%define api.push-pull both} declaration is used.  @xref{Push Decl}.
7412
7413 @anchor{yypush_parse}
7414 @deftypefun int yypush_parse (@code{yypstate *}@var{yyps})
7415 The value returned by @code{yypush_parse} is the same as for @code{yyparse}
7416 with the following exception: it returns @code{YYPUSH_MORE} if more input is
7417 required to finish parsing the grammar.
7418
7419 After @code{yypush_parse} returns, the instance may be consulted.  For
7420 instance, check @code{yynerrs} to see whether there were (possibly recovered)
7421 syntax errors.
7422
7423 After @code{yypush_parse} returns a status other than @code{YYPUSH_MORE},
7424 the parser instance @code{yyps} may be reused for a new parse.
7425 @end deftypefun
7426
7427 The fact that the parser state is reusable even after an error simplifies
7428 reuse.  For example, a calculator application which parses each input line
7429 as an expression can just keep reusing the same @code{yyps} even if an input
7430 was invalid.
7431
7432 You call the function @code{yypull_parse} to parse the rest of the input
7433 stream.  This function is available if the @samp{%define api.push-pull both}
7434 declaration is used.  @xref{Push Decl}.
7435
7436 @anchor{yypull_parse}
7437 @deftypefun int yypull_parse (@code{yypstate *}@var{yyps})
7438 The value returned by @code{yypull_parse} is the same as for @code{yyparse}.
7439
7440 The parser instance @code{yyps} may be reused for new parses.
7441 @end deftypefun
7442
7443 @deftypefun int yypstate_expected_tokens (@code{const yypstate *}@var{yyps}, @code{yysymbol_kind_t} @var{argv}@code{[]}, @code{int} @var{argc})
7444 Fill @var{argv} with the expected tokens, which never include
7445 @code{YYSYMBOL_YYEMPTY}, @code{YYSYMBOL_YYerror}, or
7446 @code{YYSYMBOL_YYUNDEF}.
7447
7448 Never put more than @var{argc} elements into @var{argv}, and on success
7449 return the number of tokens stored in @var{argv}.  If there are more
7450 expected tokens than @var{argc}, fill @var{argv} up to @var{argc} and return
7451 0.  If there are no expected tokens, also return 0, but set @code{argv[0]}
7452 to @code{YYSYMBOL_YYEMPTY}.
7453
7454 When LAC is enabled, may return a negative number on errors,
7455 such as @code{YYENOMEM} on memory exhaustion.
7456
7457 If @var{argv} is null, return the size needed to store all the possible
7458 values, which is always less than @code{YYNTOKENS}.
7459 @end deftypefun
7460
7461
7462 @node Lexical
7463 @section The Lexical Analyzer Function @code{yylex}
7464 @findex yylex
7465 @cindex lexical analyzer
7466
7467 The @dfn{lexical analyzer} function, @code{yylex}, recognizes tokens from
7468 the input stream and returns them to the parser.  Bison does not create
7469 this function automatically; you must write it so that @code{yyparse} can
7470 call it.  The function is sometimes referred to as a lexical scanner.
7471
7472 In simple programs, @code{yylex} is often defined at the end of the Bison
7473 grammar file.  If @code{yylex} is defined in a separate source file, you
7474 need to arrange for the token-kind definitions to be available there.  To do
7475 this, use the @option{-d} option when you run Bison, so that it will write
7476 these definitions into the separate parser header file,
7477 @file{@var{name}.tab.h}, which you can include in the other source files
7478 that need it.  @xref{Invocation}.
7479
7480 @menu
7481 * Calling Convention::  How @code{yyparse} calls @code{yylex}.
7482 * Special Tokens::      Signaling end-of-file and errors to the parser.
7483 * Tokens from Literals:: Finding token kinds from string aliases.
7484 * Token Values::        How @code{yylex} must return the semantic value
7485                           of the token it has read.
7486 * Token Locations::     How @code{yylex} must return the text location
7487                           (line number, etc.) of the token, if the
7488                           actions want that.
7489 * Pure Calling::        How the calling convention differs in a pure parser
7490                           (@pxref{Pure Decl}).
7491 @end menu
7492
7493 @node Calling Convention
7494 @subsection Calling Convention for @code{yylex}
7495
7496 The value that @code{yylex} returns must be the positive numeric code for
7497 the kind of token it has just found; a zero or negative value signifies
7498 end-of-input.
7499
7500 When a token kind is referred to in the grammar rules by a name, that name
7501 in the parser implementation file becomes an enumerator of the enum
7502 @code{yytoken_kind_t} whose definition is the proper numeric code for that
7503 token kind.  So @code{yylex} should use the name to indicate that kind.
7504 @xref{Symbols}.
7505
7506 When a token is referred to in the grammar rules by a character literal, the
7507 numeric code for that character is also the code for the token kind.  So
7508 @code{yylex} can simply return that character code, possibly converted to
7509 @code{unsigned char} to avoid sign-extension.  The null character must not
7510 be used this way, because its code is zero and that signifies end-of-input.
7511
7512 Here is an example showing these things:
7513
7514 @example
7515 int
7516 yylex (void)
7517 @{
7518   @dots{}
7519   if (c == EOF)    /* Detect end-of-input. */
7520     return YYEOF;
7521   @dots{}
7522   else if (c == '+' || c == '-')
7523     return c;      /* Assume token kind for '+' is '+'. */
7524   @dots{}
7525   else
7526     return INT;    /* Return the kind of the token. */
7527   @dots{}
7528 @}
7529 @end example
7530
7531 @noindent
7532 This interface has been designed so that the output from the @code{lex}
7533 utility can be used without change as the definition of @code{yylex}.
7534
7535
7536 @node Special Tokens
7537 @subsection Special Tokens
7538
7539 In addition to the user-defined tokens, Bison generates a few special tokens
7540 that @code{yylex} may return.
7541
7542 The @code{YYEOF} token denotes the end of file, and signals to the parser
7543 that there is nothing left afterwards.  @xref{Calling Convention}, for an
7544 example.
7545
7546 Returning @code{YYUNDEF} tells the parser that some lexical error was found.
7547 It will emit an error message about an ``invalid token'', and enter
7548 error-recovery (@pxref{Error Recovery}).  Returning an unknown token kind
7549 results in the exact same behavior.
7550
7551 Returning @code{YYerror} requires the parser to enter error-recovery
7552 @emph{without} emitting an error message.  This way the lexical analyzer can
7553 produce accurate error messages about the invalid input (something the
7554 parser cannot do), and yet benefit from the error-recovery features of the
7555 parser.
7556
7557 @example
7558 int
7559 yylex (void)
7560 @{
7561   @dots{}
7562   switch (c)
7563     @{
7564       @dots{}
7565       case '0': case '1': case '2': case '3': case '4':
7566       case '5': case '6': case '7': case '8': case '9':
7567         @dots{}
7568         return TOK_NUM;
7569       @dots{}
7570       case EOF:
7571         return YYEOF;
7572       default:
7573         yyerror ("syntax error: invalid character: %c", c);
7574         return YYerror;
7575     @}
7576 @}
7577 @end example
7578
7579 @node Tokens from Literals
7580 @subsection Finding Tokens by String Literals
7581
7582 If the grammar uses literal string tokens, there are two ways that
7583 @code{yylex} can determine the token kind codes for them:
7584
7585 @itemize @bullet
7586 @item
7587 If the grammar defines symbolic token names as aliases for the literal
7588 string tokens, @code{yylex} can use these symbolic names like all others.
7589 In this case, the use of the literal string tokens in the grammar file has
7590 no effect on @code{yylex}.
7591
7592 This is the preferred approach.
7593
7594 @item
7595 @code{yylex} can search for the multicharacter token in the @code{yytname}
7596 table.  This method is discouraged: the primary purpose of string aliases is
7597 forging good error messages, not describing the spelling of keywords.  In
7598 addition, looking for the token kind at runtime incurs a (small but
7599 noticeable) cost.
7600
7601 The @code{yytname} table is generated only if you use the
7602 @code{%token-table} declaration.  @xref{Decl Summary}.
7603 @end itemize
7604
7605
7606 @node Token Values
7607 @subsection Semantic Values of Tokens
7608
7609 @vindex yylval
7610 In an ordinary (nonreentrant) parser, the semantic value of the token must
7611 be stored into the global variable @code{yylval}.  When you are using just
7612 one data type for semantic values, @code{yylval} has that type.  Thus, if
7613 the type is @code{int} (the default), you might write this in @code{yylex}:
7614
7615 @example
7616 @group
7617   @dots{}
7618   yylval = value;  /* Put value onto Bison stack. */
7619   return INT;      /* Return the kind of the token. */
7620   @dots{}
7621 @end group
7622 @end example
7623
7624 When you are using multiple data types, @code{yylval}'s type is a union made
7625 from the @code{%union} declaration (@pxref{Union Decl}).  So when you store
7626 a token's value, you must use the proper member of the union.  If the
7627 @code{%union} declaration looks like this:
7628
7629 @example
7630 @group
7631 %union @{
7632   int intval;
7633   double val;
7634   symrec *tptr;
7635 @}
7636 @end group
7637 @end example
7638
7639 @noindent
7640 then the code in @code{yylex} might look like this:
7641
7642 @example
7643 @group
7644   @dots{}
7645   yylval.intval = value; /* Put value onto Bison stack. */
7646   return INT;            /* Return the kind of the token. */
7647   @dots{}
7648 @end group
7649 @end example
7650
7651 @node Token Locations
7652 @subsection Textual Locations of Tokens
7653
7654 @vindex yylloc
7655 If you are using the @samp{@@@var{n}}-feature (@pxref{Tracking Locations})
7656 in actions to keep track of the textual locations of tokens and groupings,
7657 then you must provide this information in @code{yylex}.  The function
7658 @code{yyparse} expects to find the textual location of a token just parsed
7659 in the global variable @code{yylloc}.  So @code{yylex} must store the proper
7660 data in that variable.
7661
7662 By default, the value of @code{yylloc} is a structure and you need only
7663 initialize the members that are going to be used by the actions.  The
7664 four members are called @code{first_line}, @code{first_column},
7665 @code{last_line} and @code{last_column}.  Note that the use of this
7666 feature makes the parser noticeably slower.
7667
7668 @tindex YYLTYPE
7669 The data type of @code{yylloc} has the name @code{YYLTYPE}.
7670
7671 @node Pure Calling
7672 @subsection Calling Conventions for Pure Parsers
7673
7674 When you use the Bison declaration @code{%define api.pure full} to request a
7675 pure, reentrant parser, the global communication variables @code{yylval} and
7676 @code{yylloc} cannot be used.  (@xref{Pure Decl}.)  In such parsers the two
7677 global variables are replaced by pointers passed as arguments to
7678 @code{yylex}.  You must declare them as shown here, and pass the information
7679 back by storing it through those pointers.
7680
7681 @example
7682 int
7683 yylex (YYSTYPE *lvalp, YYLTYPE *llocp)
7684 @{
7685   @dots{}
7686   *lvalp = value;  /* Put value onto Bison stack. */
7687   return INT;      /* Return the kind of the token. */
7688   @dots{}
7689 @}
7690 @end example
7691
7692 If the grammar file does not use the @samp{@@} constructs to refer to
7693 textual locations, then the type @code{YYLTYPE} will not be defined.  In
7694 this case, omit the second argument; @code{yylex} will be called with
7695 only one argument.
7696
7697 If you wish to pass additional arguments to @code{yylex}, use
7698 @code{%lex-param} just like @code{%parse-param} (@pxref{Parser
7699 Function}).  To pass additional arguments to both @code{yylex} and
7700 @code{yyparse}, use @code{%param}.
7701
7702 @deffn {Directive} %lex-param @{@var{argument-declaration}@} @dots{}
7703 @findex %lex-param
7704 Specify that @var{argument-declaration} are additional @code{yylex} argument
7705 declarations.  You may pass one or more such declarations, which is
7706 equivalent to repeating @code{%lex-param}.
7707 @end deffn
7708
7709 @deffn {Directive} %param @{@var{argument-declaration}@} @dots{}
7710 @findex %param
7711 Specify that @var{argument-declaration} are additional
7712 @code{yylex}/@code{yyparse} argument declarations.  This is equivalent to
7713 @samp{%lex-param @{@var{argument-declaration}@} @dots{} %parse-param
7714 @{@var{argument-declaration}@} @dots{}}.  You may pass one or more
7715 declarations, which is equivalent to repeating @code{%param}.
7716 @end deffn
7717
7718 @noindent
7719 For instance:
7720
7721 @example
7722 %lex-param   @{scanner_mode *mode@}
7723 %parse-param @{parser_mode *mode@}
7724 %param       @{environment_type *env@}
7725 @end example
7726
7727 @noindent
7728 results in the following signatures:
7729
7730 @example
7731 int yylex   (scanner_mode *mode, environment_type *env);
7732 int yyparse (parser_mode *mode, environment_type *env);
7733 @end example
7734
7735 If @samp{%define api.pure full} is added:
7736
7737 @example
7738 int yylex   (YYSTYPE *lvalp, scanner_mode *mode, environment_type *env);
7739 int yyparse (parser_mode *mode, environment_type *env);
7740 @end example
7741
7742 @noindent
7743 and finally, if both @samp{%define api.pure full} and @code{%locations} are
7744 used:
7745
7746 @example
7747 int yylex   (YYSTYPE *lvalp, YYLTYPE *llocp,
7748              scanner_mode *mode, environment_type *env);
7749 int yyparse (parser_mode *mode, environment_type *env);
7750 @end example
7751
7752
7753 @node Error Reporting
7754 @section Error Reporting
7755
7756 During its execution the parser may have error messages to pass to the user,
7757 such as a syntax error or memory exhaustion.  How these messages are delivered
7758 to the user must be specified by the developer.
7759
7760 @menu
7761 * Error Reporting Function::         You must supply a @code{yyerror} function.
7762 * Syntax Error Reporting Function::  You can supply a @code{yyreport_syntax_error} function.
7763 @end menu
7764
7765 @node Error Reporting Function
7766 @subsection The Error Reporting Function @code{yyerror}
7767 @cindex error reporting function
7768 @findex yyerror
7769 @cindex parse error
7770 @cindex syntax error
7771
7772 The Bison parser detects a @dfn{syntax error} (or @dfn{parse error})
7773 whenever it reads a token which cannot satisfy any syntax rule.  An
7774 action in the grammar can also explicitly proclaim an error, using the
7775 macro @code{YYERROR} (@pxref{Action Features}).
7776
7777 The Bison parser expects to report the error by calling an error
7778 reporting function named @code{yyerror}, which you must supply.  It is
7779 called by @code{yyparse} whenever a syntax error is found, and it
7780 receives one argument, a message string.  For a syntax error, the string is normally
7781 @w{@code{"syntax error"}}.
7782
7783 @findex %define parse.error detailed
7784 @findex %define parse.error verbose
7785 If you invoke @samp{%define parse.error detailed} (or @samp{custom}) in the
7786 Bison declarations section (@pxref{Bison Declarations}), then Bison provides
7787 a more verbose and specific error message string instead of just plain
7788 @w{@code{"syntax error"}}.  However, that message sometimes contains
7789 incorrect information if LAC is not enabled (@pxref{LAC}).
7790
7791 The parser can detect one other kind of error: memory exhaustion.  This
7792 can happen when the input contains constructions that are very deeply
7793 nested.  It isn't likely you will encounter this, since the Bison
7794 parser normally extends its stack automatically up to a very large limit.  But
7795 if memory is exhausted, @code{yyparse} calls @code{yyerror} in the usual
7796 fashion, except that the argument string is @w{@code{"memory exhausted"}}.
7797
7798 In some cases diagnostics like @w{@code{"syntax error"}} are
7799 translated automatically from English to some other language before
7800 they are passed to @code{yyerror}.  @xref{Internationalization}.
7801
7802 The following definition suffices in simple programs:
7803
7804 @example
7805 @group
7806 void
7807 yyerror (char const *s)
7808 @{
7809 @end group
7810 @group
7811   fprintf (stderr, "%s\n", s);
7812 @}
7813 @end group
7814 @end example
7815
7816 After @code{yyerror} returns to @code{yyparse}, the latter will attempt
7817 error recovery if you have written suitable error recovery grammar rules
7818 (@pxref{Error Recovery}).  If recovery is impossible, @code{yyparse} will
7819 immediately return 1.
7820
7821 Obviously, in location tracking pure parsers, @code{yyerror} should have
7822 access to the current location.  With @code{%define api.pure}, this is
7823 indeed the case for the GLR parsers, but not for the Yacc parser, for
7824 historical reasons, and this is why @code{%define api.pure full} should be
7825 preferred over @code{%define api.pure}.
7826
7827 When @code{%locations %define api.pure full} is used, @code{yyerror} has the
7828 following signature:
7829
7830 @example
7831 void yyerror (YYLTYPE *locp, char const *msg);
7832 @end example
7833
7834 @noindent
7835 The prototypes are only indications of how the code produced by Bison
7836 uses @code{yyerror}.  Bison-generated code always ignores the returned
7837 value, so @code{yyerror} can return any type, including @code{void}.
7838 Also, @code{yyerror} can be a variadic function; that is why the
7839 message is always passed last.
7840
7841 Traditionally @code{yyerror} returns an @code{int} that is always
7842 ignored, but this is purely for historical reasons, and @code{void} is
7843 preferable since it more accurately describes the return type for
7844 @code{yyerror}.
7845
7846 @vindex yynerrs
7847 The variable @code{yynerrs} contains the number of syntax errors
7848 reported so far.  Normally this variable is global; but if you
7849 request a pure parser (@pxref{Pure Decl})
7850 then it is a local variable which only the actions can access.
7851
7852
7853 @node Syntax Error Reporting Function
7854 @subsection The Syntax Error Reporting Function @code{yyreport_syntax_error}
7855
7856 @findex %define parse.error custom
7857 If you invoke @samp{%define parse.error custom} (@pxref{Bison
7858 Declarations}), then the parser no longer passes syntax error messages to
7859 @code{yyerror}, rather it delegates that task to the user by calling the
7860 @code{yyreport_syntax_error} function.
7861
7862 The following functions and types are ``@code{static}'': they are defined in
7863 the implementation file (@file{*.c}) and available only from there.  They
7864 are meant to be used from the grammar's epilogue.
7865
7866 @deftypefun {static int} yyreport_syntax_error (@code{const yypcontext_t *}@var{ctx})
7867 Report a syntax error to the user.  Return 0 on success, @code{YYENOMEM} on
7868 memory exhaustion.  Whether it uses @code{yyerror} is up to the user.
7869 @end deftypefun
7870
7871 Use the following types and functions to build the error message.
7872
7873 @deffn {Type} yypcontext_t
7874 An opaque type that captures the circumstances of the syntax error.
7875 @end deffn
7876
7877 @deffn {Type} yysymbol_kind_t
7878 An enum of all the grammar symbols, tokens and nonterminals.  Its
7879 enumerators are forged from the symbol names:
7880
7881 @example
7882 enum yysymbol_kind_t
7883 @{
7884   YYSYMBOL_YYEMPTY = -2,      /* No symbol.  */
7885   YYSYMBOL_YYEOF = 0,         /* "end of file"  */
7886   YYSYMBOL_YYerror = 1,       /* error  */
7887   YYSYMBOL_YYUNDEF = 2,       /* "invalid token"  */
7888   YYSYMBOL_PLUS = 3,          /* "+"  */
7889   YYSYMBOL_MINUS = 4,         /* "-"  */
7890   [...]
7891   YYSYMBOL_VAR = 14,          /* "variable"  */
7892   YYSYMBOL_NEG = 15,          /* NEG  */
7893   YYSYMBOL_YYACCEPT = 16,     /* $accept  */
7894   YYSYMBOL_exp = 17,          /* exp  */
7895   YYSYMBOL_input = 18         /* input  */
7896 @};
7897 typedef enum yysymbol_kind_t yysymbol_kind_t;
7898 @end example
7899 @end deffn
7900
7901 @deftypefun {static yysymbol_kind_t} yypcontext_token (@code{const yypcontext_t *}@var{ctx})
7902 The ``unexpected'' token: the symbol kind of the lookahead token that caused
7903 the syntax error.  Returns @code{YYSYMBOL_YYEMPTY} if there is no lookahead.
7904 @end deftypefun
7905
7906 @deftypefun {static YYLTYPE *} yypcontext_location (@code{const yypcontext_t *}@var{ctx})
7907 The location of the syntax error (that of the unexpected token).
7908 @end deftypefun
7909
7910 @deftypefun {static int} yypcontext_expected_tokens (@code{const yypcontext_t *}@var{ctx}, @code{yysymbol_kind_t} @var{argv}@code{[]}, @code{int} @var{argc})
7911 Fill @var{argv} with the expected tokens, which never includes
7912 @code{YYSYMBOL_YYEMPTY}, @code{YYSYMBOL_YYerror}, or
7913 @code{YYSYMBOL_YYUNDEF}.
7914
7915 Never put more than @var{argc} elements into @var{argv}, and on success
7916 return the number of tokens stored in @var{argv}.  If there are more
7917 expected tokens than @var{argc}, fill @var{argv} up to @var{argc} and return
7918 0.  If there are no expected tokens, also return 0, but set @code{argv[0]}
7919 to @code{YYSYMBOL_YYEMPTY}.
7920
7921 When LAC is enabled, may return a negative number on errors,
7922 such as @code{YYENOMEM} on memory exhaustion.
7923
7924 If @var{argv} is null, return the size needed to store all the possible
7925 values, which is always less than @code{YYNTOKENS}.
7926 @end deftypefun
7927
7928 @deftypefun {static const char *} yysymbol_name (@code{yysymbol_kind_t} @var{symbol})
7929 The name of the symbol whose kind is @var{symbol}, possibly translated.
7930 @end deftypefun
7931
7932 A custom syntax error function looks as follows.  This implementation is
7933 inappropriate for internationalization; see the @file{c/bistromathic}
7934 example for a better alternative.
7935
7936 @example
7937 static int
7938 yyreport_syntax_error (const yypcontext_t *ctx)
7939 @{
7940   int res = 0;
7941   YYLOCATION_PRINT (stderr, *yypcontext_location (ctx));
7942   fprintf (stderr, ": syntax error");
7943   // Report the tokens expected at this point.
7944   @{
7945     enum @{ TOKENMAX = 5 @};
7946     yysymbol_kind_t expected[TOKENMAX];
7947     int n = yypcontext_expected_tokens (ctx, expected, TOKENMAX);
7948     if (n < 0)
7949       // Forward errors to yyparse.
7950       res = n;
7951     else
7952       for (int i = 0; i < n; ++i)
7953         fprintf (stderr, "%s %s",
7954                  i == 0 ? ": expected" : " or", yysymbol_name (expected[i]));
7955   @}
7956   // Report the unexpected token.
7957   @{
7958     yysymbol_kind_t lookahead = yypcontext_token (ctx);
7959     if (lookahead != YYSYMBOL_YYEMPTY)
7960       fprintf (stderr, " before %s", yysymbol_name (lookahead));
7961   @}
7962   fprintf (stderr, "\n");
7963   return res;
7964 @}
7965 @end example
7966
7967 You still must provide a @code{yyerror} function, used for instance to
7968 report memory exhaustion.
7969
7970 @node Action Features
7971 @section Special Features for Use in Actions
7972 @cindex summary, action features
7973 @cindex action features summary
7974
7975 Here is a table of Bison constructs, variables and macros that are useful in
7976 actions.
7977
7978 @deffn {Variable} $$
7979 Acts like a variable that contains the semantic value for the
7980 grouping made by the current rule.  @xref{Actions}.
7981 @end deffn
7982
7983 @deffn {Variable} $@var{n}
7984 Acts like a variable that contains the semantic value for the
7985 @var{n}th component of the current rule.  @xref{Actions}.
7986 @end deffn
7987
7988 @deffn {Variable} $<@var{typealt}>$
7989 Like @code{$$} but specifies alternative @var{typealt} in the union
7990 specified by the @code{%union} declaration.  @xref{Action Types}.
7991 @end deffn
7992
7993 @deffn {Variable} $<@var{typealt}>@var{n}
7994 Like @code{$@var{n}} but specifies alternative @var{typealt} in the
7995 union specified by the @code{%union} declaration.
7996 @xref{Action Types}.
7997 @end deffn
7998
7999 @deffn {Macro} YYABORT @code{;}
8000 Return immediately from @code{yyparse}, indicating failure.
8001 @xref{Parser Function}.
8002 @end deffn
8003
8004 @deffn {Macro} YYACCEPT @code{;}
8005 Return immediately from @code{yyparse}, indicating success.
8006 @xref{Parser Function}.
8007 @end deffn
8008
8009 @deffn {Macro} YYBACKUP (@var{token}, @var{value})@code{;}
8010 @findex YYBACKUP
8011 Unshift a token.  This macro is allowed only for rules that reduce
8012 a single value, and only when there is no lookahead token.
8013 It is also disallowed in GLR parsers.
8014 It installs a lookahead token with token kind @var{token} and
8015 semantic value @var{value}; then it discards the value that was
8016 going to be reduced by this rule.
8017
8018 If the macro is used when it is not valid, such as when there is
8019 a lookahead token already, then it reports a syntax error with
8020 a message @samp{cannot back up} and performs ordinary error
8021 recovery.
8022
8023 In either case, the rest of the action is not executed.
8024 @end deffn
8025
8026 @deffn {Value} YYEMPTY
8027 Value stored in @code{yychar} when there is no lookahead token.
8028 @end deffn
8029
8030 @deffn {Value} YYEOF
8031 Value stored in @code{yychar} when the lookahead is the end of the input
8032 stream.
8033 @end deffn
8034
8035 @deffn {Macro} YYERROR @code{;}
8036 Cause an immediate syntax error.  This statement initiates error
8037 recovery just as if the parser itself had detected an error; however, it
8038 does not call @code{yyerror}, and does not print any message.  If you
8039 want to print an error message, call @code{yyerror} explicitly before
8040 the @samp{YYERROR;} statement.  @xref{Error Recovery}.
8041 @end deffn
8042
8043 @deffn {Macro} YYNOMEM @code{;}
8044 Return immediately from @code{yyparse}, indicating memory exhaustion.
8045 @xref{Parser Function}.
8046 @end deffn
8047
8048 @deffn {Macro} YYRECOVERING
8049 @findex YYRECOVERING
8050 The expression @code{YYRECOVERING ()} yields 1 when the parser
8051 is recovering from a syntax error, and 0 otherwise.
8052 @xref{Error Recovery}.
8053 @end deffn
8054
8055 @deffn {Variable} yychar
8056 Variable containing either the lookahead token, or @code{YYEOF} when the
8057 lookahead is the end of the input stream, or @code{YYEMPTY} when no lookahead
8058 has been performed so the next token is not yet known.
8059 Do not modify @code{yychar} in a deferred semantic action (@pxref{GLR Semantic
8060 Actions}).
8061 @xref{Lookahead}.
8062 @end deffn
8063
8064 @deffn {Macro} yyclearin @code{;}
8065 Discard the current lookahead token.  This is useful primarily in
8066 error rules.
8067 Do not invoke @code{yyclearin} in a deferred semantic action (@pxref{GLR
8068 Semantic Actions}).
8069 @xref{Error Recovery}.
8070 @end deffn
8071
8072 @deffn {Macro} yyerrok @code{;}
8073 Resume generating error messages immediately for subsequent syntax
8074 errors.  This is useful primarily in error rules.
8075 @xref{Error Recovery}.
8076 @end deffn
8077
8078 @deffn {Variable} yylloc
8079 Variable containing the lookahead token location when @code{yychar} is not set
8080 to @code{YYEMPTY} or @code{YYEOF}.
8081 Do not modify @code{yylloc} in a deferred semantic action (@pxref{GLR Semantic
8082 Actions}).
8083 @xref{Actions and Locations}.
8084 @end deffn
8085
8086 @deffn {Variable} yylval
8087 Variable containing the lookahead token semantic value when @code{yychar} is
8088 not set to @code{YYEMPTY} or @code{YYEOF}.
8089 Do not modify @code{yylval} in a deferred semantic action (@pxref{GLR Semantic
8090 Actions}).
8091 @xref{Actions}.
8092 @end deffn
8093
8094 @deffn {Value} @@$
8095 Acts like a structure variable containing information on the textual
8096 location of the grouping made by the current rule.  @xref{Tracking
8097 Locations}.
8098
8099 @c Check if those paragraphs are still useful or not.
8100
8101 @c @example
8102 @c struct @{
8103 @c   int first_line, last_line;
8104 @c   int first_column, last_column;
8105 @c @};
8106 @c @end example
8107
8108 @c Thus, to get the starting line number of the third component, you would
8109 @c use @samp{@@3.first_line}.
8110
8111 @c In order for the members of this structure to contain valid information,
8112 @c you must make @code{yylex} supply this information about each token.
8113 @c If you need only certain members, then @code{yylex} need only fill in
8114 @c those members.
8115
8116 @c The use of this feature makes the parser noticeably slower.
8117 @end deffn
8118
8119 @deffn {Value} @@@var{n}
8120 @findex @@@var{n}
8121 Acts like a structure variable containing information on the textual
8122 location of the @var{n}th component of the current rule.  @xref{Tracking
8123 Locations}.
8124 @end deffn
8125
8126 @node Internationalization
8127 @section Parser Internationalization
8128 @cindex internationalization
8129 @cindex i18n
8130 @cindex NLS
8131 @cindex gettext
8132 @cindex bison-po
8133
8134 A Bison-generated parser can print diagnostics, including error and
8135 tracing messages.  By default, they appear in English.  However, Bison
8136 also supports outputting diagnostics in the user's native language.  To
8137 make this work, the user should set the usual environment variables.
8138 @xref{Users, , The User's View, gettext, GNU @code{gettext} utilities}.
8139 For example, the shell command @samp{export LC_ALL=fr_CA.UTF-8} might
8140 set the user's locale to French Canadian using the UTF-8
8141 encoding.  The exact set of available locales depends on the user's
8142 installation.
8143
8144 @menu
8145 * Enabling I18n::    Preparing your project to support internationalization.
8146 * Token I18n::       Preparing tokens for internationalization in error messages.
8147 @end menu
8148
8149 @node Enabling I18n
8150 @subsection Enabling Internationalization
8151
8152 The maintainer of a package that uses a Bison-generated parser enables
8153 the internationalization of the parser's output through the following
8154 steps.  Here we assume a package that uses GNU Autoconf and
8155 GNU Automake.
8156
8157 @enumerate
8158 @item
8159 @cindex bison-i18n.m4
8160 Into the directory containing the GNU Autoconf macros used
8161 by the package ---often called @file{m4}--- copy the
8162 @file{bison-i18n.m4} file installed by Bison under
8163 @samp{share/aclocal/bison-i18n.m4} in Bison's installation directory.
8164 For example:
8165
8166 @example
8167 cp /usr/local/share/aclocal/bison-i18n.m4 m4/bison-i18n.m4
8168 @end example
8169
8170 @item
8171 @findex BISON_I18N
8172 @vindex BISON_LOCALEDIR
8173 @vindex YYENABLE_NLS
8174 In the top-level @file{configure.ac}, after the @code{AM_GNU_GETTEXT}
8175 invocation, add an invocation of @code{BISON_I18N}.  This macro is
8176 defined in the file @file{bison-i18n.m4} that you copied earlier.  It
8177 causes @code{configure} to find the value of the
8178 @code{BISON_LOCALEDIR} variable, and it defines the source-language
8179 symbol @code{YYENABLE_NLS} to enable translations in the
8180 Bison-generated parser.
8181
8182 @item
8183 In the @code{main} function of your program, designate the directory
8184 containing Bison's runtime message catalog, through a call to
8185 @samp{bindtextdomain} with domain name @samp{bison-runtime}.
8186 For example:
8187
8188 @example
8189 bindtextdomain ("bison-runtime", BISON_LOCALEDIR);
8190 @end example
8191
8192 Typically this appears after any other call @code{bindtextdomain
8193 (PACKAGE, LOCALEDIR)} that your package already has.  Here we rely on
8194 @samp{BISON_LOCALEDIR} to be defined as a string through the
8195 @file{Makefile}.
8196
8197 @item
8198 In the @file{Makefile.am} that controls the compilation of the @code{main}
8199 function, make @samp{BISON_LOCALEDIR} available as a C preprocessor macro,
8200 either in @samp{DEFS} or in @samp{AM_CPPFLAGS}.  For example:
8201
8202 @example
8203 DEFS = @@DEFS@@ -DBISON_LOCALEDIR='"$(BISON_LOCALEDIR)"'
8204 @end example
8205
8206 or:
8207
8208 @example
8209 AM_CPPFLAGS = -DBISON_LOCALEDIR='"$(BISON_LOCALEDIR)"'
8210 @end example
8211
8212 @item
8213 Finally, invoke the command @command{autoreconf} to generate the build
8214 infrastructure.
8215 @end enumerate
8216
8217 @node Token I18n
8218 @subsection Token Internationalization
8219
8220 When the @code{%define} variable @code{parse.error} is set to @code{custom}
8221 or @code{detailed}, token aliases can be internationalized:
8222
8223 @example
8224 %token
8225     '\n'   _("end of line")
8226   <double>
8227     NUM    _("number")
8228   <symrec*>
8229     FUN    _("function")
8230     VAR    _("variable")
8231 @end example
8232
8233 The remainder of the grammar may freely use either the token symbol
8234 (@code{FUN}) or its alias (@code{"function"}), but not with the
8235 internationalization marker (@code{_("function")}).
8236
8237 If at least one token alias is internationalized, then the generated parser
8238 will use both @code{N_} and @code{_}, that must be defined
8239 (@pxref{Programmers, , The Programmer's View, gettext, GNU @code{gettext}
8240 utilities}).  They are used only on string aliases marked for translation.
8241 In other words, even if your catalog features a translation for
8242 ``function'', then with
8243
8244 @example
8245 %token
8246   <symrec*>
8247     FUN      "function"
8248     VAR    _("variable")
8249 @end example
8250
8251 @noindent
8252 ``function'' will appear untranslated in debug traces and error messages.
8253
8254 Unless defined by the user, the end-of-file token, @code{YYEOF}, is given the
8255 alias ``end of file''.  This alias is also internationalized if the user
8256 internationalized any token.  To map it to another string, use:
8257
8258 @example
8259 %token END 0 _("end of input")
8260 @end example
8261
8262
8263 @node Algorithm
8264 @chapter The Bison Parser Algorithm
8265 @cindex Bison parser algorithm
8266 @cindex algorithm of parser
8267 @cindex shifting
8268 @cindex reduction
8269 @cindex parser stack
8270 @cindex stack, parser
8271
8272 As Bison reads tokens, it pushes them onto a stack along with their
8273 semantic values.  The stack is called the @dfn{parser stack}.  Pushing a
8274 token is traditionally called @dfn{shifting}.
8275
8276 For example, suppose the infix calculator has read @samp{1 + 5 *}, with a
8277 @samp{3} to come.  The stack will have four elements, one for each token
8278 that was shifted.
8279
8280 But the stack does not always have an element for each token read.  When
8281 the last @var{n} tokens and groupings shifted match the components of a
8282 grammar rule, they can be combined according to that rule.  This is called
8283 @dfn{reduction}.  Those tokens and groupings are replaced on the stack by a
8284 single grouping whose symbol is the result (left hand side) of that rule.
8285 Running the rule's action is part of the process of reduction, because this
8286 is what computes the semantic value of the resulting grouping.
8287
8288 For example, if the infix calculator's parser stack contains this:
8289
8290 @example
8291 1 + 5 * 3
8292 @end example
8293
8294 @noindent
8295 and the next input token is a newline character, then the last three
8296 elements can be reduced to 15 via the rule:
8297
8298 @example
8299 expr: expr '*' expr;
8300 @end example
8301
8302 @noindent
8303 Then the stack contains just these three elements:
8304
8305 @example
8306 1 + 15
8307 @end example
8308
8309 @noindent
8310 At this point, another reduction can be made, resulting in the single value
8311 16.  Then the newline token can be shifted.
8312
8313 The parser tries, by shifts and reductions, to reduce the entire input down
8314 to a single grouping whose symbol is the grammar's start-symbol
8315 (@pxref{Language and Grammar}).
8316
8317 This kind of parser is known in the literature as a bottom-up parser.
8318
8319 @menu
8320 * Lookahead::         Parser looks one token ahead when deciding what to do.
8321 * Shift/Reduce::      Conflicts: when either shifting or reduction is valid.
8322 * Precedence::        Operator precedence works by resolving conflicts.
8323 * Contextual Precedence::  When an operator's precedence depends on context.
8324 * Parser States::     The parser is a finite-state-machine with stack.
8325 * Reduce/Reduce::     When two rules are applicable in the same situation.
8326 * Mysterious Conflicts:: Conflicts that look unjustified.
8327 * Tuning LR::         How to tune fundamental aspects of LR-based parsing.
8328 * Generalized LR Parsing::  Parsing arbitrary context-free grammars.
8329 * Memory Management:: What happens when memory is exhausted.  How to avoid it.
8330 @end menu
8331
8332 @node Lookahead
8333 @section Lookahead Tokens
8334 @cindex lookahead token
8335
8336 The Bison parser does @emph{not} always reduce immediately as soon as the
8337 last @var{n} tokens and groupings match a rule.  This is because such a
8338 simple strategy is inadequate to handle most languages.  Instead, when a
8339 reduction is possible, the parser sometimes ``looks ahead'' at the next
8340 token in order to decide what to do.
8341
8342 When a token is read, it is not immediately shifted; first it becomes the
8343 @dfn{lookahead token}, which is not on the stack.  Now the parser can
8344 perform one or more reductions of tokens and groupings on the stack, while
8345 the lookahead token remains off to the side.  When no more reductions
8346 should take place, the lookahead token is shifted onto the stack.  This
8347 does not mean that all possible reductions have been done; depending on the
8348 token kind of the lookahead token, some rules may choose to delay their
8349 application.
8350
8351 Here is a simple case where lookahead is needed.  These rules define
8352 expressions which contain binary addition operators and postfix unary
8353 factorial operators (@samp{!}), and allow parentheses for grouping.
8354
8355 @example
8356 @group
8357 expr:
8358   term '+' expr
8359 | term
8360 ;
8361 @end group
8362
8363 @group
8364 term:
8365   '(' expr ')'
8366 | term '!'
8367 | "number"
8368 ;
8369 @end group
8370 @end example
8371
8372 Suppose that the tokens @w{@samp{1 + 2}} have been read and shifted; what
8373 should be done?  If the following token is @samp{)}, then the first three
8374 tokens must be reduced to form an @code{expr}.  This is the only valid
8375 course, because shifting the @samp{)} would produce a sequence of symbols
8376 @w{@code{term ')'}}, and no rule allows this.
8377
8378 If the following token is @samp{!}, then it must be shifted immediately so
8379 that @w{@samp{2 !}} can be reduced to make a @code{term}.  If instead the
8380 parser were to reduce before shifting, @w{@samp{1 + 2}} would become an
8381 @code{expr}.  It would then be impossible to shift the @samp{!} because
8382 doing so would produce on the stack the sequence of symbols @code{expr
8383 '!'}.  No rule allows that sequence.
8384
8385 @vindex yychar
8386 @vindex yylval
8387 @vindex yylloc
8388 The lookahead token is stored in the variable @code{yychar}.  Its semantic
8389 value and location, if any, are stored in the variables @code{yylval} and
8390 @code{yylloc}.  @xref{Action Features}.
8391
8392 @node Shift/Reduce
8393 @section Shift/Reduce Conflicts
8394 @cindex conflicts
8395 @cindex shift/reduce conflicts
8396 @cindex dangling @code{else}
8397 @cindex @code{else}, dangling
8398
8399 Suppose we are parsing a language which has if-then and if-then-else
8400 statements, with a pair of rules like this:
8401
8402 @example
8403 @group
8404 if_stmt:
8405   "if" expr "then" stmt
8406 | "if" expr "then" stmt "else" stmt
8407 ;
8408 @end group
8409 @end example
8410
8411 @noindent
8412 Here @code{"if"}, @code{"then"} and @code{"else"} are terminal symbols for
8413 specific keyword tokens.
8414
8415 When the @code{"else"} token is read and becomes the lookahead token, the
8416 contents of the stack (assuming the input is valid) are just right for
8417 reduction by the first rule.  But it is also legitimate to shift the
8418 @code{"else"}, because that would lead to eventual reduction by the second
8419 rule.
8420
8421 This situation, where either a shift or a reduction would be valid, is
8422 called a @dfn{shift/reduce conflict}.  Bison is designed to resolve
8423 these conflicts by choosing to shift, unless otherwise directed by
8424 operator precedence declarations.  To see the reason for this, let's
8425 contrast it with the other alternative.
8426
8427 Since the parser prefers to shift the @code{"else"}, the result is to attach
8428 the else-clause to the innermost if-statement, making these two inputs
8429 equivalent:
8430
8431 @example
8432 if x then if y then win; else lose;
8433
8434 if x then do; if y then win; else lose; end;
8435 @end example
8436
8437 But if the parser chose to reduce when possible rather than shift, the
8438 result would be to attach the else-clause to the outermost if-statement,
8439 making these two inputs equivalent:
8440
8441 @example
8442 if x then if y then win; else lose;
8443
8444 if x then do; if y then win; end; else lose;
8445 @end example
8446
8447 The conflict exists because the grammar as written is ambiguous: either
8448 parsing of the simple nested if-statement is legitimate.  The established
8449 convention is that these ambiguities are resolved by attaching the
8450 else-clause to the innermost if-statement; this is what Bison accomplishes
8451 by choosing to shift rather than reduce.  (It would ideally be cleaner to
8452 write an unambiguous grammar, but that is very hard to do in this case.)
8453 This particular ambiguity was first encountered in the specifications of
8454 Algol 60 and is called the ``dangling @code{else}'' ambiguity.
8455
8456 To assist the grammar author in understanding the nature of each conflict,
8457 Bison can be asked to generate ``counterexamples''.  In the present case it
8458 actually even proves that the grammar is ambiguous by exhibiting a string
8459 with two different parses:
8460
8461 @macro danglingElseCex
8462 @group
8463 @ifnottex
8464   Example: @yellow{"if" expr "then"} @blue{"if" expr "then" stmt} @red{•} @blue{"else" stmt}
8465   Shift derivation
8466     @yellow{if_stmt}
8467     @yellow{↳ 3: "if" expr "then"} @green{stmt}
8468                            @green{↳ 2:} @blue{if_stmt}
8469                                  @blue{↳ 4: "if" expr "then" stmt} @red{•} @blue{"else" stmt}
8470   Example: @yellow{"if" expr "then"} @blue{"if" expr "then" stmt} @red{•} @yellow{"else" stmt}
8471   Reduce derivation
8472     @yellow{if_stmt}
8473     @yellow{↳ 4: "if" expr "then"} @green{stmt}                                @yellow{"else" stmt}
8474                            @green{↳ 2:} @blue{if_stmt}
8475                                  @blue{↳ 3: "if" expr "then" stmt} @red{•}
8476 @end ifnottex
8477 @iftex
8478   Example: @yellow{"if" expr "then"} @blue{"if" expr "then" stmt} @red{•} @blue{"else" stmt}
8479   Shift derivation
8480     @yellow{if_stmt}
8481     @yellow{@arrow{} 3: "if" expr "then"} @green{stmt}
8482                            @green{@arrow{} 2:} @blue{if_stmt}
8483                                  @blue{@arrow{} 4: "if" expr "then" stmt} @red{•} @blue{"else" stmt}
8484   Example: @yellow{"if" expr "then"} @blue{"if" expr "then" stmt} @red{•} @yellow{"else" stmt}
8485   Reduce derivation
8486     @yellow{if_stmt}
8487     @yellow{@arrow{} 4: "if" expr "then"} @green{stmt}                                @yellow{"else" stmt}
8488                            @green{@arrow{} 2:} @blue{if_stmt}
8489                                  @blue{@arrow{} 3: "if" expr "then" stmt} @red{•}
8490 @end iftex
8491 @end group
8492 @end macro
8493 @example
8494 @danglingElseCex
8495 @end example
8496
8497 @noindent
8498 @xref{Counterexamples}, for more details.
8499
8500 @sp 1
8501
8502 To avoid warnings from Bison about predictable, @emph{legitimate} shift/reduce
8503 conflicts, you can use the @code{%expect @var{n}} declaration.
8504 There will be no warning as long as the number of shift/reduce conflicts
8505 is exactly @var{n}, and Bison will report an error if there is a
8506 different number.
8507 @xref{Expect Decl}.  However, we don't
8508 recommend the use of @code{%expect} (except @samp{%expect 0}!), as an equal
8509 number of conflicts does not mean that they are the @emph{same}.  When
8510 possible, you should rather use precedence directives to @emph{fix} the
8511 conflicts explicitly (@pxref{Non Operators}).
8512
8513 The definition of @code{if_stmt} above is solely to blame for the
8514 conflict, but the conflict does not actually appear without additional
8515 rules.  Here is a complete Bison grammar file that actually manifests
8516 the conflict:
8517
8518 @example
8519 %%
8520 @group
8521 stmt:
8522   expr
8523 | if_stmt
8524 ;
8525 @end group
8526
8527 @group
8528 if_stmt:
8529   "if" expr "then" stmt
8530 | "if" expr "then" stmt "else" stmt
8531 ;
8532 @end group
8533
8534 expr:
8535   "identifier"
8536 ;
8537 @end example
8538
8539 @node Precedence
8540 @section Operator Precedence
8541 @cindex operator precedence
8542 @cindex precedence of operators
8543
8544 Another situation where shift/reduce conflicts appear is in arithmetic
8545 expressions.  Here shifting is not always the preferred resolution; the
8546 Bison declarations for operator precedence allow you to specify when to
8547 shift and when to reduce.
8548
8549 @menu
8550 * Why Precedence::    An example showing why precedence is needed.
8551 * Using Precedence::  How to specify precedence and associativity.
8552 * Precedence Only::   How to specify precedence only.
8553 * Precedence Examples::  How these features are used in the previous example.
8554 * How Precedence::    How they work.
8555 * Non Operators::     Using precedence for general conflicts.
8556 @end menu
8557
8558 @node Why Precedence
8559 @subsection When Precedence is Needed
8560
8561 Consider the following ambiguous grammar fragment (ambiguous because the
8562 input @w{@samp{1 - 2 * 3}} can be parsed in two different ways):
8563
8564 @example
8565 @group
8566 expr:
8567   expr '-' expr
8568 | expr '*' expr
8569 | expr '<' expr
8570 | '(' expr ')'
8571 @dots{}
8572 ;
8573 @end group
8574 @end example
8575
8576 @noindent
8577 Suppose the parser has seen the tokens @samp{1}, @samp{-} and @samp{2};
8578 should it reduce them via the rule for the subtraction operator?  It
8579 depends on the next token.  Of course, if the next token is @samp{)}, we
8580 must reduce; shifting is invalid because no single rule can reduce the
8581 token sequence @w{@samp{- 2 )}} or anything starting with that.  But if
8582 the next token is @samp{*} or @samp{<}, we have a choice: either
8583 shifting or reduction would allow the parse to complete, but with
8584 different results.
8585
8586 To decide which one Bison should do, we must consider the results.  If
8587 the next operator token @var{op} is shifted, then it must be reduced
8588 first in order to permit another opportunity to reduce the difference.
8589 The result is (in effect) @w{@samp{1 - (2 @var{op} 3)}}.  On the other
8590 hand, if the subtraction is reduced before shifting @var{op}, the result
8591 is @w{@samp{(1 - 2) @var{op} 3}}.  Clearly, then, the choice of shift or
8592 reduce should depend on the relative precedence of the operators
8593 @samp{-} and @var{op}: @samp{*} should be shifted first, but not
8594 @samp{<}.
8595
8596 @cindex associativity
8597 What about input such as @w{@samp{1 - 2 - 5}}; should this be
8598 @w{@samp{(1 - 2) - 5}} or should it be @w{@samp{1 - (2 - 5)}}?  For most
8599 operators we prefer the former, which is called @dfn{left association}.
8600 The latter alternative, @dfn{right association}, is desirable for
8601 assignment operators.  The choice of left or right association is a
8602 matter of whether the parser chooses to shift or reduce when the stack
8603 contains @w{@samp{1 - 2}} and the lookahead token is @samp{-}: shifting
8604 makes the operator right-associative, reducing makes it left-associative.
8605
8606 @node Using Precedence
8607 @subsection Specifying Operator Precedence
8608 @findex %left
8609 @findex %nonassoc
8610 @findex %precedence
8611 @findex %right
8612
8613 Bison allows you to specify these choices with the operator precedence
8614 declarations @code{%left} and @code{%right}.  Each such declaration
8615 contains a list of tokens, which are operators whose precedence and
8616 associativity is being declared.  The @code{%left} declaration makes all
8617 those operators left-associative and the @code{%right} declaration makes
8618 them right-associative.  A third alternative is @code{%nonassoc}, which
8619 declares that it is a syntax error to find the same operator twice ``in a
8620 row''.
8621 The last alternative, @code{%precedence}, allows you to define only
8622 precedence and no associativity at all.  As a result, any
8623 associativity-related conflict that remains will be reported as a
8624 compile-time error.  The directive @code{%nonassoc} creates run-time
8625 errors: using the operator in an associative way is a syntax error.  The
8626 directive @code{%precedence} creates compile-time errors: an operator
8627 @emph{can} be involved in an associativity-related conflict, contrary to
8628 what the grammar author expected.
8629
8630 The relative precedence of different operators is controlled by the
8631 order in which they are declared.  The first precedence/associativity
8632 declaration in the file declares the operators whose
8633 precedence is lowest, the next such declaration declares the operators
8634 whose precedence is a little higher, and so on.
8635
8636 @node Precedence Only
8637 @subsection Specifying Precedence Only
8638 @findex %precedence
8639
8640 Since POSIX Yacc defines only @code{%left}, @code{%right}, and
8641 @code{%nonassoc}, which all define precedence and associativity, little
8642 attention is paid to the fact that precedence cannot be defined without
8643 defining associativity.  Yet, sometimes, when trying to solve a
8644 conflict, precedence suffices.  In such a case, using @code{%left},
8645 @code{%right}, or @code{%nonassoc} might hide future (associativity
8646 related) conflicts that would remain hidden.
8647
8648 The dangling @code{else} ambiguity (@pxref{Shift/Reduce}) can be solved
8649 explicitly.  This shift/reduce conflict occurs in the following situation,
8650 where the dot denotes the current parsing state:
8651
8652 @example
8653 if @var{e1} then if  @var{e2} then @var{s1} • else @var{s2}
8654 @end example
8655
8656 The conflict involves the reduction of the rule @samp{IF expr THEN
8657 stmt}, whose precedence is by default that of its last token
8658 (@code{THEN}), and the shifting of the token @code{ELSE}.  To get the usual
8659 disambiguation (attach the @code{else} to the closest @code{if}),
8660 shifting must be preferred, i.e., the precedence of @code{ELSE} must be
8661 higher than that of @code{THEN}.  But neither is expected to be involved
8662 in an associativity related conflict, which can be specified as follows.
8663
8664 @example
8665 %precedence THEN
8666 %precedence ELSE
8667 @end example
8668
8669 The unary-minus is another typical example where associativity is usually
8670 over-specified, see @ref{Infix Calc}.  The @code{%left} directive is
8671 traditionally used to declare the precedence of @code{NEG}, which is more
8672 than needed since it also defines its associativity.  While this is harmless
8673 in the traditional example, who knows how @code{NEG} might be used in future
8674 evolutions of the grammar@dots{}
8675
8676 @node Precedence Examples
8677 @subsection Precedence Examples
8678
8679 In our example, we would want the following declarations:
8680
8681 @example
8682 %left '<'
8683 %left '-'
8684 %left '*'
8685 @end example
8686
8687 In a more complete example, which supports other operators as well, we
8688 would declare them in groups of equal precedence.  For example, @code{'+'} is
8689 declared with @code{'-'}:
8690
8691 @example
8692 %left '<' '>' '=' "!=" "<=" ">="
8693 %left '+' '-'
8694 %left '*' '/'
8695 @end example
8696
8697 @node How Precedence
8698 @subsection How Precedence Works
8699
8700 The first effect of the precedence declarations is to assign precedence
8701 levels to the terminal symbols declared.  The second effect is to assign
8702 precedence levels to certain rules: each rule gets its precedence from
8703 the last terminal symbol mentioned in the components.  (You can also
8704 specify explicitly the precedence of a rule.  @xref{Contextual
8705 Precedence}.)
8706
8707 Finally, the resolution of conflicts works by comparing the precedence
8708 of the rule being considered with that of the lookahead token.  If the
8709 token's precedence is higher, the choice is to shift.  If the rule's
8710 precedence is higher, the choice is to reduce.  If they have equal
8711 precedence, the choice is made based on the associativity of that
8712 precedence level.  The verbose output file made by @option{-v}
8713 (@pxref{Invocation}) says how each conflict was
8714 resolved.
8715
8716 Not all rules and not all tokens have precedence.  If either the rule or
8717 the lookahead token has no precedence, then the default is to shift.
8718
8719 @node Non Operators
8720 @subsection Using Precedence For Non Operators
8721
8722 Using precedence and associativity directives properly can help fix
8723 shift/reduce conflicts that do not involve arithmetic-like operators.  For
8724 instance, the ``dangling @code{else}'' problem (@pxref{Shift/Reduce}) can be
8725 solved elegantly in two different ways.
8726
8727 In the present case, the conflict is between the token @code{"else"} willing
8728 to be shifted, and the rule @samp{if_stmt: "if" expr "then" stmt}, asking
8729 for reduction.  By default, the precedence of a rule is that of its last
8730 token, here @code{"then"}, so the conflict will be solved appropriately
8731 by giving @code{"else"} a precedence higher than that of @code{"then"}, for
8732 instance as follows:
8733
8734 @example
8735 @group
8736 %precedence "then"
8737 %precedence "else"
8738 @end group
8739 @end example
8740
8741 Alternatively, you may give both tokens the same precedence, in which case
8742 associativity is used to solve the conflict.  To preserve the shift action,
8743 use right associativity:
8744
8745 @example
8746 %right "then" "else"
8747 @end example
8748
8749 Neither solution is perfect however.  Since Bison does not provide, so far,
8750 ``scoped'' precedence, both force you to declare the precedence
8751 of these keywords with respect to the other operators of your grammar.
8752 Therefore, instead of being warned about new conflicts you would be unaware
8753 of (e.g., a shift/reduce conflict due to @samp{if test then 1 else 2 + 3}
8754 being ambiguous: @samp{if test then 1 else (2 + 3)} or @samp{(if test then 1
8755 else 2) + 3}?), the conflict will be already ``fixed''.
8756
8757 @node Contextual Precedence
8758 @section Context-Dependent Precedence
8759 @cindex context-dependent precedence
8760 @cindex unary operator precedence
8761 @cindex precedence, context-dependent
8762 @cindex precedence, unary operator
8763 @findex %prec
8764
8765 Often the precedence of an operator depends on the context.  This sounds
8766 outlandish at first, but it is really very common.  For example, a minus
8767 sign typically has a very high precedence as a unary operator, and a
8768 somewhat lower precedence (lower than multiplication) as a binary operator.
8769
8770 The Bison precedence declarations
8771 can only be used once for a given token; so a token has
8772 only one precedence declared in this way.  For context-dependent
8773 precedence, you need to use an additional mechanism: the @code{%prec}
8774 modifier for rules.
8775
8776 The @code{%prec} modifier declares the precedence of a particular rule by
8777 specifying a terminal symbol whose precedence should be used for that rule.
8778 It's not necessary for that symbol to appear otherwise in the rule.  The
8779 modifier's syntax is:
8780
8781 @example
8782 %prec @var{terminal-symbol}
8783 @end example
8784
8785 @noindent
8786 and it is written after the components of the rule.  Its effect is to
8787 assign the rule the precedence of @var{terminal-symbol}, overriding
8788 the precedence that would be deduced for it in the ordinary way.  The
8789 altered rule precedence then affects how conflicts involving that rule
8790 are resolved (@pxref{Precedence}).
8791
8792 Here is how @code{%prec} solves the problem of unary minus.  First, declare
8793 a precedence for a fictitious terminal symbol named @code{UMINUS}.  There
8794 are no tokens of this type, but the symbol serves to stand for its
8795 precedence:
8796
8797 @example
8798 @dots{}
8799 %left '+' '-'
8800 %left '*'
8801 %left UMINUS
8802 @end example
8803
8804 Now the precedence of @code{UMINUS} can be used in specific rules:
8805
8806 @example
8807 @group
8808 exp:
8809   @dots{}
8810 | exp '-' exp
8811   @dots{}
8812 | '-' exp %prec UMINUS
8813 @end group
8814 @end example
8815
8816 @ifset defaultprec
8817 If you forget to append @code{%prec UMINUS} to the rule for unary
8818 minus, Bison silently assumes that minus has its usual precedence.
8819 This kind of problem can be tricky to debug, since one typically
8820 discovers the mistake only by testing the code.
8821
8822 The @code{%no-default-prec;} declaration makes it easier to discover
8823 this kind of problem systematically.  It causes rules that lack a
8824 @code{%prec} modifier to have no precedence, even if the last terminal
8825 symbol mentioned in their components has a declared precedence.
8826
8827 If @code{%no-default-prec;} is in effect, you must specify @code{%prec}
8828 for all rules that participate in precedence conflict resolution.
8829 Then you will see any shift/reduce conflict until you tell Bison how
8830 to resolve it, either by changing your grammar or by adding an
8831 explicit precedence.  This will probably add declarations to the
8832 grammar, but it helps to protect against incorrect rule precedences.
8833
8834 The effect of @code{%no-default-prec;} can be reversed by giving
8835 @code{%default-prec;}, which is the default.
8836 @end ifset
8837
8838 @node Parser States
8839 @section Parser States
8840 @cindex finite-state machine
8841 @cindex parser state
8842 @cindex state (of parser)
8843
8844 The function @code{yyparse} is implemented using a finite-state machine.
8845 The values pushed on the parser stack are not simply token kind codes; they
8846 represent the entire sequence of terminal and nonterminal symbols at or
8847 near the top of the stack.  The current state collects all the information
8848 about previous input which is relevant to deciding what to do next.
8849
8850 Each time a lookahead token is read, the current parser state together with
8851 the kind of lookahead token are looked up in a table.  This table entry can
8852 say, ``Shift the lookahead token.''  In this case, it also specifies the new
8853 parser state, which is pushed onto the top of the parser stack.  Or it can
8854 say, ``Reduce using rule number @var{n}.''  This means that a certain number
8855 of tokens or groupings are taken off the top of the stack, and replaced by
8856 one grouping.  In other words, that number of states are popped from the
8857 stack, and one new state is pushed.
8858
8859 There is one other alternative: the table can say that the lookahead token
8860 is erroneous in the current state.  This causes error processing to begin
8861 (@pxref{Error Recovery}).
8862
8863 @node Reduce/Reduce
8864 @section Reduce/Reduce Conflicts
8865 @cindex reduce/reduce conflict
8866 @cindex conflicts, reduce/reduce
8867
8868 A reduce/reduce conflict occurs if there are two or more rules that apply
8869 to the same sequence of input.  This usually indicates a serious error
8870 in the grammar.
8871
8872 For example, here is an erroneous attempt to define a sequence
8873 of zero or more @code{word} groupings.
8874
8875 @example
8876 @group
8877 sequence:
8878   %empty         @{ printf ("empty sequence\n"); @}
8879 | maybeword
8880 | sequence word  @{ printf ("added word %s\n", $2); @}
8881 ;
8882 @end group
8883
8884 @group
8885 maybeword:
8886   %empty    @{ printf ("empty maybeword\n"); @}
8887 | word      @{ printf ("single word %s\n", $1); @}
8888 ;
8889 @end group
8890 @end example
8891
8892 @noindent
8893 The error is an ambiguity: as counterexample generation would demonstrate
8894 (@pxref{Counterexamples}), there is more than one way to parse a single
8895 @code{word} into a @code{sequence}.  It could be reduced to a
8896 @code{maybeword} and then into a @code{sequence} via the second rule.
8897 Alternatively, nothing-at-all could be reduced into a @code{sequence}
8898 via the first rule, and this could be combined with the @code{word}
8899 using the third rule for @code{sequence}.
8900
8901 There is also more than one way to reduce nothing-at-all into a
8902 @code{sequence}.  This can be done directly via the first rule,
8903 or indirectly via @code{maybeword} and then the second rule.
8904
8905 You might think that this is a distinction without a difference, because it
8906 does not change whether any particular input is valid or not.  But it does
8907 affect which actions are run.  One parsing order runs the second rule's
8908 action; the other runs the first rule's action and the third rule's action.
8909 In this example, the output of the program changes.
8910
8911 Bison resolves a reduce/reduce conflict by choosing to use the rule that
8912 appears first in the grammar, but it is very risky to rely on this.  Every
8913 reduce/reduce conflict must be studied and usually eliminated.  Here is the
8914 proper way to define @code{sequence}:
8915
8916 @example
8917 @group
8918 sequence:
8919   %empty         @{ printf ("empty sequence\n"); @}
8920 | sequence word  @{ printf ("added word %s\n", $2); @}
8921 ;
8922 @end group
8923 @end example
8924
8925 Here is another common error that yields a reduce/reduce conflict:
8926
8927 @example
8928 @group
8929 sequence:
8930   %empty
8931 | sequence words
8932 | sequence redirects
8933 ;
8934 @end group
8935
8936 @group
8937 words:
8938   %empty
8939 | words word
8940 ;
8941 @end group
8942
8943 @group
8944 redirects:
8945   %empty
8946 | redirects redirect
8947 ;
8948 @end group
8949 @end example
8950
8951 @noindent
8952 The intention here is to define a sequence which can contain either
8953 @code{word} or @code{redirect} groupings.  The individual definitions of
8954 @code{sequence}, @code{words} and @code{redirects} are error-free, but the
8955 three together make a subtle ambiguity: even an empty input can be parsed
8956 in infinitely many ways!
8957
8958 Consider: nothing-at-all could be a @code{words}.  Or it could be two
8959 @code{words} in a row, or three, or any number.  It could equally well be a
8960 @code{redirects}, or two, or any number.  Or it could be a @code{words}
8961 followed by three @code{redirects} and another @code{words}.  And so on.
8962
8963 Here are two ways to correct these rules.  First, to make it a single level
8964 of sequence:
8965
8966 @example
8967 sequence:
8968   %empty
8969 | sequence word
8970 | sequence redirect
8971 ;
8972 @end example
8973
8974 Second, to prevent either a @code{words} or a @code{redirects}
8975 from being empty:
8976
8977 @example
8978 @group
8979 sequence:
8980   %empty
8981 | sequence words
8982 | sequence redirects
8983 ;
8984 @end group
8985
8986 @group
8987 words:
8988   word
8989 | words word
8990 ;
8991 @end group
8992
8993 @group
8994 redirects:
8995   redirect
8996 | redirects redirect
8997 ;
8998 @end group
8999 @end example
9000
9001 Yet this proposal introduces another kind of ambiguity!  The input
9002 @samp{word word} can be parsed as a single @code{words} composed of two
9003 @samp{word}s, or as two one-@code{word} @code{words} (and likewise for
9004 @code{redirect}/@code{redirects}).  However this ambiguity is now a
9005 shift/reduce conflict, and therefore it can now be addressed with precedence
9006 directives.
9007
9008 To simplify the matter, we will proceed with @code{word} and @code{redirect}
9009 being tokens: @code{"word"} and @code{"redirect"}.
9010
9011 To prefer the longest @code{words}, the conflict between the token
9012 @code{"word"} and the rule @samp{sequence: sequence words} must be resolved
9013 as a shift.  To this end, we use the same techniques as exposed above, see
9014 @ref{Non Operators}.  One solution
9015 relies on precedences: use @code{%prec} to give a lower precedence to the
9016 rule:
9017
9018 @example
9019 %precedence "sequence"
9020 %precedence "word"
9021 %%
9022 @group
9023 sequence:
9024   %empty
9025 | sequence word      %prec "sequence"
9026 | sequence redirect  %prec "sequence"
9027 ;
9028 @end group
9029
9030 @group
9031 words:
9032   word
9033 | words "word"
9034 ;
9035 @end group
9036 @end example
9037
9038 Another solution relies on associativity: provide both the token and the
9039 rule with the same precedence, but make them right-associative:
9040
9041 @example
9042 %right "word" "redirect"
9043 %%
9044 @group
9045 sequence:
9046   %empty
9047 | sequence word      %prec "word"
9048 | sequence redirect  %prec "redirect"
9049 ;
9050 @end group
9051 @end example
9052
9053 @node Mysterious Conflicts
9054 @section Mysterious Conflicts
9055 @cindex Mysterious Conflicts
9056
9057 Sometimes reduce/reduce conflicts can occur that don't look warranted.
9058 Here is an example:
9059
9060 @example
9061 @group
9062 %%
9063 def: param_spec return_spec ',';
9064 param_spec:
9065   type
9066 | name_list ':' type
9067 ;
9068 @end group
9069
9070 @group
9071 return_spec:
9072   type
9073 | name ':' type
9074 ;
9075 @end group
9076
9077 type: "id";
9078
9079 @group
9080 name: "id";
9081 name_list:
9082   name
9083 | name ',' name_list
9084 ;
9085 @end group
9086 @end example
9087
9088 It would seem that this grammar can be parsed with only a single token of
9089 lookahead: when a @code{param_spec} is being read, an @code{"id"} is a
9090 @code{name} if a comma or colon follows, or a @code{type} if another
9091 @code{"id"} follows.  In other words, this grammar is LR(1).  Yet Bison
9092 finds one reduce/reduce conflict, for which counterexample generation
9093 (@pxref{Counterexamples}) would find a @emph{nonunifying} example.
9094
9095 @cindex LR
9096 @cindex LALR
9097 This is because Bison does not handle all LR(1) grammars @emph{by default},
9098 for historical reasons.
9099 In this grammar, two contexts, that after an @code{"id"} at the beginning
9100 of a @code{param_spec} and likewise at the beginning of a
9101 @code{return_spec}, are similar enough that Bison assumes they are the
9102 same.
9103 They appear similar because the same set of rules would be
9104 active---the rule for reducing to a @code{name} and that for reducing to
9105 a @code{type}.  Bison is unable to determine at that stage of processing
9106 that the rules would require different lookahead tokens in the two
9107 contexts, so it makes a single parser state for them both.  Combining
9108 the two contexts causes a conflict later.  In parser terminology, this
9109 occurrence means that the grammar is not LALR(1).
9110
9111 @cindex IELR
9112 @cindex canonical LR
9113 For many practical grammars (specifically those that fall into the non-LR(1)
9114 class), the limitations of LALR(1) result in difficulties beyond just
9115 mysterious reduce/reduce conflicts.  The best way to fix all these problems
9116 is to select a different parser table construction algorithm.  Either
9117 IELR(1) or canonical LR(1) would suffice, but the former is more efficient
9118 and easier to debug during development.  @xref{LR Table Construction}, for
9119 details.
9120
9121 If you instead wish to work around LALR(1)'s limitations, you
9122 can often fix a mysterious conflict by identifying the two parser states
9123 that are being confused, and adding something to make them look
9124 distinct.  In the above example, adding one rule to
9125 @code{return_spec} as follows makes the problem go away:
9126
9127 @example
9128 @group
9129 @dots{}
9130 return_spec:
9131   type
9132 | name ':' type
9133 | "id" "bogus"       /* This rule is never used. */
9134 ;
9135 @end group
9136 @end example
9137
9138 This corrects the problem because it introduces the possibility of an
9139 additional active rule in the context after the @code{"id"} at the beginning of
9140 @code{return_spec}.  This rule is not active in the corresponding context
9141 in a @code{param_spec}, so the two contexts receive distinct parser states.
9142 As long as the token @code{"bogus"} is never generated by @code{yylex},
9143 the added rule cannot alter the way actual input is parsed.
9144
9145 In this particular example, there is another way to solve the problem:
9146 rewrite the rule for @code{return_spec} to use @code{"id"} directly
9147 instead of via @code{name}.  This also causes the two confusing
9148 contexts to have different sets of active rules, because the one for
9149 @code{return_spec} activates the altered rule for @code{return_spec}
9150 rather than the one for @code{name}.
9151
9152 @example
9153 @group
9154 param_spec:
9155   type
9156 | name_list ':' type
9157 ;
9158 @end group
9159
9160 @group
9161 return_spec:
9162   type
9163 | "id" ':' type
9164 ;
9165 @end group
9166 @end example
9167
9168 For a more detailed exposition of LALR(1) parsers and parser generators, see
9169 @tcite{DeRemer 1982}.
9170
9171 @node Tuning LR
9172 @section Tuning LR
9173
9174 The default behavior of Bison's LR-based parsers is chosen mostly for
9175 historical reasons, but that behavior is often not robust.  For example, in
9176 the previous section, we discussed the mysterious conflicts that can be
9177 produced by LALR(1), Bison's default parser table construction algorithm.
9178 Another example is Bison's @code{%define parse.error verbose} directive,
9179 which instructs the generated parser to produce verbose syntax error
9180 messages, which can sometimes contain incorrect information.
9181
9182 In this section, we explore several modern features of Bison that allow you
9183 to tune fundamental aspects of the generated LR-based parsers.  Some of
9184 these features easily eliminate shortcomings like those mentioned above.
9185 Others can be helpful purely for understanding your parser.
9186
9187 @menu
9188 * LR Table Construction:: Choose a different construction algorithm.
9189 * Default Reductions::    Disable default reductions.
9190 * LAC::                   Correct lookahead sets in the parser states.
9191 * Unreachable States::    Keep unreachable parser states for debugging.
9192 @end menu
9193
9194 @node LR Table Construction
9195 @subsection LR Table Construction
9196 @cindex Mysterious Conflict
9197 @cindex LALR
9198 @cindex IELR
9199 @cindex canonical LR
9200 @findex %define lr.type
9201
9202 For historical reasons, Bison constructs LALR(1) parser tables by default.
9203 However, LALR does not possess the full language-recognition power of LR.
9204 As a result, the behavior of parsers employing LALR parser tables is often
9205 mysterious.  We presented a simple example of this effect in @ref{Mysterious
9206 Conflicts}.
9207
9208 As we also demonstrated in that example, the traditional approach to
9209 eliminating such mysterious behavior is to restructure the grammar.
9210 Unfortunately, doing so correctly is often difficult.  Moreover, merely
9211 discovering that LALR causes mysterious behavior in your parser can be
9212 difficult as well.
9213
9214 Fortunately, Bison provides an easy way to eliminate the possibility of such
9215 mysterious behavior altogether.  You simply need to activate a more powerful
9216 parser table construction algorithm by using the @code{%define lr.type}
9217 directive.
9218
9219 @deffn {Directive} {%define lr.type} @var{type}
9220 Specify the type of parser tables within the LR(1) family.  The accepted
9221 values for @var{type} are:
9222
9223 @itemize
9224 @item @code{lalr} (default)
9225 @item @code{ielr}
9226 @item @code{canonical-lr}
9227 @end itemize
9228 @end deffn
9229
9230 For example, to activate IELR, you might add the following directive to your
9231 grammar file:
9232
9233 @example
9234 %define lr.type ielr
9235 @end example
9236
9237 @noindent For the example in @ref{Mysterious Conflicts}, the mysterious
9238 conflict is then eliminated, so there is no need to invest time in
9239 comprehending the conflict or restructuring the grammar to fix it.  If,
9240 during future development, the grammar evolves such that all mysterious
9241 behavior would have disappeared using just LALR, you need not fear that
9242 continuing to use IELR will result in unnecessarily large parser tables.
9243 That is, IELR generates LALR tables when LALR (using a deterministic parsing
9244 algorithm) is sufficient to support the full language-recognition power of
9245 LR.  Thus, by enabling IELR at the start of grammar development, you can
9246 safely and completely eliminate the need to consider LALR's shortcomings.
9247
9248 While IELR is almost always preferable, there are circumstances where LALR
9249 or the canonical LR parser tables described by Knuth @pcite{Knuth 1965} can
9250 be useful.  Here we summarize the relative advantages of each parser table
9251 construction algorithm within Bison:
9252
9253 @itemize
9254 @item LALR
9255
9256 There are at least two scenarios where LALR can be worthwhile:
9257
9258 @itemize
9259 @item GLR without static conflict resolution.
9260
9261 @cindex GLR with LALR
9262 When employing GLR parsers (@pxref{GLR Parsers}), if you do not resolve any
9263 conflicts statically (for example, with @code{%left} or @code{%precedence}),
9264 then
9265 the parser explores all potential parses of any given input.  In this case,
9266 the choice of parser table construction algorithm is guaranteed not to alter
9267 the language accepted by the parser.  LALR parser tables are the smallest
9268 parser tables Bison can currently construct, so they may then be preferable.
9269 Nevertheless, once you begin to resolve conflicts statically, GLR behaves
9270 more like a deterministic parser in the syntactic contexts where those
9271 conflicts appear, and so either IELR or canonical LR can then be helpful to
9272 avoid LALR's mysterious behavior.
9273
9274 @item Malformed grammars.
9275
9276 Occasionally during development, an especially malformed grammar with a
9277 major recurring flaw may severely impede the IELR or canonical LR parser
9278 table construction algorithm.  LALR can be a quick way to construct parser
9279 tables in order to investigate such problems while ignoring the more subtle
9280 differences from IELR and canonical LR.
9281 @end itemize
9282
9283 @item IELR
9284
9285 IELR (Inadequacy Elimination LR) is a minimal LR algorithm.  That is, given
9286 any grammar (LR or non-LR), parsers using IELR or canonical LR parser tables
9287 always accept exactly the same set of sentences.  However, like LALR, IELR
9288 merges parser states during parser table construction so that the number of
9289 parser states is often an order of magnitude less than for canonical LR.
9290 More importantly, because canonical LR's extra parser states may contain
9291 duplicate conflicts in the case of non-LR grammars, the number of conflicts
9292 for IELR is often an order of magnitude less as well.  This effect can
9293 significantly reduce the complexity of developing a grammar.
9294
9295 @item Canonical LR
9296
9297 @cindex delayed syntax error detection
9298 @cindex LAC
9299 @findex %nonassoc
9300 While inefficient, canonical LR parser tables can be an interesting means to
9301 explore a grammar because they possess a property that IELR and LALR tables
9302 do not.  That is, if @code{%nonassoc} is not used and default reductions are
9303 left disabled (@pxref{Default Reductions}), then, for every left context of
9304 every canonical LR state, the set of tokens accepted by that state is
9305 guaranteed to be the exact set of tokens that is syntactically acceptable in
9306 that left context.  It might then seem that an advantage of canonical LR
9307 parsers in production is that, under the above constraints, they are
9308 guaranteed to detect a syntax error as soon as possible without performing
9309 any unnecessary reductions.  However, IELR parsers that use LAC are also
9310 able to achieve this behavior without sacrificing @code{%nonassoc} or
9311 default reductions.  For details and a few caveats of LAC, see @ref{LAC}.
9312 @end itemize
9313
9314 For a more detailed exposition of the mysterious behavior in LALR parsers
9315 and the benefits of IELR, see @tcite{Denny 2008}, and @tcite{Denny 2010
9316 November}.
9317
9318 @node Default Reductions
9319 @subsection Default Reductions
9320 @cindex default reductions
9321 @findex %define lr.default-reduction
9322 @findex %nonassoc
9323
9324 After parser table construction, Bison identifies the reduction with the
9325 largest lookahead set in each parser state.  To reduce the size of the
9326 parser state, traditional Bison behavior is to remove that lookahead set and
9327 to assign that reduction to be the default parser action.  Such a reduction
9328 is known as a @dfn{default reduction}.
9329
9330 Default reductions affect more than the size of the parser tables.  They
9331 also affect the behavior of the parser:
9332
9333 @itemize
9334 @item Delayed @code{yylex} invocations.
9335
9336 @cindex delayed yylex invocations
9337 @cindex consistent states
9338 @cindex defaulted states
9339 A @dfn{consistent state} is a state that has only one possible parser
9340 action.  If that action is a reduction and is encoded as a default
9341 reduction, then that consistent state is called a @dfn{defaulted state}.
9342 Upon reaching a defaulted state, a Bison-generated parser does not bother to
9343 invoke @code{yylex} to fetch the next token before performing the reduction.
9344 In other words, whether default reductions are enabled in consistent states
9345 determines how soon a Bison-generated parser invokes @code{yylex} for a
9346 token: immediately when it @emph{reaches} that token in the input or when it
9347 eventually @emph{needs} that token as a lookahead to determine the next
9348 parser action.  Traditionally, default reductions are enabled, and so the
9349 parser exhibits the latter behavior.
9350
9351 The presence of defaulted states is an important consideration when
9352 designing @code{yylex} and the grammar file.  That is, if the behavior of
9353 @code{yylex} can influence or be influenced by the semantic actions
9354 associated with the reductions in defaulted states, then the delay of the
9355 next @code{yylex} invocation until after those reductions is significant.
9356 For example, the semantic actions might pop a scope stack that @code{yylex}
9357 uses to determine what token to return.  Thus, the delay might be necessary
9358 to ensure that @code{yylex} does not look up the next token in a scope that
9359 should already be considered closed.
9360
9361 @item Delayed syntax error detection.
9362
9363 @cindex delayed syntax error detection
9364 When the parser fetches a new token by invoking @code{yylex}, it checks
9365 whether there is an action for that token in the current parser state.  The
9366 parser detects a syntax error if and only if either (1) there is no action
9367 for that token or (2) the action for that token is the error action (due to
9368 the use of @code{%nonassoc}).  However, if there is a default reduction in
9369 that state (which might or might not be a defaulted state), then it is
9370 impossible for condition 1 to exist.  That is, all tokens have an action.
9371 Thus, the parser sometimes fails to detect the syntax error until it reaches
9372 a later state.
9373
9374 @cindex LAC
9375 @c If there's an infinite loop, default reductions can prevent an incorrect
9376 @c sentence from being rejected.
9377 While default reductions never cause the parser to accept syntactically
9378 incorrect sentences, the delay of syntax error detection can have unexpected
9379 effects on the behavior of the parser.  However, the delay can be caused
9380 anyway by parser state merging and the use of @code{%nonassoc}, and it can
9381 be fixed by another Bison feature, LAC.  We discuss the effects of delayed
9382 syntax error detection and LAC more in the next section (@pxref{LAC}).
9383 @end itemize
9384
9385 For canonical LR, the only default reduction that Bison enables by default
9386 is the accept action, which appears only in the accepting state, which has
9387 no other action and is thus a defaulted state.  However, the default accept
9388 action does not delay any @code{yylex} invocation or syntax error detection
9389 because the accept action ends the parse.
9390
9391 For LALR and IELR, Bison enables default reductions in nearly all states by
9392 default.  There are only two exceptions.  First, states that have a shift
9393 action on the @code{error} token do not have default reductions because
9394 delayed syntax error detection could then prevent the @code{error} token
9395 from ever being shifted in that state.  However, parser state merging can
9396 cause the same effect anyway, and LAC fixes it in both cases, so future
9397 versions of Bison might drop this exception when LAC is activated.  Second,
9398 GLR parsers do not record the default reduction as the action on a lookahead
9399 token for which there is a conflict.  The correct action in this case is to
9400 split the parse instead.
9401
9402 To adjust which states have default reductions enabled, use the
9403 @code{%define lr.default-reduction} directive.
9404
9405 @deffn {Directive} {%define lr.default-reduction} @var{where}
9406 Specify the kind of states that are permitted to contain default reductions.
9407 The accepted values of @var{where} are:
9408 @itemize
9409 @item @code{most} (default for LALR and IELR)
9410 @item @code{consistent}
9411 @item @code{accepting} (default for canonical LR)
9412 @end itemize
9413 @end deffn
9414
9415 @node LAC
9416 @subsection LAC
9417 @findex %define parse.lac
9418 @cindex LAC
9419 @cindex lookahead correction
9420
9421 Canonical LR, IELR, and LALR can suffer from a couple of problems upon
9422 encountering a syntax error.  First, the parser might perform additional
9423 parser stack reductions before discovering the syntax error.  Such
9424 reductions can perform user semantic actions that are unexpected because
9425 they are based on an invalid token, and they cause error recovery to begin
9426 in a different syntactic context than the one in which the invalid token was
9427 encountered.  Second, when verbose error messages are enabled (@pxref{Error
9428 Reporting}), the expected token list in the syntax error message can both
9429 contain invalid tokens and omit valid tokens.
9430
9431 The culprits for the above problems are @code{%nonassoc}, default reductions
9432 in inconsistent states (@pxref{Default Reductions}), and parser state
9433 merging.  Because IELR and LALR merge parser states, they suffer the most.
9434 Canonical LR can suffer only if @code{%nonassoc} is used or if default
9435 reductions are enabled for inconsistent states.
9436
9437 LAC (Lookahead Correction) is a new mechanism within the parsing algorithm
9438 that solves these problems for canonical LR, IELR, and LALR without
9439 sacrificing @code{%nonassoc}, default reductions, or state merging.  You can
9440 enable LAC with the @code{%define parse.lac} directive.
9441
9442 @deffn {Directive} {%define parse.lac} @var{value}
9443 Enable LAC to improve syntax error handling.  The accepted values for @var{value} are:
9444 @itemize
9445 @item @code{none} (default)
9446 @item @code{full}
9447 @end itemize
9448 This feature is currently only available for deterministic parsers in C and C++.
9449 @end deffn
9450
9451 Conceptually, the LAC mechanism is straightforward.  Whenever the parser
9452 fetches a new token from the scanner so that it can determine the next
9453 parser action, it immediately suspends normal parsing and performs an
9454 exploratory parse using a temporary copy of the normal parser state stack.
9455 During this exploratory parse, the parser does not perform user semantic
9456 actions.  If the exploratory parse reaches a shift action, normal parsing
9457 then resumes on the normal parser stacks.  If the exploratory parse reaches
9458 an error instead, the parser reports a syntax error.  If verbose syntax
9459 error messages are enabled, the parser must then discover the list of
9460 expected tokens, so it performs a separate exploratory parse for each token
9461 in the grammar.
9462
9463 There is one subtlety about the use of LAC.  That is, when in a consistent
9464 parser state with a default reduction, the parser will not attempt to fetch
9465 a token from the scanner because no lookahead is needed to determine the
9466 next parser action.  Thus, whether default reductions are enabled in
9467 consistent states (@pxref{Default Reductions}) affects how soon the parser
9468 detects a syntax error: immediately when it @emph{reaches} an erroneous
9469 token or when it eventually @emph{needs} that token as a lookahead to
9470 determine the next parser action.  The latter behavior is probably more
9471 intuitive, so Bison currently provides no way to achieve the former behavior
9472 while default reductions are enabled in consistent states.
9473
9474 Thus, when LAC is in use, for some fixed decision of whether to enable
9475 default reductions in consistent states, canonical LR and IELR behave almost
9476 exactly the same for both syntactically acceptable and syntactically
9477 unacceptable input.  While LALR still does not support the full
9478 language-recognition power of canonical LR and IELR, LAC at least enables
9479 LALR's syntax error handling to correctly reflect LALR's
9480 language-recognition power.
9481
9482 There are a few caveats to consider when using LAC:
9483
9484 @itemize
9485 @item Infinite parsing loops.
9486
9487 IELR plus LAC does have one shortcoming relative to canonical LR.  Some
9488 parsers generated by Bison can loop infinitely.  LAC does not fix infinite
9489 parsing loops that occur between encountering a syntax error and detecting
9490 it, but enabling canonical LR or disabling default reductions sometimes
9491 does.
9492
9493 @item Verbose error message limitations.
9494
9495 Because of internationalization considerations, Bison-generated parsers
9496 limit the size of the expected token list they are willing to report in a
9497 verbose syntax error message.  If the number of expected tokens exceeds that
9498 limit, the list is simply dropped from the message.  Enabling LAC can
9499 increase the size of the list and thus cause the parser to drop it.  Of
9500 course, dropping the list is better than reporting an incorrect list.
9501
9502 @item Performance.
9503
9504 Because LAC requires many parse actions to be performed twice, it can have a
9505 performance penalty.  However, not all parse actions must be performed
9506 twice.  Specifically, during a series of default reductions in consistent
9507 states and shift actions, the parser never has to initiate an exploratory
9508 parse.  Moreover, the most time-consuming tasks in a parse are often the
9509 file I/O, the lexical analysis performed by the scanner, and the user's
9510 semantic actions, but none of these are performed during the exploratory
9511 parse.  Finally, the base of the temporary stack used during an exploratory
9512 parse is a pointer into the normal parser state stack so that the stack is
9513 never physically copied.  In our experience, the performance penalty of LAC
9514 has proved insignificant for practical grammars.
9515 @end itemize
9516
9517 While the LAC algorithm shares techniques that have been recognized in the
9518 parser community for years, for the publication that introduces LAC, see
9519 @tcite{Denny 2010 May}.
9520
9521 @node Unreachable States
9522 @subsection Unreachable States
9523 @findex %define lr.keep-unreachable-state
9524 @cindex unreachable states
9525
9526 If there exists no sequence of transitions from the parser's start state to
9527 some state @var{s}, then Bison considers @var{s} to be an @dfn{unreachable
9528 state}.  A state can become unreachable during conflict resolution if Bison
9529 disables a shift action leading to it from a predecessor state.
9530
9531 By default, Bison removes unreachable states from the parser after conflict
9532 resolution because they are useless in the generated parser.  However,
9533 keeping unreachable states is sometimes useful when trying to understand the
9534 relationship between the parser and the grammar.
9535
9536 @deffn {Directive} {%define lr.keep-unreachable-state} @var{value}
9537 Request that Bison allow unreachable states to remain in the parser tables.
9538 @var{value} must be a Boolean.  The default is @code{false}.
9539 @end deffn
9540
9541 There are a few caveats to consider:
9542
9543 @itemize @bullet
9544 @item Missing or extraneous warnings.
9545
9546 Unreachable states may contain conflicts and may use rules not used in any
9547 other state.  Thus, keeping unreachable states may induce warnings that are
9548 irrelevant to your parser's behavior, and it may eliminate warnings that are
9549 relevant.  Of course, the change in warnings may actually be relevant to a
9550 parser table analysis that wants to keep unreachable states, so this
9551 behavior will likely remain in future Bison releases.
9552
9553 @item Other useless states.
9554
9555 While Bison is able to remove unreachable states, it is not guaranteed to
9556 remove other kinds of useless states.  Specifically, when Bison disables
9557 reduce actions during conflict resolution, some goto actions may become
9558 useless, and thus some additional states may become useless.  If Bison were
9559 to compute which goto actions were useless and then disable those actions,
9560 it could identify such states as unreachable and then remove those states.
9561 However, Bison does not compute which goto actions are useless.
9562 @end itemize
9563
9564 @node Generalized LR Parsing
9565 @section Generalized LR (GLR) Parsing
9566 @cindex GLR parsing
9567 @cindex generalized LR (GLR) parsing
9568 @cindex ambiguous grammars
9569 @cindex nondeterministic parsing
9570
9571 Bison produces @emph{deterministic} parsers that choose uniquely
9572 when to reduce and which reduction to apply
9573 based on a summary of the preceding input and on one extra token of lookahead.
9574 As a result, normal Bison handles a proper subset of the family of
9575 context-free languages.
9576 Ambiguous grammars, since they have strings with more than one possible
9577 sequence of reductions, cannot have deterministic parsers in this sense.
9578 The same is true of languages that require more than one symbol of
9579 lookahead, since the parser lacks the information necessary to make a
9580 decision at the point it must be made in a shift/reduce parser.
9581 Finally, as previously mentioned (@pxref{Mysterious Conflicts}),
9582 there are languages where Bison's default choice of how to
9583 summarize the input seen so far loses necessary information.
9584
9585 When you use the @samp{%glr-parser} declaration in your grammar file,
9586 Bison generates a parser that uses a different algorithm, called
9587 Generalized LR (or GLR).  A Bison GLR
9588 parser uses the same basic
9589 algorithm for parsing as an ordinary Bison parser, but behaves
9590 differently in cases where there is a shift/reduce conflict that has not
9591 been resolved by precedence rules (@pxref{Precedence}) or a
9592 reduce/reduce conflict.  When a GLR parser encounters such a
9593 situation, it
9594 effectively @emph{splits} into several parsers, one for each possible
9595 shift or reduction.  These parsers then proceed as usual, consuming
9596 tokens in lock-step.  Some of the stacks may encounter other conflicts
9597 and split further, with the result that instead of a sequence of states,
9598 a Bison GLR parsing stack is what is in effect a tree of states.
9599
9600 In effect, each stack represents a guess as to what the proper parse
9601 is.  Additional input may indicate that a guess was wrong, in which case
9602 the appropriate stack silently disappears.  Otherwise, the semantic
9603 actions generated in each stack are saved, rather than being executed
9604 immediately.  When a stack disappears, its saved semantic actions never
9605 get executed.  When a reduction causes two stacks to become equivalent,
9606 their sets of semantic actions are both saved with the state that
9607 results from the reduction.  We say that two stacks are equivalent
9608 when they both represent the same sequence of states,
9609 and each pair of corresponding states represents a
9610 grammar symbol that produces the same segment of the input token
9611 stream.
9612
9613 Whenever the parser makes a transition from having multiple
9614 states to having one, it reverts to the normal deterministic parsing
9615 algorithm, after resolving and executing the saved-up actions.
9616 At this transition, some of the states on the stack will have semantic
9617 values that are sets (actually multisets) of possible actions.  The
9618 parser tries to pick one of the actions by first finding one whose rule
9619 has the highest dynamic precedence, as set by the @samp{%dprec}
9620 declaration.  Otherwise, if the alternative actions are not ordered by
9621 precedence, but the same merging function is declared for both
9622 rules by the @samp{%merge} declaration,
9623 Bison resolves and evaluates both and then calls the merge function on
9624 the result.  Otherwise, it reports an ambiguity.
9625
9626 It is possible to use a data structure for the GLR parsing tree that
9627 permits the processing of any LR(1) grammar in linear time (in the
9628 size of the input), any unambiguous (not necessarily
9629 LR(1)) grammar in
9630 quadratic worst-case time, and any general (possibly ambiguous)
9631 context-free grammar in cubic worst-case time.  However, Bison currently
9632 uses a simpler data structure that requires time proportional to the
9633 length of the input times the maximum number of stacks required for any
9634 prefix of the input.  Thus, really ambiguous or nondeterministic
9635 grammars can require exponential time and space to process.  Such badly
9636 behaving examples, however, are not generally of practical interest.
9637 Usually, nondeterminism in a grammar is local---the parser is ``in
9638 doubt'' only for a few tokens at a time.  Therefore, the current data
9639 structure should generally be adequate.  On LR(1) portions of a
9640 grammar, in particular, it is only slightly slower than with the
9641 deterministic LR(1) Bison parser.
9642
9643 For a more detailed exposition of GLR parsers, see @tcite{Scott 2000}.
9644
9645 @node Memory Management
9646 @section Memory Management, and How to Avoid Memory Exhaustion
9647 @cindex memory exhaustion
9648 @cindex memory management
9649 @cindex stack overflow
9650 @cindex parser stack overflow
9651 @cindex overflow of parser stack
9652
9653 The Bison parser stack can run out of memory if too many tokens are shifted and
9654 not reduced.  When this happens, the parser function @code{yyparse}
9655 calls @code{yyerror} and then returns 2.
9656
9657 Because Bison parsers have growing stacks, hitting the upper limit
9658 usually results from using a right recursion instead of a left
9659 recursion, see @ref{Recursion}.
9660
9661 @vindex YYMAXDEPTH
9662 By defining the macro @code{YYMAXDEPTH}, you can control how deep the
9663 parser stack can become before memory is exhausted.  Define the
9664 macro with a value that is an integer.  This value is the maximum number
9665 of tokens that can be shifted (and not reduced) before overflow.
9666
9667 The stack space allowed is not necessarily allocated.  If you specify a
9668 large value for @code{YYMAXDEPTH}, the parser normally allocates a small
9669 stack at first, and then makes it bigger by stages as needed.  This
9670 increasing allocation happens automatically and silently.  Therefore,
9671 you do not need to make @code{YYMAXDEPTH} painfully small merely to save
9672 space for ordinary inputs that do not need much stack.
9673
9674 However, do not allow @code{YYMAXDEPTH} to be a value so large that
9675 arithmetic overflow could occur when calculating the size of the stack
9676 space.  Also, do not allow @code{YYMAXDEPTH} to be less than
9677 @code{YYINITDEPTH}.
9678
9679 @cindex default stack limit
9680 The default value of @code{YYMAXDEPTH}, if you do not define it, is
9681 10000.
9682
9683 @vindex YYINITDEPTH
9684 You can control how much stack is allocated initially by defining the
9685 macro @code{YYINITDEPTH} to a positive integer.  For the deterministic
9686 parser in C, this value must be a compile-time constant
9687 unless you are assuming C99 or some other target language or compiler
9688 that allows variable-length arrays.  The default is 200.
9689
9690 Do not allow @code{YYINITDEPTH} to be greater than @code{YYMAXDEPTH}.
9691
9692 You can generate a deterministic parser containing C++ user code from the
9693 default (C) skeleton, as well as from the C++ skeleton (@pxref{C++
9694 Parsers}).  However, if you do use the default skeleton and want to allow
9695 the parsing stack to grow, be careful not to use semantic types or location
9696 types that require non-trivial copy constructors.  The C skeleton bypasses
9697 these constructors when copying data to new, larger stacks.
9698
9699 @node Error Recovery
9700 @chapter Error Recovery
9701 @cindex error recovery
9702 @cindex recovery from errors
9703
9704 It is not usually acceptable to have a program terminate on a syntax
9705 error.  For example, a compiler should recover sufficiently to parse the
9706 rest of the input file and check it for errors; a calculator should accept
9707 another expression.
9708
9709 In a simple interactive command parser where each input is one line, it may
9710 be sufficient to allow @code{yyparse} to return 1 on error and have the
9711 caller ignore the rest of the input line when that happens (and then call
9712 @code{yyparse} again).  But this is inadequate for a compiler, because it
9713 forgets all the syntactic context leading up to the error.  A syntax error
9714 deep within a function in the compiler input should not cause the compiler
9715 to treat the following line like the beginning of a source file.
9716
9717 @findex error
9718 You can define how to recover from a syntax error by writing rules to
9719 recognize the special token @code{error}.  This is a terminal symbol that
9720 is always defined (you need not declare it) and reserved for error
9721 handling.  The Bison parser generates an @code{error} token whenever a
9722 syntax error happens; if you have provided a rule to recognize this token
9723 in the current context, the parse can continue.
9724
9725 For example:
9726
9727 @example
9728 stmts:
9729   %empty
9730 | stmts '\n'
9731 | stmts exp '\n'
9732 | stmts error '\n'
9733 @end example
9734
9735 The fourth rule in this example says that an error followed by a newline
9736 makes a valid addition to any @code{stmts}.
9737
9738 What happens if a syntax error occurs in the middle of an @code{exp}?  The
9739 error recovery rule, interpreted strictly, applies to the precise sequence
9740 of a @code{stmts}, an @code{error} and a newline.  If an error occurs in
9741 the middle of an @code{exp}, there will probably be some additional tokens
9742 and subexpressions on the stack after the last @code{stmts}, and there
9743 will be tokens to read before the next newline.  So the rule is not
9744 applicable in the ordinary way.
9745
9746 But Bison can force the situation to fit the rule, by discarding part of the
9747 semantic context and part of the input.  First it discards states and
9748 objects from the stack until it gets back to a state in which the
9749 @code{error} token is acceptable.  (This means that the subexpressions
9750 already parsed are discarded, back to the last complete @code{stmts}.)  At
9751 this point the @code{error} token can be shifted.  Then, if the old
9752 lookahead token is not acceptable to be shifted next, the parser reads
9753 tokens and discards them until it finds a token which is acceptable.  In
9754 this example, Bison reads and discards input until the next newline so that
9755 the fourth rule can apply.  Note that discarded symbols are possible sources
9756 of memory leaks; see @ref{Destructor Decl}, for a means to reclaim this
9757 memory.
9758
9759 The choice of error rules in the grammar is a choice of strategies for
9760 error recovery.  A simple and useful strategy is simply to skip the rest of
9761 the current input line or current statement if an error is detected:
9762
9763 @example
9764 stmt: error ';'  /* On error, skip until ';' is read. */
9765 @end example
9766
9767 It is also useful to recover to the matching close-delimiter of an
9768 opening-delimiter that has already been parsed.  Otherwise the
9769 close-delimiter will probably appear to be unmatched, and generate another,
9770 spurious error message:
9771
9772 @example
9773 primary:
9774   '(' expr ')'
9775 | '(' error ')'
9776 @dots{}
9777 ;
9778 @end example
9779
9780 Error recovery strategies are necessarily guesses.  When they guess wrong,
9781 one syntax error often leads to another.  In the @code{stmt} example above,
9782 the error recovery rule guesses that an error is due to bad input within one
9783 @code{stmt}.  Suppose that instead a spurious semicolon is inserted in the
9784 middle of a valid @code{stmt}.  After the error recovery rule recovers from
9785 the first error, another syntax error will be found straight away, since the
9786 text following the spurious semicolon is also an invalid @code{stmt}.
9787
9788 To prevent an outpouring of error messages, the parser will output no error
9789 message for another syntax error that happens shortly after the first; only
9790 after three consecutive input tokens have been successfully shifted will
9791 error messages resume.
9792
9793 Note that rules which accept the @code{error} token may have actions, just
9794 as any other rules can.
9795
9796 @findex yyerrok
9797 You can make error messages resume immediately by using the macro
9798 @code{yyerrok} in an action.  If you do this in the error rule's action, no
9799 error messages will be suppressed.  This macro requires no arguments;
9800 @samp{yyerrok;} is a valid C statement.
9801
9802 @findex yyclearin
9803 The previous lookahead token is reanalyzed immediately after an error.  If
9804 this is unacceptable, then the macro @code{yyclearin} may be used to clear
9805 this token.  Write the statement @samp{yyclearin;} in the error rule's
9806 action.
9807 @xref{Action Features}.
9808
9809 For example, suppose that on a syntax error, an error handling routine is
9810 called that advances the input stream to some point where parsing should
9811 once again commence.  The next symbol returned by the lexical scanner is
9812 probably correct.  The previous lookahead token ought to be discarded
9813 with @samp{yyclearin;}.
9814
9815 @vindex YYRECOVERING
9816 The expression @code{YYRECOVERING ()} yields 1 when the parser
9817 is recovering from a syntax error, and 0 otherwise.
9818 Syntax error diagnostics are suppressed while recovering from a syntax
9819 error.
9820
9821 @node Context Dependency
9822 @chapter Handling Context Dependencies
9823
9824 The Bison paradigm is to parse tokens first, then group them into larger
9825 syntactic units.  In many languages, the meaning of a token is affected by
9826 its context.  Although this violates the Bison paradigm, certain techniques
9827 (known as @dfn{kludges}) may enable you to write Bison parsers for such
9828 languages.
9829
9830 @menu
9831 * Semantic Tokens::   Token parsing can depend on the semantic context.
9832 * Lexical Tie-ins::   Token parsing can depend on the syntactic context.
9833 * Tie-in Recovery::   Lexical tie-ins have implications for how
9834                         error recovery rules must be written.
9835 @end menu
9836
9837 (Actually, ``kludge'' means any technique that gets its job done but is
9838 neither clean nor robust.)
9839
9840 @node Semantic Tokens
9841 @section Semantic Info in Token Kinds
9842
9843 The C language has a context dependency: the way an identifier is used
9844 depends on what its current meaning is.  For example, consider this:
9845
9846 @example
9847 foo (x);
9848 @end example
9849
9850 This looks like a function call statement, but if @code{foo} is a typedef
9851 name, then this is actually a declaration of @code{x}.  How can a Bison
9852 parser for C decide how to parse this input?
9853
9854 The method used in GNU C is to have two different token kinds,
9855 @code{IDENTIFIER} and @code{TYPENAME}.  When @code{yylex} finds an
9856 identifier, it looks up the current declaration of the identifier in order
9857 to decide which token kind to return: @code{TYPENAME} if the identifier is
9858 declared as a typedef, @code{IDENTIFIER} otherwise.
9859
9860 The grammar rules can then express the context dependency by the choice of
9861 token kind to recognize.  @code{IDENTIFIER} is accepted as an expression,
9862 but @code{TYPENAME} is not.  @code{TYPENAME} can start a declaration, but
9863 @code{IDENTIFIER} cannot.  In contexts where the meaning of the identifier
9864 is @emph{not} significant, such as in declarations that can shadow a
9865 typedef name, either @code{TYPENAME} or @code{IDENTIFIER} is
9866 accepted---there is one rule for each of the two token kinds.
9867
9868 This technique is simple to use if the decision of which kinds of
9869 identifiers to allow is made at a place close to where the identifier is
9870 parsed.  But in C this is not always so: C allows a declaration to
9871 redeclare a typedef name provided an explicit type has been specified
9872 earlier:
9873
9874 @example
9875 typedef int foo, bar;
9876 int baz (void)
9877 @group
9878 @{
9879   static bar (bar);      /* @r{redeclare @code{bar} as static variable} */
9880   extern foo foo (foo);  /* @r{redeclare @code{foo} as function} */
9881   return foo (bar);
9882 @}
9883 @end group
9884 @end example
9885
9886 Unfortunately, the name being declared is separated from the declaration
9887 construct itself by a complicated syntactic structure---the ``declarator''.
9888
9889 As a result, part of the Bison parser for C needs to be duplicated, with
9890 all the nonterminal names changed: once for parsing a declaration in
9891 which a typedef name can be redefined, and once for parsing a
9892 declaration in which that can't be done.  Here is a part of the
9893 duplication, with actions omitted for brevity:
9894
9895 @example
9896 @group
9897 initdcl:
9898   declarator maybeasm '=' init
9899 | declarator maybeasm
9900 ;
9901 @end group
9902
9903 @group
9904 notype_initdcl:
9905   notype_declarator maybeasm '=' init
9906 | notype_declarator maybeasm
9907 ;
9908 @end group
9909 @end example
9910
9911 @noindent
9912 Here @code{initdcl} can redeclare a typedef name, but @code{notype_initdcl}
9913 cannot.  The distinction between @code{declarator} and
9914 @code{notype_declarator} is the same sort of thing.
9915
9916 There is some similarity between this technique and a lexical tie-in
9917 (described next), in that information which alters the lexical analysis is
9918 changed during parsing by other parts of the program.  The difference is
9919 that here the information is global, and is used for other purposes in the
9920 program.  A true lexical tie-in has a special-purpose flag controlled by
9921 the syntactic context.
9922
9923 @node Lexical Tie-ins
9924 @section Lexical Tie-ins
9925 @cindex lexical tie-in
9926
9927 One way to handle context-dependency is the @dfn{lexical tie-in}: a flag
9928 which is set by Bison actions, whose purpose is to alter the way tokens are
9929 parsed.
9930
9931 For example, suppose we have a language vaguely like C, but with a special
9932 construct @samp{hex (@var{hex-expr})}.  After the keyword @code{hex} comes
9933 an expression in parentheses in which all integers are hexadecimal.  In
9934 particular, the token @samp{a1b} must be treated as an integer rather than
9935 as an identifier if it appears in that context.  Here is how you can do it:
9936
9937 @example
9938 @group
9939 %@{
9940   int hexflag;
9941   int yylex (void);
9942   void yyerror (char const *);
9943 %@}
9944 %%
9945 @dots{}
9946 @end group
9947 @group
9948 expr:
9949   IDENTIFIER
9950 | constant
9951 | HEX '('        @{ hexflag = 1; @}
9952     expr ')'     @{ hexflag = 0; $$ = $4; @}
9953 | expr '+' expr  @{ $$ = make_sum ($1, $3); @}
9954 @dots{}
9955 ;
9956 @end group
9957
9958 @group
9959 constant:
9960   INTEGER
9961 | STRING
9962 ;
9963 @end group
9964 @end example
9965
9966 @noindent
9967 Here we assume that @code{yylex} looks at the value of @code{hexflag}; when
9968 it is nonzero, all integers are parsed in hexadecimal, and tokens starting
9969 with letters are parsed as integers if possible.
9970
9971 The declaration of @code{hexflag} shown in the prologue of the grammar file
9972 is needed to make it accessible to the actions (@pxref{Prologue}).  You must
9973 also write the code in @code{yylex} to obey the flag.
9974
9975 @node Tie-in Recovery
9976 @section Lexical Tie-ins and Error Recovery
9977
9978 Lexical tie-ins make strict demands on any error recovery rules you have.
9979 @xref{Error Recovery}.
9980
9981 The reason for this is that the purpose of an error recovery rule is to
9982 abort the parsing of one construct and resume in some larger construct.
9983 For example, in C-like languages, a typical error recovery rule is to skip
9984 tokens until the next semicolon, and then start a new statement, like this:
9985
9986 @example
9987 stmt:
9988   expr ';'
9989 | IF '(' expr ')' stmt @{ @dots{} @}
9990 @dots{}
9991 | error ';'  @{ hexflag = 0; @}
9992 ;
9993 @end example
9994
9995 If there is a syntax error in the middle of a @samp{hex (@var{expr})}
9996 construct, this error rule will apply, and then the action for the
9997 completed @samp{hex (@var{expr})} will never run.  So @code{hexflag} would
9998 remain set for the entire rest of the input, or until the next @code{hex}
9999 keyword, causing identifiers to be misinterpreted as integers.
10000
10001 To avoid this problem the error recovery rule itself clears @code{hexflag}.
10002
10003 There may also be an error recovery rule that works within expressions.
10004 For example, there could be a rule which applies within parentheses
10005 and skips to the close-parenthesis:
10006
10007 @example
10008 @group
10009 expr:
10010   @dots{}
10011 | '(' expr ')'   @{ $$ = $2; @}
10012 | '(' error ')'
10013 @dots{}
10014 @end group
10015 @end example
10016
10017 If this rule acts within the @code{hex} construct, it is not going to abort
10018 that construct (since it applies to an inner level of parentheses within
10019 the construct).  Therefore, it should not clear the flag: the rest of
10020 the @code{hex} construct should be parsed with the flag still in effect.
10021
10022 What if there is an error recovery rule which might abort out of the
10023 @code{hex} construct or might not, depending on circumstances?  There is no
10024 way you can write the action to determine whether a @code{hex} construct is
10025 being aborted or not.  So if you are using a lexical tie-in, you had better
10026 make sure your error recovery rules are not of this kind.  Each rule must
10027 be such that you can be sure that it always will, or always won't, have to
10028 clear the flag.
10029
10030 @c ================================================== Debugging Your Parser
10031
10032 @node Debugging
10033 @chapter Debugging Your Parser
10034
10035 Developing a parser can be a challenge, especially if you don't understand
10036 the algorithm (@pxref{Algorithm}).  This chapter explains how to understand
10037 and debug a parser.
10038
10039 The most frequent issue users face is solving their conflicts.  To fix them,
10040 the first step is understanding how they arise in a given grammar.  This is
10041 made much easier by automated generation of counterexamples, covered in the
10042 first section (@pxref{Counterexamples}).
10043
10044 In most cases though, looking at the structure of the automaton is still
10045 needed.  The following sections explain how to generate and read the
10046 detailed structural description of the automaton.  There are several formats
10047 available:
10048 @itemize @minus
10049 @item
10050 as text, see @ref{Understanding};
10051
10052 @item
10053 as a graph, see @ref{Graphviz};
10054
10055 @item
10056 or as a markup report that can be turned, for instance, into HTML, see
10057 @ref{Xml}.
10058 @end itemize
10059
10060 The last section focuses on the dynamic part of the parser: how to enable
10061 and understand the parser run-time traces (@pxref{Tracing}).
10062
10063 @menu
10064 * Counterexamples::   Understanding conflicts.
10065 * Understanding::     Understanding the structure of your parser.
10066 * Graphviz::          Getting a visual representation of the parser.
10067 * Xml::               Getting a markup representation of the parser.
10068 * Tracing::           Tracing the execution of your parser.
10069 @end menu
10070
10071 @node Counterexamples
10072 @section Generation of Counterexamples
10073 @cindex cex
10074 @cindex counterexamples
10075 @cindex conflict counterexamples
10076
10077 Solving conflicts is probably the most delicate part of the design of an LR
10078 parser, as demonstrated by the number of sections devoted to them in this
10079 very documentation.  To solve a conflict, one must understand it: when does
10080 it occur?  Is it because of a flaw in the grammar?  Is it rather because
10081 LR(1) cannot cope with this grammar?
10082
10083 One difficulty is that conflicts occur in the @emph{automaton}, and it can
10084 be tricky to relate them to issues in the @emph{grammar} itself.  With
10085 experience and patience, analysis of the detailed description of the
10086 automaton (@pxref{Understanding}) allows one to find example strings that
10087 reach these conflicts.
10088
10089 That task is made much easier thanks to the generation of counterexamples,
10090 initially developed by Chinawat Isradisaikul and Andrew Myers
10091 @pcite{Isradisaikul 2015}.
10092
10093 As a first example, see the grammar of @ref{Shift/Reduce}, which features
10094 one shift/reduce conflict:
10095
10096 @c see doc/else.y
10097 @example
10098 $ @kbd{bison else.y}
10099 else.y: @dwarning{warning}: 1 shift/reduce conflict [@dwarning{-Wconflicts-sr}]
10100 else.y: @dnotice{note}: rerun with option '-Wcounterexamples' to generate conflict counterexamples
10101 @end example
10102
10103 @noindent
10104 Let's rerun @command{bison} with the option
10105 @option{-Wcex}/@option{-Wcounterexamples}@inlinefmt{info, (the following
10106 output is actually in color)}:
10107
10108 @example
10109 else.y: @dwarning{warning}: 1 shift/reduce conflict [@dwarning{-Wconflicts-sr}]
10110 else.y: @dwarning{warning}: shift/reduce conflict on token "else" [@dwarning{-Wcounterexamples}]
10111 @danglingElseCex
10112 @end example
10113
10114 This shows two different derivations for one single expression, which proves
10115 that the grammar is ambiguous.
10116
10117 @sp 1
10118
10119 As a more delicate example, consider the example grammar of
10120 @ref{Reduce/Reduce}, which features a reduce/reduce conflict:
10121
10122 @c doc/sequence.y
10123 @example
10124 %%
10125 sequence:
10126   %empty
10127 | maybeword
10128 | sequence "word"
10129 ;
10130 maybeword:
10131   %empty
10132 | "word"
10133 ;
10134 @end example
10135
10136 Bison generates the following counterexamples:
10137
10138 @example
10139 @group
10140 $ @kbd{bison -Wcex sequence.y}
10141 sequence.y: @dwarning{warning}: 1 shift/reduce conflict [@dwarning{-Wconflicts-sr}]
10142 sequence.y: @dwarning{warning}: 2 reduce/reduce conflicts [@dwarning{-Wconflicts-rr}]
10143 @end group
10144 @ifnottex
10145 @group
10146 sequence.y: @dwarning{warning}: shift/reduce conflict on token "word" [@dwarning{-Wcounterexamples}]
10147   Example: @red{•} @green{"word"}
10148   Shift derivation
10149     @yellow{sequence}
10150     @yellow{↳ 2:} @green{maybeword}
10151           @green{↳ 5:} @red{•} @green{"word"}
10152   Example: @red{•} @yellow{"word"}
10153   Reduce derivation
10154     @yellow{sequence}
10155     @yellow{↳ 3:} @green{sequence} @yellow{"word"}
10156           @green{↳ 1:} @red{•}
10157 @end group
10158 @group
10159 sequence.y: @dwarning{warning}: reduce/reduce conflict on tokens $end, "word" [@dwarning{-Wcounterexamples}]
10160   Example: @red{•}
10161   First reduce derivation
10162     @yellow{sequence}
10163     @yellow{↳ 1:} @red{•}
10164   Example: @red{•}
10165   Second reduce derivation
10166     @yellow{sequence}
10167     @yellow{↳ 2:} @green{maybeword}
10168           @green{↳ 4:} @red{•}
10169 @end group
10170 @group
10171 sequence.y: @dwarning{warning}: shift/reduce conflict on token "word" [@dwarning{-Wcounterexamples}]
10172   Example: @red{•} @green{"word"}
10173   Shift derivation
10174     @yellow{sequence}
10175     @yellow{↳ 2:} @green{maybeword}
10176           @green{↳ 5:} @red{•} @green{"word"}
10177   Example: @red{•} @yellow{"word"}
10178   Reduce derivation
10179     @yellow{sequence}
10180     @yellow{↳ 3:} @green{sequence}        @yellow{"word"}
10181           @green{↳ 2:} @blue{maybeword}
10182                 @blue{↳ 4:} @red{•}
10183 @end group
10184 @group
10185 sequence.y:8.3-45: @dwarning{warning}: rule useless in parser due to conflicts [@dwarning{-Wother}]
10186     8 |   @dwarning{%empty    @{ printf ("empty maybeword\n"); @}}
10187       |   @dwarning{^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~}
10188 @end group
10189 @end ifnottex
10190 @iftex
10191 @group
10192 sequence.y: @dwarning{warning}: shift/reduce conflict on token "word" [@dwarning{-Wcounterexamples}]
10193   Example: @red{•} @green{"word"}
10194   Shift derivation
10195     @yellow{sequence}
10196     @yellow{@arrow{} 2:} @green{maybeword}
10197           @green{@arrow{} 5:} @red{•} @green{"word"}
10198   Example: @red{•} @yellow{"word"}
10199   Reduce derivation
10200     @yellow{sequence}
10201     @yellow{@arrow{} 3:} @green{sequence} @yellow{"word"}
10202           @green{@arrow{} 1:} @red{•}
10203 @end group
10204 @group
10205 sequence.y: @dwarning{warning}: reduce/reduce conflict on tokens $end, "word" [@dwarning{-Wcounterexamples}]
10206   Example: @red{•}
10207   First reduce derivation
10208     @yellow{sequence}
10209     @yellow{@arrow{} 1:} @red{•}
10210   Example: @red{•}
10211   Second reduce derivation
10212     @yellow{sequence}
10213     @yellow{@arrow{} 2:} @green{maybeword}
10214           @green{@arrow{} 4:} @red{•}
10215 @end group
10216 @group
10217 sequence.y: @dwarning{warning}: shift/reduce conflict on token "word" [@dwarning{-Wcounterexamples}]
10218   Example: @red{•} @green{"word"}
10219   Shift derivation
10220     @yellow{sequence}
10221     @yellow{@arrow{} 2:} @green{maybeword}
10222           @green{@arrow{} 5:} @red{•} @green{"word"}
10223   Example: @red{•} @yellow{"word"}
10224   Reduce derivation
10225     @yellow{sequence}
10226     @yellow{@arrow{} 3:} @green{sequence}        @yellow{"word"}
10227           @green{@arrow{} 2:} @blue{maybeword}
10228                 @blue{@arrow{} 4:} @red{•}
10229 @end group
10230 @group
10231 sequence.y:8.3-45: @dwarning{warning}: rule useless in parser due to conflicts [@dwarning{-Wother}]
10232     8 |   @dwarning{%empty    @{ printf ("empty maybeword\n"); @}}
10233       |   @dwarning{^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~}
10234 @end group
10235 @end iftex
10236 @end example
10237
10238 Each of these three conflicts, again, proves that the grammar is ambiguous.
10239 For instance, the second conflict (the reduce/reduce one) shows that the
10240 grammar accepts the empty input in two different ways.
10241
10242 @sp 1
10243
10244 Sometimes, the search will not find an example that can be derived in two
10245 ways.  In these cases, counterexample generation will provide two examples
10246 that are the same up until the dot.  Most notably, this will happen when
10247 your grammar requires a stronger parser (more lookahead, LR instead of
10248 LALR).  The following example isn't LR(1):
10249
10250 @c doc/ids.y
10251 @example
10252 %token ID
10253 %%
10254 s: a ID
10255 a: expr
10256 expr: %empty | expr ID ','
10257 @end example
10258
10259 @command{bison} reports:
10260
10261 @example
10262 ids.y: @dwarning{warning}: 1 shift/reduce conflict [@dwarning{-Wconflicts-sr}]
10263 ids.y: @dwarning{warning}: shift/reduce conflict on token ID [@dwarning{-Wcounterexamples}]
10264 @ifnottex
10265 @group
10266   First example: @purple{expr} @red{•} @purple{ID ','} @green{ID} @yellow{$end}
10267   Shift derivation
10268     @yellow{$accept}
10269     @yellow{↳ 0:} @green{s}                                 @yellow{$end}
10270          @green{↳ 1:} @blue{a}                        @green{ID}
10271               @blue{↳ 2:} @purple{expr}
10272                     @purple{↳ 4: expr} @red{•} @purple{ID ','}
10273   Second example: @blue{expr} @red{•} @green{ID} @yellow{$end}
10274   Reduce derivation
10275     @yellow{$accept}
10276     @yellow{↳ 0:} @green{s}                   @yellow{$end}
10277          @green{↳ 1:} @blue{a}           @green{ID}
10278               @blue{↳ 2: expr} @red{•}
10279 @end group
10280 @group
10281 ids.y:4.4-7: @dwarning{warning}: rule useless in parser due to conflicts [@dwarning{-Wother}]
10282     4 | a: expr
10283       |    ^~~~
10284 @end group
10285 @end ifnottex
10286 @iftex
10287 @group
10288   First example: @purple{expr} @red{•} @purple{ID ','} @green{ID} @yellow{$end}
10289   Shift derivation
10290     @yellow{$accept}
10291     @yellow{@arrow{} 0:} @green{s}                                   @yellow{$end}
10292           @green{@arrow{} 1:} @blue{a}                         @green{ID}
10293                 @blue{@arrow{} 2:} @purple{expr}
10294                       @purple{@arrow{} 4: expr} @red{•} @purple{ID ','}
10295   Second example: @blue{expr} @red{•} @green{ID} @yellow{$end}
10296   Reduce derivation
10297     @yellow{$accept}
10298     @yellow{@arrow{} 0:} @green{s}                     @yellow{$end}
10299           @green{@arrow{} 1:} @blue{a}            @green{ID}
10300                 @blue{@arrow{} 2: expr} @red{•}
10301 @end group
10302 @group
10303 ids.y:4.4-7: @dwarning{warning}: rule useless in parser due to conflicts [@dwarning{-Wother}]
10304     4 | a: expr
10305       |    ^~~~
10306 @end group
10307 @end iftex
10308 @end example
10309
10310 This conflict is caused by the parser not having enough information to know
10311 the difference between these two examples.  The parser would need an
10312 additional lookahead token to know whether or not a comma follows the
10313 @code{ID} after @code{expr}.  These types of conflicts tend to be more
10314 difficult to fix, and usually need a rework of the grammar.  In this case,
10315 it can be fixed by changing around the recursion: @code{expr: ID | ',' expr
10316 ID}.
10317
10318 Alternatively, you might also want to consider using a GLR parser
10319 (@pxref{GLR Parsers}).
10320
10321 @sp 1
10322
10323 On occasions, it is useful to look at counterexamples @emph{in situ}: with
10324 the automaton report (@pxref{Understanding}, in particular @ref{state-8,,
10325 State 8}).
10326
10327 @node Understanding
10328 @section Understanding Your Parser
10329
10330 Bison parsers are @dfn{shift/reduce automata} (@pxref{Algorithm}).  In some
10331 cases (much more frequent than one would hope), looking at this automaton is
10332 required to tune or simply fix a parser.
10333
10334 The textual file is generated when the options @option{--report} or
10335 @option{--verbose} are specified, see @ref{Invocation}.  Its name is made by
10336 removing @samp{.tab.c} or @samp{.c} from the parser implementation file
10337 name, and adding @samp{.output} instead.  Therefore, if the grammar file is
10338 @file{foo.y}, then the parser implementation file is called @file{foo.tab.c}
10339 by default.  As a consequence, the verbose output file is called
10340 @file{foo.output}.
10341
10342 The following grammar file, @file{calc.y}, will be used in the sequel:
10343
10344 @c doc/calc.y
10345 @example
10346 @group
10347 %union
10348 @{
10349   int ival;
10350   const char *sval;
10351 @}
10352 @end group
10353 @group
10354 %token <ival> NUM
10355 %nterm <ival> exp
10356 @end group
10357 @group
10358 %token <sval> STR
10359 %nterm <sval> useless
10360 @end group
10361 @group
10362 %left '+' '-'
10363 %left '*'
10364 @end group
10365 %%
10366 @group
10367 exp:
10368   exp '+' exp
10369 | exp '-' exp
10370 | exp '*' exp
10371 | exp '/' exp
10372 | NUM
10373 ;
10374 @end group
10375 useless: STR;
10376 %%
10377 @end example
10378
10379 @command{bison} reports:
10380
10381 @smallexample
10382 calc.y: @dwarning{warning}: 1 nonterminal useless in grammar [@dwarning{-Wother}]
10383 calc.y: @dwarning{warning}: 1 rule useless in grammar [@dwarning{-Wother}]
10384 calc.y:19.1-7: @dwarning{warning}: nonterminal useless in grammar: useless [@dwarning{-Wother}]
10385    19 | @dwarning{useless: STR;}
10386       | @dwarning{^~~~~~~}
10387 calc.y: @dwarning{warning}: 7 shift/reduce conflicts [@dwarning{-Wconflicts-sr}]
10388 calc.y: @dnotice{note}: rerun with option '-Wcounterexamples' to generate conflict counterexamples
10389 @end smallexample
10390
10391 Going back to the calc example, when given @option{--report=state},
10392 @command{bison} creates, in addition to @file{calc.tab.c}, a file
10393 @file{calc.output} with contents detailed below.  The order of the output and the exact
10394 presentation might vary, but the interpretation is the same.
10395
10396 @noindent
10397 @cindex token, useless
10398 @cindex useless token
10399 @cindex nonterminal, useless
10400 @cindex useless nonterminal
10401 @cindex rule, useless
10402 @cindex useless rule
10403 The first section reports useless tokens, nonterminals and rules.  Useless
10404 nonterminals and rules are removed in order to produce a smaller parser, but
10405 useless tokens are preserved, since they might be used by the scanner (note
10406 the difference between ``useless'' and ``unused'' below):
10407
10408 @example
10409 Nonterminals useless in grammar
10410    useless
10411
10412 Terminals unused in grammar
10413    STR
10414
10415 Rules useless in grammar
10416     6 useless: STR
10417 @end example
10418
10419 @noindent
10420 The next section lists states that still have conflicts.
10421
10422 @example
10423 State 8 conflicts: 1 shift/reduce
10424 State 9 conflicts: 1 shift/reduce
10425 State 10 conflicts: 1 shift/reduce
10426 State 11 conflicts: 4 shift/reduce
10427 @end example
10428
10429 @noindent
10430 Then Bison reproduces the exact grammar it used:
10431
10432 @example
10433 Grammar
10434
10435     0 $accept: exp $end
10436
10437     1 exp: exp '+' exp
10438     2    | exp '-' exp
10439     3    | exp '*' exp
10440     4    | exp '/' exp
10441     5    | NUM
10442 @end example
10443
10444 @noindent
10445 and reports the uses of the symbols:
10446
10447 @example
10448 @group
10449 Terminals, with rules where they appear
10450
10451     $end (0) 0
10452     '*' (42) 3
10453     '+' (43) 1
10454     '-' (45) 2
10455     '/' (47) 4
10456     error (256)
10457     NUM <ival> (258) 5
10458     STR <sval> (259)
10459 @end group
10460
10461 @group
10462 Nonterminals, with rules where they appear
10463
10464     $accept (9)
10465         on left: 0
10466     exp <ival> (10)
10467         on left: 1 2 3 4 5
10468         on right: 0 1 2 3 4
10469 @end group
10470 @end example
10471
10472 @noindent
10473 @cindex item
10474 @cindex dotted rule
10475 @cindex rule, dotted
10476 Bison then proceeds onto the automaton itself, describing each state with
10477 its set of @dfn{items}, also known as @dfn{dotted rules}.  Each item is a
10478 production rule together with a point (@samp{•}) marking the location of the
10479 input cursor.
10480
10481 @example
10482 State 0
10483
10484     0 $accept: • exp $end
10485
10486     NUM  shift, and go to state 1
10487
10488     exp  go to state 2
10489 @end example
10490
10491 This reads as follows: ``state 0 corresponds to being at the very
10492 beginning of the parsing, in the initial rule, right before the start
10493 symbol (here, @code{exp}).  When the parser returns to this state right
10494 after having reduced a rule that produced an @code{exp}, the control
10495 flow jumps to state 2.  If there is no such transition on a nonterminal
10496 symbol, and the lookahead is a @code{NUM}, then this token is shifted onto
10497 the parse stack, and the control flow jumps to state 1.  Any other
10498 lookahead triggers a syntax error.''
10499
10500 @cindex core, item set
10501 @cindex item set core
10502 @cindex kernel, item set
10503 @cindex item set kernel
10504 Even though the only active rule in state 0 seems to be rule 0, the
10505 report lists @code{NUM} as a lookahead token because @code{NUM} can be
10506 at the beginning of any rule deriving an @code{exp}.  By default Bison
10507 reports the so-called @dfn{core} or @dfn{kernel} of the item set, but if
10508 you want to see more detail you can invoke @command{bison} with
10509 @option{--report=itemset} to list the derived items as well:
10510
10511 @example
10512 State 0
10513
10514     0 $accept: • exp $end
10515     1 exp: • exp '+' exp
10516     2    | • exp '-' exp
10517     3    | • exp '*' exp
10518     4    | • exp '/' exp
10519     5    | • NUM
10520
10521     NUM  shift, and go to state 1
10522
10523     exp  go to state 2
10524 @end example
10525
10526 @noindent
10527 In the state 1@dots{}
10528
10529 @example
10530 State 1
10531
10532     5 exp: NUM •
10533
10534     $default  reduce using rule 5 (exp)
10535 @end example
10536
10537 @noindent
10538 the rule 5, @samp{exp: NUM;}, is completed.  Whatever the lookahead token
10539 (@samp{$default}), the parser will reduce it.  If it was coming from State
10540 0, then, after this reduction it will return to state 0, and will jump to
10541 state 2 (@samp{exp: go to state 2}).
10542
10543 @example
10544 State 2
10545
10546     0 $accept: exp • $end
10547     1 exp: exp • '+' exp
10548     2    | exp • '-' exp
10549     3    | exp • '*' exp
10550     4    | exp • '/' exp
10551
10552     $end  shift, and go to state 3
10553     '+'   shift, and go to state 4
10554     '-'   shift, and go to state 5
10555     '*'   shift, and go to state 6
10556     '/'   shift, and go to state 7
10557 @end example
10558
10559 @noindent
10560 In state 2, the automaton can only shift a symbol.  For instance, because of
10561 the item @samp{exp: exp • '+' exp}, if the lookahead is @samp{+} it is
10562 shifted onto the parse stack, and the automaton jumps to state 4,
10563 corresponding to the item @samp{exp: exp '+' • exp}.  Since there is no
10564 default action, any lookahead not listed triggers a syntax error.
10565
10566 @cindex accepting state
10567 The state 3 is named the @dfn{final state}, or the @dfn{accepting
10568 state}:
10569
10570 @example
10571 State 3
10572
10573     0 $accept: exp $end •
10574
10575     $default  accept
10576 @end example
10577
10578 @noindent
10579 the initial rule is completed (the start symbol and the end-of-input were
10580 read), the parsing exits successfully.
10581
10582 The interpretation of states 4 to 7 is straightforward, and is left to
10583 the reader.
10584
10585 @example
10586 State 4
10587
10588     1 exp: exp '+' • exp
10589
10590     NUM  shift, and go to state 1
10591
10592     exp  go to state 8
10593
10594
10595 State 5
10596
10597     2 exp: exp '-' • exp
10598
10599     NUM  shift, and go to state 1
10600
10601     exp  go to state 9
10602
10603
10604 State 6
10605
10606     3 exp: exp '*' • exp
10607
10608     NUM  shift, and go to state 1
10609
10610     exp  go to state 10
10611
10612
10613 State 7
10614
10615     4 exp: exp '/' • exp
10616
10617     NUM  shift, and go to state 1
10618
10619     exp  go to state 11
10620 @end example
10621
10622 @anchor{state-8}
10623 As was announced at the beginning of the report, @samp{State 8 conflicts:
10624 1 shift/reduce}:
10625
10626 @example
10627 State 8
10628
10629     1 exp: exp • '+' exp
10630     1    | exp '+' exp •
10631     2    | exp • '-' exp
10632     3    | exp • '*' exp
10633     4    | exp • '/' exp
10634
10635     '*'  shift, and go to state 6
10636     '/'  shift, and go to state 7
10637
10638     '/'       [reduce using rule 1 (exp)]
10639     $default  reduce using rule 1 (exp)
10640 @end example
10641
10642 Indeed, there are two actions associated with the lookahead @samp{/}:
10643 either shifting (and going to state 7), or reducing rule 1.  The
10644 conflict means that either the grammar is ambiguous, or the parser lacks
10645 information to make the right decision.  Indeed the grammar is
10646 ambiguous, as, since we did not specify the precedence of @samp{/}, the
10647 sentence @samp{NUM + NUM / NUM} can be parsed as @samp{NUM + (NUM /
10648 NUM)}, which corresponds to shifting @samp{/}, or as @samp{(NUM + NUM) /
10649 NUM}, which corresponds to reducing rule 1.
10650
10651 Because in deterministic parsing only a single decision can be made, Bison
10652 arbitrarily chose to disable the reduction, see @ref{Shift/Reduce}.
10653 Discarded actions are reported between square brackets.
10654
10655 Note that all the previous states had a single possible action: either
10656 shifting the next token and going to the corresponding state, or
10657 reducing a single rule.  In the other cases, i.e., when shifting
10658 @emph{and} reducing is possible or when @emph{several} reductions are
10659 possible, the lookahead is required to select the action.  State 8 is
10660 one such state: if the lookahead is @samp{*} or @samp{/} then the action
10661 is shifting, otherwise the action is reducing rule 1.  In other words,
10662 the first two items, corresponding to rule 1, are not eligible when the
10663 lookahead token is @samp{*}, since we specified that @samp{*} has higher
10664 precedence than @samp{+}.  More generally, some items are eligible only
10665 with some set of possible lookahead tokens.  When run with
10666 @option{--report=lookahead}, Bison specifies these lookahead tokens:
10667
10668 @example
10669 State 8
10670
10671     1 exp: exp • '+' exp
10672     1    | exp '+' exp •  [$end, '+', '-', '/']
10673     2    | exp • '-' exp
10674     3    | exp • '*' exp
10675     4    | exp • '/' exp
10676
10677     '*'  shift, and go to state 6
10678     '/'  shift, and go to state 7
10679
10680     '/'       [reduce using rule 1 (exp)]
10681     $default  reduce using rule 1 (exp)
10682 @end example
10683
10684 Note however that while @samp{NUM + NUM / NUM} is ambiguous (which results in
10685 the conflicts on @samp{/}), @samp{NUM + NUM * NUM} is not: the conflict was
10686 solved thanks to associativity and precedence directives.  If invoked with
10687 @option{--report=solved}, Bison includes information about the solved
10688 conflicts in the report:
10689
10690 @example
10691 Conflict between rule 1 and token '+' resolved as reduce (%left '+').
10692 Conflict between rule 1 and token '-' resolved as reduce (%left '-').
10693 Conflict between rule 1 and token '*' resolved as shift ('+' < '*').
10694 @end example
10695
10696 When given @option{--report=counterexamples}, @command{bison} will generate
10697 counterexamples within the report, augmented with the corresponding items
10698 (@pxref{Counterexamples}).
10699
10700 @ifnottex
10701 @example
10702 shift/reduce conflict on token '/':
10703     1 exp: exp '+' exp •
10704     4 exp: exp • '/' exp
10705 @group
10706   Example: exp '+' exp • '/' exp
10707   Shift derivation
10708     exp
10709     ↳ 1: exp '+' exp
10710                  ↳ 4: exp • '/' exp
10711   Example: exp '+' exp • '/' exp
10712   Reduce derivation
10713     exp
10714     ↳ 4: exp                 '/' exp
10715          ↳ 1: exp '+' exp •
10716 @end group
10717 @end example
10718 @end ifnottex
10719 @iftex
10720 @example
10721 shift/reduce conflict on token '/':
10722     1 exp: exp '+' exp •
10723     4 exp: exp • '/' exp
10724 @group
10725   Example: exp '+' exp • '/' exp
10726   Shift derivation
10727     exp
10728     @arrow{} 1: exp '+' exp
10729                   @arrow{} 4: exp • '/' exp
10730   Example: exp '+' exp • '/' exp
10731   Reduce derivation
10732     exp
10733     @arrow{} 4: exp                '/' exp
10734           @arrow{} 1: exp '+' exp •
10735 @end group
10736 @end example
10737 @end iftex
10738
10739 This shows two separate derivations in the grammar for the same @code{exp}:
10740 @samp{e1 + e2 / e3}.  The derivations show how your rules would parse the
10741 given example.  Here, the first derivation shifts on @samp{/}, resulting in
10742 @samp{e2 / e3} being grouped as an @code{exp}.  The second derivation
10743 completes a reduction when seeing @samp{/}, causing @samp{e1 + e2} to be
10744 grouped as an @code{exp}.  Therefore, it is easy to see that adding
10745 precedence/associativity directives would fix this conflict.
10746
10747 The remaining states are similar:
10748
10749 @example
10750 @group
10751 State 9
10752
10753     1 exp: exp • '+' exp
10754     2    | exp • '-' exp
10755     2    | exp '-' exp •
10756     3    | exp • '*' exp
10757     4    | exp • '/' exp
10758
10759     '*'  shift, and go to state 6
10760     '/'  shift, and go to state 7
10761
10762     '/'       [reduce using rule 2 (exp)]
10763     $default  reduce using rule 2 (exp)
10764 @end group
10765
10766 @group
10767 State 10
10768
10769     1 exp: exp • '+' exp
10770     2    | exp • '-' exp
10771     3    | exp • '*' exp
10772     3    | exp '*' exp •
10773     4    | exp • '/' exp
10774
10775     '/'  shift, and go to state 7
10776
10777     '/'       [reduce using rule 3 (exp)]
10778     $default  reduce using rule 3 (exp)
10779 @end group
10780
10781 @group
10782 State 11
10783
10784     1 exp: exp • '+' exp
10785     2    | exp • '-' exp
10786     3    | exp • '*' exp
10787     4    | exp • '/' exp
10788     4    | exp '/' exp •
10789
10790     '+'  shift, and go to state 4
10791     '-'  shift, and go to state 5
10792     '*'  shift, and go to state 6
10793     '/'  shift, and go to state 7
10794
10795     '+'       [reduce using rule 4 (exp)]
10796     '-'       [reduce using rule 4 (exp)]
10797     '*'       [reduce using rule 4 (exp)]
10798     '/'       [reduce using rule 4 (exp)]
10799     $default  reduce using rule 4 (exp)
10800 @end group
10801 @end example
10802
10803 @noindent
10804 Observe that state 11 contains conflicts not only due to the lack of
10805 precedence of @samp{/} with respect to @samp{+}, @samp{-}, and @samp{*}, but
10806 also because the associativity of @samp{/} is not specified.
10807
10808 Bison may also produce an HTML version of this output, via an XML file and
10809 XSLT processing (@pxref{Xml}).
10810
10811 @c ================================================= Graphical Representation
10812
10813 @node Graphviz
10814 @section Visualizing Your Parser
10815 @cindex dot
10816
10817 As another means to gain better understanding of the shift/reduce
10818 automaton corresponding to the Bison parser, a DOT file can be generated. Note
10819 that debugging a real grammar with this is tedious at best, and impractical
10820 most of the time, because the generated files are huge (the generation of
10821 a PDF or PNG file from it will take very long, and more often than not it will
10822 fail due to memory exhaustion). This option was rather designed for beginners,
10823 to help them understand LR parsers.
10824
10825 This file is generated when the @option{--graph} option is specified
10826 (@pxref{Invocation}).  Its name is made by removing
10827 @samp{.tab.c} or @samp{.c} from the parser implementation file name, and
10828 adding @samp{.gv} instead.  If the grammar file is @file{foo.y}, the
10829 Graphviz output file is called @file{foo.gv}.  A DOT file may also be
10830 produced via an XML file and XSLT processing (@pxref{Xml}).
10831
10832
10833 The following grammar file, @file{rr.y}, will be used in the sequel:
10834
10835 @example
10836 %%
10837 @group
10838 exp: a ";" | b ".";
10839 a: "0";
10840 b: "0";
10841 @end group
10842 @end example
10843
10844 The graphical output
10845 @ifnotinfo
10846 (see @ref{fig:graph})
10847 @end ifnotinfo
10848 is very similar to the textual one, and as such it is more easily understood by
10849 making direct comparisons between them.  @xref{Debugging}, for a detailed
10850 analysis of the textual report.
10851
10852 @ifnotinfo
10853 @float Figure,fig:graph
10854 @center @image{figs/example, 430pt,,,.svg}
10855 @caption{A graphical rendering of the parser.}
10856 @end float
10857 @end ifnotinfo
10858
10859 @subheading Graphical Representation of States
10860
10861 The items (dotted rules) for each state are grouped together in graph nodes.
10862 Their numbering is the same as in the verbose file. See the following
10863 points, about transitions, for examples.
10864
10865 When invoked with @option{--report=lookaheads}, the lookahead tokens, when
10866 needed, are shown next to the relevant rule between square brackets as a
10867 comma separated list. This is the case in the figure for the representation of
10868 reductions, below.
10869
10870 @sp 1
10871
10872 The transitions are represented as directed edges between the current and
10873 the target states.
10874
10875 @subheading Graphical Representation of Shifts
10876
10877 Shifts are shown as solid arrows, labeled with the lookahead token for that
10878 shift. The following describes a shift in the @file{rr.output} file:
10879
10880 @example
10881 @group
10882 State 3
10883
10884     1 exp: a • ";"
10885
10886     ";"  shift, and go to state 6
10887 @end group
10888 @end example
10889
10890 A Graphviz rendering of this portion of the graph could be:
10891
10892 @center @image{figs/example-shift, 100pt,,,.svg}
10893
10894 @subheading Graphical Representation of Reductions
10895
10896 Reductions are shown as solid arrows, leading to a diamond-shaped node
10897 bearing the number of the reduction rule. The arrow is labeled with the
10898 appropriate comma separated lookahead tokens. If the reduction is the default
10899 action for the given state, there is no such label.
10900
10901 This is how reductions are represented in the verbose file @file{rr.output}:
10902 @example
10903 State 1
10904
10905     3 a: "0" •  [";"]
10906     4 b: "0" •  ["."]
10907
10908     "."       reduce using rule 4 (b)
10909     $default  reduce using rule 3 (a)
10910 @end example
10911
10912 A Graphviz rendering of this portion of the graph could be:
10913
10914 @center @image{figs/example-reduce, 120pt,,,.svg}
10915
10916 When unresolved conflicts are present, because in deterministic parsing
10917 only a single decision can be made, Bison can arbitrarily choose to disable a
10918 reduction, see @ref{Shift/Reduce}.  Discarded actions
10919 are distinguished by a red filling color on these nodes, just like how they are
10920 reported between square brackets in the verbose file.
10921
10922 The reduction corresponding to the rule number 0 is the accepting
10923 state. It is shown as a blue diamond, labeled ``Acc''.
10924
10925 @subheading Graphical Representation of Gotos
10926
10927 The @samp{go to} jump transitions are represented as dotted lines bearing
10928 the name of the rule being jumped to.
10929
10930 @c ================================================= XML
10931
10932 @node Xml
10933 @section Visualizing Your Parser in Multiple Formats
10934 @cindex xml
10935
10936 Bison supports two major report formats: textual output
10937 (@pxref{Understanding}) when invoked
10938 with option @option{--verbose}, and DOT
10939 (@pxref{Graphviz}) when invoked with
10940 option @option{--graph}. However,
10941 another alternative is to output an XML file that may then be, with
10942 @command{xsltproc}, rendered as either a raw text format equivalent to the
10943 verbose file, or as an HTML version of the same file, with clickable
10944 transitions, or even as a DOT file. The @file{.output} and DOT files obtained via
10945 XSLT are identical to those obtained by invoking
10946 @command{bison} with options @option{--verbose} or @option{--graph}.
10947
10948 The XML file is generated when the options @option{-x} or
10949 @option{--xml[=FILE]} are specified, see @ref{Invocation}.
10950 If not specified, its name is made by removing @samp{.tab.c} or @samp{.c}
10951 from the parser implementation file name, and adding @samp{.xml} instead.
10952 For instance, if the grammar file is @file{foo.y}, the default XML output
10953 file is @file{foo.xml}.
10954
10955 Bison ships with a @file{data/xslt} directory, containing XSL Transformation
10956 files to apply to the XML file. Their names are unambiguous:
10957
10958 @table @file
10959 @item xml2dot.xsl
10960 Used to output a copy of the DOT visualization of the automaton.
10961 @item xml2text.xsl
10962 Used to output a copy of the @samp{.output} file.
10963 @item xml2xhtml.xsl
10964 Used to output an XHTML enhancement of the @samp{.output} file.
10965 @end table
10966
10967 Sample usage (requires @command{xsltproc}):
10968 @example
10969 $ @kbd{bison -x gr.y}
10970 @group
10971 $ @kbd{bison --print-datadir}
10972 /usr/local/share/bison
10973 @end group
10974 $ @kbd{xsltproc /usr/local/share/bison/xslt/xml2xhtml.xsl gr.xml >gr.html}
10975 @end example
10976
10977 @c ================================================= Tracing
10978
10979 @node Tracing
10980 @section Tracing Your Parser
10981 @findex yydebug
10982 @cindex debugging
10983 @cindex tracing the parser
10984
10985 When a Bison grammar compiles properly but parses ``incorrectly'', the
10986 @code{yydebug} parser-trace feature helps figure out why.
10987
10988 @menu
10989 * Enabling Traces::    Activating run-time trace support
10990 * Mfcalc Traces::      Extending @code{mfcalc} to support traces
10991 @end menu
10992
10993 @node Enabling Traces
10994 @subsection  Enabling Traces
10995 There are several means to enable compilation of trace facilities, in
10996 decreasing order of preference:
10997
10998 @table @asis
10999 @item the variable @samp{parse.trace}
11000 @findex %define parse.trace
11001 Add the @samp{%define parse.trace} directive (@pxref{%define
11002 Summary}), or pass the @option{-Dparse.trace} option
11003 (@pxref{Tuning the Parser}).  This is a Bison extension.  Unless POSIX and
11004 Yacc portability matter to you, this is the preferred solution.
11005
11006 @item the option @option{-t} (POSIX Yacc compliant)
11007 @itemx the option @option{--debug} (Bison extension)
11008 Use the @option{-t} option when you run Bison (@pxref{Invocation}).  With
11009 @samp{%define api.prefix @{c@}}, it defines @code{CDEBUG} to 1, otherwise it
11010 defines @code{YYDEBUG} to 1.
11011
11012 @item the directive @samp{%debug} (deprecated)
11013 @findex %debug
11014 Add the @code{%debug} directive (@pxref{Decl Summary}).  This Bison
11015 extension is maintained for backward compatibility; use @code{%define
11016 parse.trace} instead.
11017
11018 @item the macro @code{YYDEBUG} (C/C++ only)
11019 @findex YYDEBUG
11020 Define the macro @code{YYDEBUG} to a nonzero value when you compile the
11021 parser.  This is compliant with POSIX Yacc.  You could use
11022 @option{-DYYDEBUG=1} as a compiler option or you could put @samp{#define
11023 YYDEBUG 1} in the prologue of the grammar file (@pxref{Prologue}).
11024
11025 If the @code{%define} variable @code{api.prefix} is used (@pxref{Multiple
11026 Parsers}), for instance @samp{%define
11027 api.prefix @{c@}}, then if @code{CDEBUG} is defined, its value controls the
11028 tracing feature (enabled if and only if nonzero); otherwise tracing is
11029 enabled if and only if @code{YYDEBUG} is nonzero.
11030
11031 In C++, where POSIX compliance makes no sense, avoid this option, and prefer
11032 @samp{%define parse.trace}.  If you @code{#define} the @code{YYDEBUG} macro
11033 at the wrong place (e.g., in @samp{%code top} instead of @samp{%code
11034 require}), the parser class will have two different definitions, thus
11035 leading to ODR violations and happy debugging times.
11036 @end table
11037
11038 We suggest that you always enable the trace option so that debugging is
11039 always possible.
11040
11041 @findex YYFPRINTF
11042 In C the trace facility outputs messages with macro calls of the form
11043 @code{YYFPRINTF (stderr, @var{format}, @var{args})} where @var{format} and
11044 @var{args} are the usual @code{printf} format and variadic arguments.  If
11045 you define @code{YYDEBUG} to a nonzero value but do not define
11046 @code{YYFPRINTF}, @code{<stdio.h>} is automatically included and
11047 @code{YYFPRINTF} is defined to @code{fprintf}.
11048
11049 Once you have compiled the program with trace facilities, the way to request
11050 a trace is to store a nonzero value in the variable @code{yydebug}.  You can
11051 do this by making the C code do it (in @code{main}, perhaps), or you can
11052 alter the value with a C debugger.
11053
11054 Each step taken by the parser when @code{yydebug} is nonzero produces a line
11055 or two of trace information, written on @code{stderr}.  The trace messages
11056 tell you these things:
11057
11058 @itemize @bullet
11059 @item
11060 Each time the parser calls @code{yylex}, what kind of token was read.
11061
11062 @item
11063 Each time a token is shifted, the depth and complete contents of the state
11064 stack (@pxref{Parser States}).
11065
11066 @item
11067 Each time a rule is reduced, which rule it is, and the complete contents of
11068 the state stack afterward.
11069 @end itemize
11070
11071 To make sense of this information, it helps to refer to the automaton
11072 description file (@pxref{Understanding}).  This
11073 file shows the meaning of each state in terms of positions in various rules,
11074 and also what each state will do with each possible input token.  As you
11075 read the successive trace messages, you can see that the parser is
11076 functioning according to its specification in the listing file.  Eventually
11077 you will arrive at the place where something undesirable happens, and you
11078 will see which parts of the grammar are to blame.
11079
11080 The parser implementation file is a C/C++/D/Java program and you can use
11081 debuggers on it, but it's not easy to interpret what it is doing.  The
11082 parser function is a finite-state machine interpreter, and aside from the
11083 actions it executes the same code over and over.  Only the values of
11084 variables show where in the grammar it is working.
11085
11086 @node Mfcalc Traces
11087 @subsection Enabling Debug Traces for @code{mfcalc}
11088
11089 The debugging information normally gives the token kind of each token read,
11090 but not its semantic value.  The @code{%printer} directive allows specifying
11091 how semantic values are reported, see @ref{Printer Decl}.
11092
11093 As a demonstration of @code{%printer}, consider the multi-function
11094 calculator, @code{mfcalc} (@pxref{Multi-function Calc}).  To enable run-time
11095 traces, and semantic value reports, insert the following directives in its
11096 prologue:
11097
11098 @comment file: c/mfcalc/mfcalc.y: 2
11099 @example
11100 /* Generate the parser description file. */
11101 %verbose
11102 /* Enable run-time traces (yydebug). */
11103 %define parse.trace
11104
11105 /* Formatting semantic values. */
11106 %printer @{ fprintf (yyo, "%s", $$->name); @} VAR;
11107 %printer @{ fprintf (yyo, "%s()", $$->name); @} FUN;
11108 %printer @{ fprintf (yyo, "%g", $$); @} <double>;
11109 @end example
11110
11111 The @code{%define} directive instructs Bison to generate run-time trace
11112 support.  Then, activation of these traces is controlled at run-time by the
11113 @code{yydebug} variable, which is disabled by default.  Because these traces
11114 will refer to the ``states'' of the parser, it is helpful to ask for the
11115 creation of a description of that parser; this is the purpose of the (admittedly
11116 ill-named) @code{%verbose} directive.
11117
11118 The set of @code{%printer} directives demonstrates how to format the
11119 semantic value in the traces.  Note that the specification can be done
11120 either on the symbol type (e.g., @code{VAR} or @code{FUN}), or on the type
11121 tag: since @code{<double>} is the type for both @code{NUM} and @code{exp},
11122 this printer will be used for them.
11123
11124 Here is a sample of the information provided by run-time traces.  The traces
11125 are sent onto standard error.
11126
11127 @example
11128 $ @kbd{echo 'sin(1-1)' | ./mfcalc -p}
11129 Starting parse
11130 Entering state 0
11131 Reducing stack by rule 1 (line 34):
11132 -> $$ = nterm input ()
11133 Stack now 0
11134 Entering state 1
11135 @end example
11136
11137 @noindent
11138 This first batch shows a specific feature of this grammar: the first rule
11139 (which is in line 34 of @file{mfcalc.y}) can be reduced without even having
11140 to look for the first token.  The resulting left-hand symbol (@code{$$}) is
11141 a valueless (@samp{()}) @code{input} nonterminal (@code{nterm}).
11142
11143 Then the parser calls the scanner.
11144 @example
11145 Reading a token
11146 Next token is token FUN (sin())
11147 Shifting token FUN (sin())
11148 Entering state 6
11149 @end example
11150
11151 @noindent
11152 That token (@code{token}) is a function (@code{FUN}) whose value is
11153 @samp{sin} as formatted per our @code{%printer} specification: @samp{sin()}.
11154 The parser stores (@code{Shifting}) that token, and others, until it can do
11155 something about it.
11156
11157 @example
11158 Reading a token
11159 Next token is token '(' ()
11160 Shifting token '(' ()
11161 Entering state 14
11162 Reading a token
11163 Next token is token NUM (1.000000)
11164 Shifting token NUM (1.000000)
11165 Entering state 4
11166 Reducing stack by rule 6 (line 44):
11167    $1 = token NUM (1.000000)
11168 -> $$ = nterm exp (1.000000)
11169 Stack now 0 1 6 14
11170 Entering state 24
11171 @end example
11172
11173 @noindent
11174 The previous reduction demonstrates the @code{%printer} directive for
11175 @code{<double>}: both the token @code{NUM} and the resulting nonterminal
11176 @code{exp} have @samp{1} as value.
11177
11178 @example
11179 Reading a token
11180 Next token is token '-' ()
11181 Shifting token '-' ()
11182 Entering state 17
11183 Reading a token
11184 Next token is token NUM (1.000000)
11185 Shifting token NUM (1.000000)
11186 Entering state 4
11187 Reducing stack by rule 6 (line 44):
11188    $1 = token NUM (1.000000)
11189 -> $$ = nterm exp (1.000000)
11190 Stack now 0 1 6 14 24 17
11191 Entering state 26
11192 Reading a token
11193 Next token is token ')' ()
11194 Reducing stack by rule 11 (line 49):
11195    $1 = nterm exp (1.000000)
11196    $2 = token '-' ()
11197    $3 = nterm exp (1.000000)
11198 -> $$ = nterm exp (0.000000)
11199 Stack now 0 1 6 14
11200 Entering state 24
11201 @end example
11202
11203 @noindent
11204 The rule for the subtraction was just reduced.  The parser is about to
11205 discover the end of the call to @code{sin}.
11206
11207 @example
11208 Next token is token ')' ()
11209 Shifting token ')' ()
11210 Entering state 31
11211 Reducing stack by rule 9 (line 47):
11212    $1 = token FUN (sin())
11213    $2 = token '(' ()
11214    $3 = nterm exp (0.000000)
11215    $4 = token ')' ()
11216 -> $$ = nterm exp (0.000000)
11217 Stack now 0 1
11218 Entering state 11
11219 @end example
11220
11221 @noindent
11222 Finally, the end-of-line allows the parser to complete the computation, and
11223 display its result.
11224
11225 @example
11226 Reading a token
11227 Next token is token '\n' ()
11228 Shifting token '\n' ()
11229 Entering state 22
11230 Reducing stack by rule 4 (line 40):
11231    $1 = nterm exp (0.000000)
11232    $2 = token '\n' ()
11233 @result{} 0
11234 -> $$ = nterm line ()
11235 Stack now 0 1
11236 Entering state 10
11237 Reducing stack by rule 2 (line 35):
11238    $1 = nterm input ()
11239    $2 = nterm line ()
11240 -> $$ = nterm input ()
11241 Stack now 0
11242 Entering state 1
11243 @end example
11244
11245 The parser has returned to state 1, in which it is waiting for the next
11246 expression to evaluate, or for the end-of-file token, which causes the
11247 completion of the parsing.
11248
11249 @example
11250 Reading a token
11251 Now at end of input.
11252 Shifting token $end ()
11253 Entering state 2
11254 Stack now 0 1 2
11255 Cleanup: popping token $end ()
11256 Cleanup: popping nterm input ()
11257 @end example
11258
11259
11260 @c ================================================= Invoking Bison
11261
11262 @node Invocation
11263 @chapter Invoking Bison
11264 @cindex invoking Bison
11265 @cindex Bison invocation
11266 @cindex options for invoking Bison
11267
11268 The usual way to invoke Bison is as follows:
11269
11270 @example
11271 $ @kbd{bison @var{file}}
11272 @end example
11273
11274 Here @var{file} is the grammar file name, which usually ends in @samp{.y}.
11275 The parser implementation file's name is made by replacing the @samp{.y}
11276 with @samp{.tab.c} and removing any leading directory.  Thus, the
11277 @samp{bison foo.y} command yields @file{foo.tab.c}, and the @samp{bison
11278 hack/foo.y} command yields @file{foo.tab.c}.  It's also possible, in case
11279 you are writing C++ code instead of C in your grammar file, to name it
11280 @file{foo.ypp} or @file{foo.y++}.  Then, the output files will take an
11281 extension like the given one as input (respectively @file{foo.tab.cpp} and
11282 @file{foo.tab.c++}).  This feature takes effect with all options that
11283 manipulate file names like @option{-o} or @option{-d}.
11284
11285 For example:
11286
11287 @example
11288 $ @kbd{bison -d @var{file.yxx}}
11289 @end example
11290 @noindent
11291 will produce @file{file.tab.cxx} and @file{file.tab.hxx}, and
11292
11293 @example
11294 $ @kbd{bison -d -o @var{output.c++} @var{file.y}}
11295 @end example
11296 @noindent
11297 will produce @file{output.c++} and @file{output.h++}.
11298
11299 For compatibility with POSIX, the standard Bison distribution also contains
11300 a shell script called @command{yacc} that invokes Bison with the @option{-y}
11301 option.
11302
11303 @sp 1
11304
11305 The exit status of @command{bison} is:
11306 @table @asis
11307 @item 0 (success)
11308 when there were no errors.  Warnings, which are diagnostics about dubious
11309 constructs, do not change the exit status, unless they are turned into
11310 errors (@pxref{Werror,,@option{-Werror}}).
11311
11312 @item 1 (failure)
11313 when there were errors.  No file was generated (except the reports generated
11314 by @option{--verbose}, etc.).  In particular, the output files that possibly
11315 existed were not changed.
11316
11317 @item 63 (mismatch)
11318 when @command{bison} does not meet the version requirements of the grammar
11319 file. @xref{Require Decl}.  No file was generated or changed.
11320 @end table
11321
11322
11323 @menu
11324 * Bison Options::     All the options described in detail,
11325                         in alphabetical order by short options.
11326 * Option Cross Key::  Alphabetical list of long options.
11327 * Yacc Library::      Yacc-compatible @code{yylex} and @code{main}.
11328 @end menu
11329
11330 @node Bison Options
11331 @section Bison Options
11332
11333 Bison supports both traditional single-letter options and mnemonic long
11334 option names.  Long option names are indicated with @option{--} instead of
11335 @option{-}.  Abbreviations for option names are allowed as long as they
11336 are unique.  When a long option takes an argument, like
11337 @option{--file-prefix}, connect the option name and the argument with
11338 @samp{=}.
11339
11340 Here is a list of options that can be used with Bison.  It is followed by a
11341 cross key alphabetized by long option.
11342
11343 @menu
11344 * Operation Modes::    Options controlling the global behavior of @command{bison}
11345 * Diagnostics::        Options controlling the diagnostics
11346 * Tuning the Parser::  Options changing the generated parsers
11347 * Output Files::       Options controlling the output
11348 @end menu
11349
11350 @node Operation Modes
11351 @subsection Operation Modes
11352
11353 Options controlling the global behavior of @command{bison}.
11354
11355 @c Please, keep this ordered as in 'bison --help'.
11356 @table @option
11357 @item -h
11358 @itemx --help
11359 Print a summary of the command-line options to Bison and exit.
11360
11361 @item -V
11362 @itemx --version
11363 Print the version number of Bison and exit.
11364
11365 @item --print-localedir
11366 Print the name of the directory containing locale-dependent data.
11367
11368 @item --print-datadir
11369 Print the name of the directory containing skeletons, CSS and XSLT.
11370
11371 @item -u
11372 @itemx --update
11373 Update the grammar file (remove duplicates, update deprecated directives,
11374 etc.) and exit (i.e., do not generate any of the output files).  Leaves a
11375 backup of the original file with a @code{~} appended.  For instance:
11376
11377 @example
11378 @group
11379 $ @kbd{cat foo.y}
11380 %error-verbose
11381 %define parse.error verbose
11382 %%
11383 exp:;
11384 @end group
11385 @group
11386 $ @kbd{bison -u foo.y}
11387 foo.y:1.1-14: @dwarning{warning}: deprecated directive, use '%define parse.error verbose' [@dwarning{-Wdeprecated}]
11388     1 | @dwarning{%error-verbose}
11389       | @dwarning{^~~~~~~~~~~~~~}
11390 foo.y:2.1-27: @dwarning{warning}: %define variable 'parse.error' redefined [@dwarning{-Wother}]
11391     2 | @dwarning{%define parse.error verbose}
11392       | @dwarning{^~~~~~~~~~~~~~~~~~~~~~~~~~~}
11393 foo.y:1.1-14:     previous definition
11394     1 | @dnotice{%error-verbose}
11395       | @dnotice{^~~~~~~~~~~~~~}
11396 bison: file 'foo.y' was updated (backup: 'foo.y~')
11397 @end group
11398 @group
11399 $ @kbd{cat foo.y}
11400 %define parse.error verbose
11401 %%
11402 exp:;
11403 @end group
11404 @end example
11405
11406 See the documentation of @option{--feature=fixit} below for more details.
11407
11408 @item -f [@var{feature}]
11409 @itemx --feature[=@var{feature}]
11410 Activate miscellaneous @var{feature}s. @var{Feature} can be one of:
11411 @table @code
11412 @item caret
11413 @itemx diagnostics-show-caret
11414 Show caret errors, in a manner similar to GCC's
11415 @option{-fdiagnostics-show-caret}, or Clang's
11416 @option{-fcaret-diagnostics}. The location provided with the message is used
11417 to quote the corresponding line of the source file, underlining the
11418 important part of it with carets (@samp{^}). Here is an example, using the
11419 following file @file{in.y}:
11420
11421 @example
11422 %nterm <ival> exp
11423 %%
11424 exp: exp '+' exp @{ $exp = $1 + $2; @};
11425 @end example
11426
11427 When invoked with @option{-fcaret} (or nothing), Bison will report:
11428
11429 @example
11430 @group
11431 in.y:3.20-23: @derror{error}: ambiguous reference: '$exp'
11432     3 | exp: exp '+' exp @{ @derror{$exp} = $1 + $2; @};
11433       |                    @derror{^~~~}
11434 @end group
11435 @group
11436 in.y:3.1-3:       refers to: $exp at $$
11437     3 | @dnotice{exp}: exp '+' exp @{ $exp = $1 + $2; @};
11438       | @dnotice{^~~}
11439 @end group
11440 @group
11441 in.y:3.6-8:       refers to: $exp at $1
11442     3 | exp: @dnotice{exp} '+' exp @{ $exp = $1 + $2; @};
11443       |      @dnotice{^~~}
11444 @end group
11445 @group
11446 in.y:3.14-16:     refers to: $exp at $3
11447     3 | exp: exp '+' @dnotice{exp} @{ $exp = $1 + $2; @};
11448       |              @dnotice{^~~}
11449 @end group
11450 @group
11451 in.y:3.32-33: @derror{error}: $2 of 'exp' has no declared type
11452     3 | exp: exp '+' exp @{ $exp = $1 + @derror{$2}; @};
11453       |                                @derror{^~}
11454 @end group
11455 @end example
11456
11457 Whereas, when invoked with @option{-fno-caret}, Bison will only report:
11458
11459 @example
11460 @group
11461 in.y:3.20-23: @derror{error}: ambiguous reference: '$exp'
11462 in.y:3.1-3:       refers to: $exp at $$
11463 in.y:3.6-8:       refers to: $exp at $1
11464 in.y:3.14-16:     refers to: $exp at $3
11465 in.y:3.32-33: @derror{error}: $2 of 'exp' has no declared type
11466 @end group
11467 @end example
11468
11469 This option is activated by default.
11470
11471 @item fixit
11472 @itemx diagnostics-parseable-fixits
11473 Show machine-readable fixes, in a manner similar to GCC's and Clang's
11474 @option{-fdiagnostics-parseable-fixits}.
11475
11476 Fix-its are generated for duplicate directives:
11477
11478 @example
11479 @group
11480 $ @kbd{cat foo.y}
11481 %define api.prefix @{foo@}
11482 %define api.prefix @{bar@}
11483 %%
11484 exp:;
11485 @end group
11486
11487 @group
11488 $ @kbd{bison -ffixit foo.y}
11489 foo.y:2.1-24: @derror{error}: %define variable 'api.prefix' redefined
11490     2 | @derror{%define api.prefix @{bar@}}
11491       | @derror{^~~~~~~~~~~~~~~~~~~~~~~~}
11492 foo.y:1.1-24:     previous definition
11493     1 | @dnotice{%define api.prefix @{foo@}}
11494       | @dnotice{^~~~~~~~~~~~~~~~~~~~~~~~}
11495 fix-it:"foo.y":@{2:1-2:25@}:""
11496 foo.y: @dwarning{warning}: fix-its can be applied.  Rerun with option '--update'. [@dwarning{-Wother}]
11497 @end group
11498 @end example
11499
11500 They are also generated to update deprecated directives, unless
11501 @option{-Wno-deprecated} was given:
11502
11503 @example
11504 @group
11505 $ @kbd{cat foo.y}
11506 %error-verbose
11507 %name-prefix "foo"
11508 %%
11509 exp:;
11510 @end group
11511 @group
11512 $ @kbd{bison foo.y}
11513 foo.y:1.1-14: @dwarning{warning}: deprecated directive, use '%define parse.error verbose' [@dwarning{-Wdeprecated}]
11514     1 | @dwarning{%error-verbose}
11515       | @dwarning{^~~~~~~~~~~~~~}
11516 foo.y:2.1-18: @dwarning{warning}: deprecated directive, use '%define api.prefix @{foo@}' [@dwarning{-Wdeprecated}]
11517     2 | @dwarning{%name-prefix "foo"}
11518       | @dwarning{^~~~~~~~~~~~~~~~~~}
11519 foo.y: @dwarning{warning}: fix-its can be applied.  Rerun with option '--update'. [@dwarning{-Wother}]
11520 @end group
11521 @end example
11522
11523 The fix-its are applied by @command{bison} itself when given the option
11524 @option{-u}/@option{--update}.  See its documentation above.
11525
11526 @item syntax-only
11527 Do not generate the output files.  The name of this feature is somewhat
11528 misleading as more than just checking the syntax is done: every stage is run
11529 (including checking for conflicts for instance), except the generation of
11530 the output files.
11531
11532 @end table
11533 @end table
11534
11535 @node Diagnostics
11536 @subsection Diagnostics
11537
11538 Options controlling the diagnostics.
11539
11540 @c Please, keep this ordered as in 'bison --help'.
11541 @table @code
11542 @item -W [@var{category}]
11543 @itemx --warnings[=@var{category}]
11544 Output warnings falling in @var{category}.  @var{category} can be one
11545 of:
11546 @table @code
11547 @item @anchor{Wconflicts-sr}conflicts-sr
11548 @itemx @anchor{Wconflicts-rr}conflicts-rr
11549 S/R and R/R conflicts.  These warnings are enabled by default.  However, if
11550 the @code{%expect} or @code{%expect-rr} directive is specified, an
11551 unexpected number of conflicts is an error, and an expected number of
11552 conflicts is not reported, so @option{-W} and @option{--warnings} then have
11553 no effect on the conflict report.
11554
11555 @item @anchor{Wcounterexamples}counterexamples
11556 @itemx cex
11557 Provide counterexamples for conflicts.  @xref{Counterexamples}.
11558 Counterexamples take time to compute.  The option @option{-Wcex} should be
11559 used by the developer when working on the grammar; it hardly makes sense to
11560 use it in a CI.
11561
11562 @item @anchor{Wdangling-alias}dangling-alias
11563 Report string literals that are not bound to a token symbol.
11564
11565 String literals, which allow for better error messages, are (too) liberally
11566 accepted by Bison, which might result in silent errors.  For instance
11567
11568 @example
11569 %type <exVal> cond "condition"
11570 @end example
11571
11572 @noindent
11573 does not define ``condition'' as a string alias to @code{cond}---nonterminal
11574 symbols do not have string aliases.  It is rather equivalent to
11575
11576 @example
11577 %nterm <exVal> cond
11578 %token <exVal> "condition"
11579 @end example
11580
11581 @noindent
11582 i.e., it gives the @samp{"condition"} token the type @code{exVal}.
11583
11584 Also, because string aliases do not need to be defined, typos such as
11585 @samp{"baz"} instead of @samp{"bar"} will not be reported.
11586
11587 The option @option{-Wdangling-alias} catches these situations.  On
11588
11589 @example
11590 %token BAR "bar"
11591 %type <ival> foo "foo"
11592 %%
11593 foo: "baz" @{@}
11594 @end example
11595
11596 @noindent
11597 @samp{bison -Wdangling-alias} reports
11598
11599 @example
11600 @dwarning{warning}: string literal not attached to a symbol
11601       | %type <ival> foo @dwarning{"foo"}
11602       |                  @dwarning{^~~~~}
11603 @dwarning{warning}: string literal not attached to a symbol
11604       | foo: @dwarning{"baz"} @{@}
11605       |      @dwarning{^~~~~}
11606 @end example
11607
11608 @item @anchor{Wdeprecated}deprecated
11609 Deprecated constructs whose support will be removed in future versions of
11610 Bison.
11611
11612 @item @anchor{Wempty-rule}empty-rule
11613 Empty rules without @code{%empty}.  @xref{Empty Rules}.  Disabled by
11614 default, but enabled by uses of @code{%empty}, unless
11615 @option{-Wno-empty-rule} was specified.
11616
11617 @item @anchor{Wmidrule-values}midrule-values
11618 Warn about midrule values that are set but not used within any of the actions
11619 of the parent rule.
11620 For example, warn about unused @code{$2} in:
11621
11622 @example
11623 exp: '1' @{ $$ = 1; @} '+' exp @{ $$ = $1 + $4; @};
11624 @end example
11625
11626 Also warn about midrule values that are used but not set.
11627 For example, warn about unset @code{$$} in the midrule action in:
11628
11629 @example
11630 exp: '1' @{ $1 = 1; @} '+' exp @{ $$ = $2 + $4; @};
11631 @end example
11632
11633 These warnings are not enabled by default since they sometimes prove to
11634 be false alarms in existing grammars employing the Yacc constructs
11635 @code{$0} or @code{$-@var{n}} (where @var{n} is some positive integer).
11636
11637 @item @anchor{Wprecedence}precedence
11638 Useless precedence and associativity directives.  Disabled by default.
11639
11640 Consider for instance the following grammar:
11641
11642 @example
11643 @group
11644 %nonassoc "="
11645 %left "+"
11646 %left "*"
11647 %precedence "("
11648 @end group
11649 %%
11650 @group
11651 stmt:
11652   exp
11653 | "var" "=" exp
11654 ;
11655 @end group
11656
11657 @group
11658 exp:
11659   exp "+" exp
11660 | exp "*" "number"
11661 | "(" exp ")"
11662 | "number"
11663 ;
11664 @end group
11665 @end example
11666
11667 Bison reports:
11668
11669 @c cannot leave the location and the [-Wprecedence] for lack of
11670 @c width in PDF.
11671 @example
11672 @group
11673 @dwarning{warning}: useless precedence and associativity for "="
11674       | %nonassoc @dwarning{"="}
11675       |           @dwarning{^~~}
11676 @end group
11677 @group
11678 @dwarning{warning}: useless associativity for "*", use %precedence
11679       | %left @dwarning{"*"}
11680       |       @dwarning{^~~}
11681 @end group
11682 @group
11683 @dwarning{warning}: useless precedence for "("
11684       | %precedence @dwarning{"("}
11685       |             @dwarning{^~~}
11686 @end group
11687 @end example
11688
11689 One would get the exact same parser with the following directives instead:
11690
11691 @example
11692 @group
11693 %left "+"
11694 %precedence "*"
11695 @end group
11696 @end example
11697
11698 @item @anchor{Wyacc}yacc
11699 Incompatibilities with POSIX Yacc.
11700
11701 @item @anchor{Wother}other
11702 All warnings not categorized above.  These warnings are enabled by default.
11703
11704 This category is provided merely for the sake of completeness.  Future
11705 releases of Bison may move warnings from this category to new, more specific
11706 categories.
11707
11708 @item @anchor{Wall}all
11709 All the warnings except @code{counterexamples}, @code{dangling-alias} and
11710 @code{yacc}.
11711
11712 @item @anchor{Wnone}none
11713 Turn off all the warnings.
11714
11715 @item error
11716 See @option{-Werror}, below.
11717 @end table
11718
11719 A category can be turned off by prefixing its name with @samp{no-}.  For
11720 instance, @option{-Wno-yacc} will hide the warnings about
11721 POSIX Yacc incompatibilities.
11722
11723 @item @anchor{Werror}-Werror
11724 Turn enabled warnings for every @var{category} into errors, unless they are
11725 explicitly disabled by @option{-Wno-error=@var{category}}.
11726
11727 @item -Werror=@var{category}
11728 Enable warnings falling in @var{category}, and treat them as errors.
11729
11730 @var{category} is the same as for @option{--warnings}, with the exception that
11731 it may not be prefixed with @samp{no-} (see above).
11732
11733 Note that the precedence of the @samp{=} and @samp{,} operators is such that
11734 the following commands are @emph{not} equivalent, as the first will not treat
11735 S/R conflicts as errors.
11736
11737 @example
11738 $ @kbd{bison -Werror=yacc,conflicts-sr input.y}
11739 $ @kbd{bison -Werror=yacc,error=conflicts-sr input.y}
11740 @end example
11741
11742 @item -Wno-error
11743 Do not turn enabled warnings for every @var{category} into errors, unless
11744 they are explicitly enabled by @option{-Werror=@var{category}}.
11745
11746 @item -Wno-error=@var{category}
11747 Deactivate the error treatment for this @var{category}. However, the warning
11748 itself won't be disabled, or enabled, by this option.
11749
11750 @item --color
11751 Equivalent to @option{--color=always}.
11752
11753 @item --color=@var{when}
11754 Control whether diagnostics are colorized, depending on @var{when}:
11755 @table @code
11756 @item always
11757 @itemx yes
11758 Enable colorized diagnostics.
11759
11760 @item never
11761 @itemx no
11762 Disable colorized diagnostics.
11763
11764 @item auto @r{(default)}
11765 @itemx tty
11766 Diagnostics will be colorized if the output device is a tty, i.e. when the
11767 output goes directly to a text screen or terminal emulator window.
11768 @end table
11769
11770 @item --style=@var{file}
11771 Specifies the CSS style @var{file} to use when colorizing. It has an effect
11772 only when the @option{--color} option is effective.  The
11773 @file{bison-default.css} file provides a good example from which to define
11774 your own style file.  See the documentation of libtextstyle for more
11775 details.
11776 @end table
11777
11778 @node Tuning the Parser
11779 @subsection Tuning the Parser
11780
11781 Options changing the generated parsers.
11782
11783 @c Please, keep this ordered as in 'bison --help'.
11784 @table @option
11785 @item -t
11786 @itemx --debug
11787 In the parser implementation file, define the macro @code{YYDEBUG} to 1 if
11788 it is not already defined, so that the debugging facilities are compiled.
11789 @xref{Tracing}.
11790
11791 @item -D @var{name}[=@var{value}]
11792 @itemx --define=@var{name}[=@var{value}]
11793 @itemx -F @var{name}[=@var{value}]
11794 @itemx --force-define=@var{name}[=@var{value}]
11795 Each of these is equivalent to @samp{%define @var{name} @var{value}}
11796 (@pxref{%define Summary}).  Note that the delimiters are part of
11797 @var{value}: @option{-Dapi.value.type=union},
11798 @option{-Dapi.value.type=@{union@}} and @option{-Dapi.value.type="union"}
11799 correspond to @samp{%define api.value.type union}, @samp{%define
11800 api.value.type @{union@}} and @samp{%define api.value.type "union"}.
11801
11802 Bison processes multiple definitions for the same @var{name} as follows:
11803
11804 @itemize
11805 @item
11806 Bison quietly ignores all command-line definitions for @var{name} except
11807 the last.
11808 @item
11809 If that command-line definition is specified by a @option{-D} or
11810 @option{--define}, Bison reports an error for any @code{%define} definition
11811 for @var{name}.
11812 @item
11813 If that command-line definition is specified by a @option{-F} or
11814 @option{--force-define} instead, Bison quietly ignores all @code{%define}
11815 definitions for @var{name}.
11816 @item
11817 Otherwise, Bison reports an error if there are multiple @code{%define}
11818 definitions for @var{name}.
11819 @end itemize
11820
11821 You should avoid using @option{-F} and @option{--force-define} in your
11822 make files unless you are confident that it is safe to quietly ignore
11823 any conflicting @code{%define} that may be added to the grammar file.
11824
11825 @item -L @var{language}
11826 @itemx --language=@var{language}
11827 Specify the programming language for the generated parser, as if
11828 @code{%language} was specified (@pxref{Decl Summary}).  Currently supported
11829 languages include C, C++, D and Java.  @var{language} is case-insensitive.
11830
11831 @item --locations
11832 Pretend that @code{%locations} was specified.  @xref{Decl Summary}.
11833
11834 @item -p @var{prefix}
11835 @itemx --name-prefix=@var{prefix}
11836 Pretend that @code{%name-prefix "@var{prefix}"} was specified (@pxref{Decl
11837 Summary}).  The option @option{-p} is specified by POSIX.  When POSIX
11838 compatibility is not a requirement, @option{-Dapi.prefix=@var{prefix}} is a
11839 better option (@pxref{Multiple Parsers}).
11840
11841 @item -l
11842 @itemx --no-lines
11843 Don't put any @code{#line} preprocessor commands in the parser
11844 implementation file.  Ordinarily Bison puts them in the parser
11845 implementation file so that the C compiler and debuggers will
11846 associate errors with your source file, the grammar file.  This option
11847 causes them to associate errors with the parser implementation file,
11848 treating it as an independent source file in its own right.
11849
11850 @item -S @var{file}
11851 @itemx --skeleton=@var{file}
11852 Specify the skeleton to use, similar to @code{%skeleton}
11853 (@pxref{Decl Summary}).
11854
11855 @c You probably don't need this option unless you are developing Bison.
11856 @c You should use @option{--language} if you want to specify the skeleton for a
11857 @c different language, because it is clearer and because it will always
11858 @c choose the correct skeleton for non-deterministic or push parsers.
11859
11860 If @var{file} does not contain a @code{/}, @var{file} is the name of a skeleton
11861 file in the Bison installation directory.
11862 If it does, @var{file} is an absolute file name or a file name relative to the
11863 current working directory.
11864 This is similar to how most shells resolve commands.
11865
11866 @item -k
11867 @itemx --token-table
11868 Pretend that @code{%token-table} was specified.  @xref{Decl Summary}.
11869
11870 @item -y
11871 @itemx @anchor{option-yacc} --yacc
11872 Act more like the traditional @command{yacc} command:
11873 @itemize
11874 @item
11875 Generate different diagnostics (it implies @option{-Wyacc}).
11876 @item
11877 Generate @code{#define} statements in addition to an @code{enum} to
11878 associate token codes with token kind names.
11879 @item
11880 If the @code{POSIXLY_CORRECT} environment variable is defined, generate
11881 prototypes for @code{yyerror} and @code{yylex}@footnote{See
11882 @url{https://austingroupbugs.net/view.php?id=1388#c5220}.} (since Bison
11883 3.8):
11884 @example
11885 int yylex (void);
11886 void yyerror (const char *);
11887 @end example
11888 As a Bison extension, additional arguments required by @code{%pure-parser},
11889 @code{%locations}, @code{%lex-param} and @code{%parse-param} are taken into
11890 account.  You may disable @code{yyerror}'s prototype with @samp{#define
11891 yyerror yyerror} (as specified by POSIX), or with @samp{#define
11892 YYERROR_IS_DECLARED} (a Bison extension).  Likewise for @code{yylex}.
11893 @item
11894 Imitate Yacc's output file name conventions, so that the parser
11895 implementation file is called @file{y.tab.c}, and the other outputs are
11896 called @file{y.output} and @file{y.tab.h}.  Do not use @option{--yacc} just
11897 to change the output file names since it also triggers all the
11898 aforementioned behavior changes; rather use @samp{-o y.tab.c}.
11899 @end itemize
11900
11901 The @option{-y}/@option{--yacc} option is intended for use with traditional
11902 Yacc grammars.  This option only makes sense for the default C skeleton,
11903 @file{yacc.c}.  If your grammar uses Bison extensions, Bison cannot be
11904 Yacc-compatible, even if this option is specified.
11905
11906 Thus, the following shell script can substitute for Yacc, and the Bison
11907 distribution contains such a @command{yacc} script for compatibility with
11908 POSIX:
11909
11910 @example
11911 #! /bin/sh
11912 bison -y "$@@"
11913 @end example
11914 @end table
11915
11916 @node Output Files
11917 @subsection Output Files
11918
11919 Options controlling the output.
11920
11921 @c Please, keep this ordered as in 'bison --help'.
11922 @table @option
11923 @item -H [@var{file}]
11924 @itemx --header[=@var{file}]
11925 Pretend that @code{%header} was specified, i.e., write an extra output file
11926 containing definitions for the token kind names defined in the grammar, as
11927 well as a few other declarations.  @xref{Decl Summary}.
11928
11929 @item --defines[=@var{file}]
11930 Historical name for option @option{--header} before Bison 3.8.
11931
11932 @item -d
11933 This is the same as @option{--header} except @option{-d} does not accept a
11934 @var{file} argument since POSIX Yacc requires that @option{-d} can be
11935 bundled with other short options.
11936
11937 @item -b @var{file-prefix}
11938 @itemx --file-prefix=@var{prefix}
11939 Pretend that @code{%file-prefix} was specified, i.e., specify prefix to use
11940 for all Bison output file names.  @xref{Decl Summary}.
11941
11942 @item -r @var{things}
11943 @itemx --report=@var{things}
11944 Write an extra output file containing a verbose description of the
11945 comma-separated list of @var{things} among:
11946
11947 @table @code
11948 @item state
11949 Description of the grammar, conflicts (resolved and unresolved), and
11950 parser's automaton.
11951
11952 @item itemset
11953 Implies @code{state} and augments the description of the automaton with
11954 the full set of items for each state, instead of its core only.
11955
11956 @item lookahead
11957 Implies @code{state} and augments the description of the automaton with
11958 each rule's lookahead set.
11959
11960 @item solved
11961 Implies @code{state}.  Explain how conflicts were solved thanks to
11962 precedence and associativity directives.
11963
11964 @item counterexamples
11965 @itemx cex
11966 Look for counterexamples for the conflicts.  @xref{Counterexamples}.
11967 Counterexamples take time to compute.  The option @option{-rcex} should be
11968 used by the developer when working on the grammar; it hardly makes sense to
11969 use it in a CI.
11970
11971 @item all
11972 Enable all the items.
11973
11974 @item none
11975 Do not generate the report.
11976 @end table
11977
11978 @item --report-file=@var{file}
11979 Specify the @var{file} for the verbose description.
11980
11981 @item -v
11982 @itemx --verbose
11983 Pretend that @code{%verbose} was specified, i.e., write an extra output
11984 file containing verbose descriptions of the grammar and
11985 parser.  @xref{Decl Summary}.
11986
11987 @item -o @var{file}
11988 @itemx --output=@var{file}
11989 Specify the @var{file} for the parser implementation file.
11990
11991 The names of the other output files are constructed from @var{file} as
11992 described under the @option{-v} and @option{-d} options.
11993
11994 @item -g [@var{file}]
11995 @itemx --graph[=@var{file}]
11996 Output a graphical representation of the parser's automaton computed by
11997 Bison, in @uref{https://www.graphviz.org/, Graphviz}
11998 @uref{https://www.graphviz.org/doc/info/lang.html, DOT} format.
11999 @code{@var{file}} is optional.  If omitted and the grammar file is
12000 @file{foo.y}, the output file will be @file{foo.gv}.
12001
12002 @item -x [@var{file}]
12003 @itemx --xml[=@var{file}]
12004 Output an XML report of the parser's automaton computed by Bison.
12005 @code{@var{file}} is optional.
12006 If omitted and the grammar file is @file{foo.y}, the output file will be
12007 @file{foo.xml}.
12008
12009 @item -M @var{old}=@var{new}
12010 @itemx --file-prefix-map=@var{old}=@var{new}
12011 Replace prefix @var{old} with @var{new} when writing file paths in output
12012 files.
12013 @end table
12014
12015 @node Option Cross Key
12016 @section Option Cross Key
12017
12018 Here is a list of options, alphabetized by long option, to help you find
12019 the corresponding short option and directive.
12020
12021 @multitable {@option{--force-define=@var{name}[=@var{value}]}} {@option{-F @var{name}[=@var{value}]}} {@code{%nondeterministic-parser}}
12022 @headitem Long Option @tab Short Option @tab Bison Directive
12023 @include cross-options.texi
12024 @end multitable
12025
12026 @node Yacc Library
12027 @section Yacc Library
12028
12029 The Yacc library contains default implementations of the @code{yyerror} and
12030 @code{main} functions.  These default implementations are normally not
12031 useful, but POSIX requires them.  To use the Yacc library, link your program
12032 with the @option{-ly} option.  Note that Bison's implementation of the Yacc
12033 library is distributed under the terms of the GNU General Public License
12034 (@pxref{Copying}).
12035
12036 If you use the Yacc library's @code{yyerror} function, you should declare
12037 @code{yyerror} as follows:
12038
12039 @example
12040 int yyerror (char const *);
12041 @end example
12042
12043 @noindent
12044 The @code{int} value returned by this @code{yyerror} is ignored.
12045
12046 The implementation of Yacc library's @code{main} function is:
12047
12048 @example
12049 int main (void)
12050 @{
12051   setlocale (LC_ALL, "");
12052   return yyparse ();
12053 @}
12054 @end example
12055
12056 @noindent
12057 so if you use it, the internationalization support is enabled (e.g., error
12058 messages are translated), and your @code{yyparse} function should have the
12059 following type signature:
12060
12061 @example
12062 int yyparse (void);
12063 @end example
12064
12065 @c ================================================= C++ Bison
12066
12067 @node Other Languages
12068 @chapter Parsers Written In Other Languages
12069
12070 In addition to C, Bison can generate parsers in C++, D and Java.  This chapter
12071 is devoted to these languages.  The reader is expected to understand how
12072 Bison works; read the introductory chapters first if you don't.
12073
12074 @menu
12075 * C++ Parsers::                 The interface to generate C++ parser classes
12076 * D Parsers::                   The interface to generate D parser classes
12077 * Java Parsers::                The interface to generate Java parser classes
12078 @end menu
12079
12080 @node C++ Parsers
12081 @section C++ Parsers
12082
12083 The Bison parser in C++ is an object, an instance of the class
12084 @code{yy::parser}.
12085
12086 @menu
12087 * A Simple C++ Example::        A short introduction to C++ parsers
12088 * C++ Bison Interface::         Asking for C++ parser generation
12089 * C++ Parser Interface::        Instantiating and running the parser
12090 * C++ Semantic Values::         %union vs. C++
12091 * C++ Location Values::         The position and location classes
12092 * C++ Parser Context::          You can supply a @code{report_syntax_error} function.
12093 * C++ Scanner Interface::       Exchanges between yylex and parse
12094 * A Complete C++ Example::      Demonstrating their use
12095 @end menu
12096
12097 @node A Simple C++ Example
12098 @subsection A Simple C++ Example
12099
12100 This tutorial about C++ parsers is based on a simple, self-contained
12101 example.@footnote{The sources of this example are available as
12102 @file{examples/c++/simple.yy}.}  The following sections are the reference
12103 manual for Bison with C++, the last one showing a fully blown example
12104 (@pxref{A Complete C++ Example}).
12105
12106 To look nicer, our example will be in C++14.  It is not required: Bison
12107 supports the original C++98 standard.
12108
12109 A Bison file has three parts.  In the first part, the prologue, we start by
12110 making sure we run a version of Bison which is recent enough, and that we
12111 generate C++.
12112
12113 @ignore
12114 @comment file: c++/simple.yy: 1
12115 @example
12116 /* Simple variant-based parser.   -*- C++ -*-
12117
12118    Copyright (C) 2018-2021 Free Software Foundation, Inc.
12119
12120    This file is part of Bison, the GNU Compiler Compiler.
12121
12122    This program is free software: you can redistribute it and/or modify
12123    it under the terms of the GNU General Public License as published by
12124    the Free Software Foundation, either version 3 of the License, or
12125    (at your option) any later version.
12126
12127    This program is distributed in the hope that it will be useful,
12128    but WITHOUT ANY WARRANTY; without even the implied warranty of
12129    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12130    GNU General Public License for more details.
12131
12132    You should have received a copy of the GNU General Public License
12133    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
12134 @end example
12135 @end ignore
12136
12137 @comment file: c++/simple.yy: 1
12138 @example
12139 %require "3.2"
12140 %language "c++"
12141 @end example
12142
12143 Let's dive directly into the middle part: the grammar.  Our input is a
12144 simple list of strings, that we display once the parsing is done.
12145
12146 @comment file: c++/simple.yy: 2
12147 @example
12148 %%
12149 @group
12150 result:
12151   list  @{ std::cout << $1 << '\n'; @}
12152 ;
12153 @end group
12154
12155 %nterm <std::vector<std::string>> list;
12156 @group
12157 list:
12158   %empty     @{ /* Generates an empty string list */ @}
12159 | list item  @{ $$ = $1; $$.push_back ($2); @}
12160 ;
12161 @end group
12162 @end example
12163
12164 We used a vector of strings as a semantic value!  To use genuine C++ objects
12165 as semantic values---not just PODs---we cannot rely on the union that Bison
12166 uses by default to store them, we need @emph{variants} (@pxref{C++
12167 Variants}):
12168
12169 @comment file: c++/simple.yy: 1
12170 @example
12171 %define api.value.type variant
12172 @end example
12173
12174 Obviously, the rule for @code{result} needs to print a vector of strings.
12175 In the prologue, we add:
12176
12177 @comment file: c++/simple.yy: 1
12178 @example
12179 %code
12180 @{
12181   // Print a list of strings.
12182   auto
12183   operator<< (std::ostream& o, const std::vector<std::string>& ss)
12184     -> std::ostream&
12185   @{
12186     o << '@{';
12187     const char *sep = "";
12188 @group
12189     for (const auto& s: ss)
12190       @{
12191         o << sep << s;
12192         sep = ", ";
12193       @}
12194 @end group
12195     return o << '@}';
12196   @}
12197 @}
12198 @end example
12199
12200 @noindent
12201 You may want to move it into the @code{yy} namespace to avoid leaking it in
12202 your default namespace.  We recommend that you keep the actions simple, and
12203 move details into auxiliary functions, as we did with @code{operator<<}.
12204
12205 Our list of strings will be built from two types of items: numbers and
12206 strings:
12207
12208 @comment file: c++/simple.yy: 2
12209 @example
12210 %nterm <std::string> item;
12211 %token <std::string> TEXT;
12212 %token <int> NUMBER;
12213 @group
12214 item:
12215   TEXT
12216 | NUMBER  @{ $$ = std::to_string ($1); @}
12217 ;
12218 @end group
12219 @end example
12220
12221 In the case of @code{TEXT}, the implicit default action applies: @w{@code{$$
12222 = $1}.}
12223
12224 @sp 1
12225
12226 Our scanner deserves some attention.  The traditional interface of
12227 @code{yylex} is not type safe: since the token kind and the token value are
12228 not correlated, you may return a @code{NUMBER} with a string as semantic
12229 value.  To avoid this, we use @emph{token constructors} (@pxref{Complete
12230 Symbols}).  This directive:
12231
12232 @comment file: c++/simple.yy: 1
12233 @example
12234 %define api.token.constructor
12235 @end example
12236
12237 @noindent
12238 requests that Bison generates the functions @code{make_TEXT} and
12239 @code{make_NUMBER}, but also @code{make_YYEOF}, for the end of input.
12240
12241 Everything is in place for our scanner:
12242
12243 @comment file: c++/simple.yy: 1
12244 @example
12245 %code
12246 @{
12247   namespace yy
12248   @{
12249     // Return the next token.
12250     auto yylex () -> parser::symbol_type
12251     @{
12252       static int count = 0;
12253       switch (int stage = count++)
12254         @{
12255 @group
12256         case 0:
12257           return parser::make_TEXT ("I have three numbers for you.");
12258 @end group
12259 @group
12260         case 1: case 2: case 3:
12261           return parser::make_NUMBER (stage);
12262 @end group
12263 @group
12264         case 4:
12265           return parser::make_TEXT ("And that's all!");
12266 @end group
12267 @group
12268         default:
12269           return parser::make_YYEOF ();
12270 @end group
12271         @}
12272     @}
12273   @}
12274 @}
12275 @end example
12276
12277 In the epilogue, the third part of a Bison grammar file, we leave simple
12278 details: the error reporting function, and the main function.
12279
12280 @comment file: c++/simple.yy: 3
12281 @example
12282 %%
12283 namespace yy
12284 @{
12285   // Report an error to the user.
12286   auto parser::error (const std::string& msg) -> void
12287   @{
12288     std::cerr << msg << '\n';
12289   @}
12290 @}
12291
12292 int main ()
12293 @{
12294   yy::parser parse;
12295   return parse ();
12296 @}
12297 @end example
12298
12299 Compile, and run!
12300
12301 @example
12302 $ @kbd{bison simple.yy -o simple.cc}
12303 $ @kbd{g++ -std=c++14 simple.cc -o simple}
12304 @group
12305 $ @kbd{./simple}
12306 @{I have three numbers for you., 1, 2, 3, And that's all!@}
12307 @end group
12308 @end example
12309
12310 @node C++ Bison Interface
12311 @subsection C++ Bison Interface
12312 @c - %skeleton "lalr1.cc"
12313 @c - Always pure
12314 @c - initial action
12315
12316 The C++ deterministic parser is selected using the skeleton directive,
12317 @samp{%skeleton "lalr1.cc"}.  @xref{Decl Summary}.
12318
12319 When run, @command{bison} will create several entities in the @samp{yy}
12320 namespace.
12321 @findex %define api.namespace
12322 Use the @samp{%define api.namespace} directive to change the namespace name,
12323 see @ref{%define Summary}.  The various classes are generated
12324 in the following files:
12325
12326 @table @file
12327 @item @var{file}.hh
12328 (Assuming the extension of the grammar file was @samp{.yy}.)  The
12329 declaration of the C++ parser class and auxiliary types.  By default, this
12330 file is not generated (@pxref{Decl Summary}).
12331
12332 @item @var{file}.cc
12333 The implementation of the C++ parser class.  The basename and extension of
12334 these two files (@file{@var{file}.hh} and @file{@var{file}.cc}) follow the
12335 same rules as with regular C parsers (@pxref{Invocation}).
12336
12337 @item location.hh
12338 Generated when both @code{%header} and @code{%locations} are enabled, this
12339 file contains the definition of the classes @code{position} and
12340 @code{location}, used for location tracking.  It is not generated if
12341 @samp{%define api.location.file none} is specified, or if user defined
12342 locations are used.  @xref{C++ Location Values}.
12343
12344 @item position.hh
12345 @itemx stack.hh
12346 Useless legacy files.  To get rid of them, use @samp{%require "3.2"} or
12347 newer.
12348 @end table
12349
12350 All these files are documented using Doxygen; run @command{doxygen} for a
12351 complete and accurate documentation.
12352
12353 @node C++ Parser Interface
12354 @subsection C++ Parser Interface
12355
12356 The output files @file{@var{file}.hh} and @file{@var{file}.cc} declare and
12357 define the parser class in the namespace @code{yy}.  The class name defaults
12358 to @code{parser}, but may be changed using @samp{%define api.parser.class
12359 @{@var{name}@}}.  The interface of this class is detailed below.  It can be
12360 extended using the @code{%parse-param} feature: its semantics is slightly
12361 changed since it describes an additional member of the parser class, and an
12362 additional argument for its constructor.
12363
12364
12365 @defcv {Type} {parser} {token}
12366 A structure that contains (only) the @code{token_kind_type} enumeration,
12367 which defines the tokens.  To refer to the token @code{FOO}, use
12368 @code{yy::parser::token::FOO}.  The scanner can use @samp{typedef
12369 yy::parser::token token;} to ``import'' the token enumeration (@pxref{Calc++
12370 Scanner}).
12371 @end defcv
12372
12373 @defcv {Type} {parser} {token_kind_type}
12374 An enumeration of the token kinds.  Its enumerators are forged from the
12375 token names, with a possible token prefix
12376 (@pxref{api-token-prefix,,@code{api.token.prefix}}):
12377
12378 @example
12379 /// Token kinds.
12380 struct token
12381 @{
12382   enum token_kind_type
12383   @{
12384     YYEMPTY = -2,              // No token.
12385     YYEOF = 0,                 // "end of file"
12386     YYerror = 256,             // error
12387     YYUNDEF = 257,             // "invalid token"
12388     PLUS = 258,                // "+"
12389     MINUS = 259,               // "-"
12390     [...]
12391     VAR = 271,                 // "variable"
12392     NEG = 272                  // NEG
12393   @};
12394 @};
12395
12396 /// Token kind, as returned by yylex.
12397 typedef token::token_kind_type token_kind_type;
12398 @end example
12399 @end defcv
12400
12401 @defcv {Type} {parser} {value_type}
12402 The types for semantic values. @xref{C++ Semantic Values}.
12403 @end defcv
12404
12405 @defcv {Type} {parser} {location_type}
12406 The type of locations, if location tracking is enabled.  @xref{C++ Location
12407 Values}.
12408 @end defcv
12409
12410 @defcv {Type} {parser} {syntax_error}
12411 This class derives from @code{std::runtime_error}.  Throw instances of it
12412 from the scanner or from the actions to raise parse errors.  This is
12413 equivalent to first invoking @code{error} to report the location and
12414 message of the syntax error, and then invoking @code{YYERROR} to enter the
12415 error-recovery mode.  But contrary to @code{YYERROR} which can only be
12416 invoked from user actions (i.e., written in the action itself), the
12417 exception can be thrown from functions invoked from the user action.
12418 @end defcv
12419
12420 @deftypeop {Constructor} {parser} {} parser ()
12421 @deftypeopx {Constructor} {parser} {} parser (@var{type1} @var{arg1}, ...)
12422 Build a new parser object.  There are no arguments, unless
12423 @samp{%parse-param @{@var{type1} @var{arg1}@}} was used.
12424 @end deftypeop
12425
12426 @deftypeop {Constructor} {syntax_error} {} syntax_error (@code{const location_type&} @var{l}, @code{const std::string&} @var{m})
12427 @deftypeopx {Constructor}  {syntax_error} {} syntax_error (@code{const std::string&} @var{m})
12428 Instantiate a syntax-error exception.
12429 @end deftypeop
12430
12431 @deftypemethod {parser} {int} operator() ()
12432 @deftypemethodx {parser} {int} parse ()
12433 Run the syntactic analysis, and return 0 on success, 1 otherwise.  Both
12434 routines are equivalent, @code{operator()} being more C++ish.
12435
12436 @cindex exceptions
12437 The whole function is wrapped in a @code{try}/@code{catch} block, so that
12438 when an exception is thrown, the @code{%destructor}s are called to release
12439 the lookahead symbol, and the symbols pushed on the stack.
12440
12441 Exception related code in the generated parser is protected by CPP guards
12442 (@code{#if}) and disabled when exceptions are not supported (i.e., passing
12443 @option{-fno-exceptions} to the C++ compiler).
12444 @end deftypemethod
12445
12446 @deftypemethod {parser} {std::ostream&} debug_stream ()
12447 @deftypemethodx {parser} {void} set_debug_stream (@code{std::ostream&} @var{o})
12448 Get or set the stream used for tracing the parsing.  It defaults to
12449 @code{std::cerr}.
12450 @end deftypemethod
12451
12452 @deftypemethod {parser} {debug_level_type} debug_level ()
12453 @deftypemethodx {parser} {void} set_debug_level (debug_level_type @var{l})
12454 Get or set the tracing level (an integer).  Currently its value is either
12455 0, no trace, or nonzero, full tracing.
12456 @end deftypemethod
12457
12458 @deftypemethod {parser} {void} error (@code{const location_type&} @var{l}, @code{const std::string&} @var{m})
12459 @deftypemethodx {parser} {void} error (@code{const std::string&} @var{m})
12460 The definition for this member function must be supplied by the user: the
12461 parser uses it to report a parser error occurring at @var{l}, described by
12462 @var{m}.  If location tracking is not enabled, the second signature is used.
12463 @end deftypemethod
12464
12465
12466 @node C++ Semantic Values
12467 @subsection C++ Semantic Values
12468
12469 Bison supports two different means to handle semantic values in C++.  One is
12470 similar to the C interface, and relies on unions.  As C++ practitioners know,
12471 unions are inconvenient in C++, therefore another approach is provided,
12472 based on variants.
12473
12474 @menu
12475 * C++ Unions::             Semantic values cannot be objects
12476 * C++ Variants::           Using objects as semantic values
12477 @end menu
12478
12479 @node C++ Unions
12480 @subsubsection C++ Unions
12481
12482 The @code{%union} directive works as for C, see @ref{Union Decl}.  In
12483 particular it produces a genuine @code{union}, which has a few specific
12484 features in C++.
12485 @itemize @minus
12486 @item
12487 The value type is @code{yy::parser::value_type}, not @code{YYSTYPE}.
12488 @item
12489 Non POD (Plain Old Data) types cannot be used.  C++98 forbids any instance
12490 of classes with constructors in unions: only @emph{pointers} to such objects
12491 are allowed.  C++11 relaxed this constraint, but at the cost of safety.
12492 @end itemize
12493
12494 Because objects have to be stored via pointers, memory is not
12495 reclaimed automatically: using the @code{%destructor} directive is the
12496 only means to avoid leaks.  @xref{Destructor Decl}.
12497
12498 @node C++ Variants
12499 @subsubsection C++ Variants
12500
12501 Bison provides a @emph{variant} based implementation of semantic values for
12502 C++.  This alleviates all the limitations reported in the previous section,
12503 and in particular, object types can be used without pointers.
12504
12505 To enable variant-based semantic values, set the @code{%define} variable
12506 @code{api.value.type} to @code{variant} (@pxref{%define Summary}).  Then
12507 @code{%union} is ignored; instead of using the name of the fields of the
12508 @code{%union} to ``type'' the symbols, use genuine types.
12509
12510 For instance, instead of:
12511
12512 @example
12513 %union
12514 @{
12515   int ival;
12516   std::string* sval;
12517 @}
12518 %token <ival> NUMBER;
12519 %token <sval> STRING;
12520 @end example
12521
12522 @noindent
12523 write:
12524
12525 @example
12526 %token <int> NUMBER;
12527 %token <std::string> STRING;
12528 @end example
12529
12530 @code{STRING} is no longer a pointer, which should fairly simplify the user
12531 actions in the grammar and in the scanner (in particular the memory
12532 management).
12533
12534 Since C++ features destructors, and since it is customary to specialize
12535 @code{operator<<} to support uniform printing of values, variants also
12536 typically simplify Bison printers and destructors.
12537
12538 Variants are stricter than unions.  When based on unions, you may play any
12539 dirty game with @code{yylval}, say storing an @code{int}, reading a
12540 @code{char*}, and then storing a @code{double} in it.  This is no longer
12541 possible with variants: they must be initialized, then assigned to, and
12542 eventually, destroyed.  As a matter of fact, Bison variants forbid the use
12543 of alternative types such as @samp{$<int>2} or @samp{$<std::string>$}, even
12544 in midrule actions.  It is mandatory to use typed midrule actions
12545 (@pxref{Typed Midrule Actions}).
12546
12547 @deftypemethod {value_type} {T&} {emplace<T>} ()
12548 @deftypemethodx {value_type} {T&} {emplace<T>} (@code{const T&} @var{t})
12549 Available in C++98/C++03 only.  Default construct/copy-construct from
12550 @var{t}.  Return a reference to where the actual value may be stored.
12551 Requires that the variant was not initialized yet.
12552 @end deftypemethod
12553
12554 @deftypemethod {value_type} {T&} {emplace<T, U>} (@code{U&&...} @var{u})
12555 Available in C++11 and later only.  Build a variant of type @code{T} from
12556 the variadic forwarding references @var{u...}.
12557 @end deftypemethod
12558
12559 @strong{Warning}: We do not use Boost.Variant, for two reasons.  First, it
12560 appeared unacceptable to require Boost on the user's machine (i.e., the
12561 machine on which the generated parser will be compiled, not the machine on
12562 which @command{bison} was run).  Second, for each possible semantic value,
12563 Boost.Variant not only stores the value, but also a tag specifying its
12564 type.  But the parser already ``knows'' the type of the semantic value, so
12565 that would be duplicating the information.
12566
12567 We do not use C++17's @code{std::variant} either: we want to support all the
12568 C++ standards, and of course @code{std::variant} also stores a tag to record
12569 the current type.
12570
12571 Therefore we developed light-weight variants whose type tag is external (so
12572 they are really like @code{union}s for C++ actually).  There are a number of
12573 limitations in (the current implementation of) variants:
12574 @itemize
12575 @item
12576 Alignment must be enforced: values should be aligned in memory according to
12577 the most demanding type.  Computing the smallest alignment possible requires
12578 meta-programming techniques that are not currently implemented in Bison, and
12579 therefore, since, as far as we know, @code{double} is the most demanding
12580 type on all platforms, alignments are enforced for @code{double} whatever
12581 types are actually used.  This may waste space in some cases.
12582
12583 @item
12584 There might be portability issues we are not aware of.
12585 @end itemize
12586
12587 As far as we know, these limitations @emph{can} be alleviated.  All it takes
12588 is some time and/or some talented C++ hacker willing to contribute to Bison.
12589
12590 @node C++ Location Values
12591 @subsection C++ Location Values
12592
12593 When the directive @code{%locations} is used, the C++ parser supports
12594 location tracking, see @ref{Tracking Locations}.
12595
12596 By default, two auxiliary classes define a @code{position}, a single point
12597 in a file, and a @code{location}, a range composed of a pair of
12598 @code{position}s (possibly spanning several files).  If the @code{%define}
12599 variable @code{api.location.type} is defined, then these classes will not be
12600 generated, and the user defined type will be used.
12601
12602 @menu
12603 * C++ position::         One point in the source file
12604 * C++ location::         Two points in the source file
12605 * Exposing the Location Classes:: Using the Bison location class in your
12606                                   project
12607 * User Defined Location Type::    Required interface for locations
12608 @end menu
12609
12610 @node C++ position
12611 @subsubsection C++ @code{position}
12612
12613 @defcv {Type} {position} {filename_type}
12614 The base type for file names. Defaults to @code{const std::string}.
12615 @xref{api-filename-type,,@code{api.filename.type}}, to change its definition.
12616 @end defcv
12617
12618 @defcv {Type} {position} {counter_type}
12619 The type used to store line and column numbers.  Defined as @code{int}.
12620 @end defcv
12621
12622 @deftypeop {Constructor} {position} {} position (@code{filename_type*} @var{file} = nullptr, @code{counter_type} @var{line} = 1, @code{counter_type} @var{col} = 1)
12623 Create a @code{position} denoting a given point.  Note that @code{file} is
12624 not reclaimed when the @code{position} is destroyed: memory management must be
12625 handled elsewhere.
12626 @end deftypeop
12627
12628 @deftypemethod {position} {void} initialize (@code{filename_type*} @var{file} = nullptr, @code{counter_type} @var{line} = 1, @code{counter_type} @var{col} = 1)
12629 Reset the position to the given values.
12630 @end deftypemethod
12631
12632 @deftypeivar {position} {filename_type*} file
12633 The name of the file.  It will always be handled as a pointer, the parser
12634 will never duplicate nor deallocate it.
12635 @end deftypeivar
12636
12637 @deftypeivar {position} {counter_type} line
12638 The line, starting at 1.
12639 @end deftypeivar
12640
12641 @deftypemethod {position} {void} lines (@code{counter_type} @var{height} = 1)
12642 If @var{height} is not null, advance by @var{height} lines, resetting the
12643 column number.  The resulting line number cannot be less than 1.
12644 @end deftypemethod
12645
12646 @deftypeivar {position} {counter_type} column
12647 The column, starting at 1.
12648 @end deftypeivar
12649
12650 @deftypemethod {position} {void} columns (@code{counter_type} @var{width} = 1)
12651 Advance by @var{width} columns, without changing the line number. The
12652 resulting column number cannot be less than 1.
12653 @end deftypemethod
12654
12655 @deftypemethod {position} {position&} operator+= (@code{counter_type} @var{width})
12656 @deftypemethodx {position} {position} operator+ (@code{counter_type} @var{width})
12657 @deftypemethodx {position} {position&} operator-= (@code{counter_type} @var{width})
12658 @deftypemethodx {position} {position} operator- (@code{counter_type} @var{width})
12659 Various forms of syntactic sugar for @code{columns}.
12660 @end deftypemethod
12661
12662 @deftypemethod {position} {bool} operator== (@code{const position&} @var{that})
12663 @deftypemethodx {position} {bool} operator!= (@code{const position&} @var{that})
12664 Whether @code{*this} and @code{that} denote equal/different positions.
12665 @end deftypemethod
12666
12667 @deftypefun {std::ostream&} operator<< (@code{std::ostream&} @var{o}, @code{const position&} @var{p})
12668 Report @var{p} on @var{o} like this:
12669 @samp{@var{file}:@var{line}.@var{column}}, or
12670 @samp{@var{line}.@var{column}} if @var{file} is null.
12671 @end deftypefun
12672
12673 @node C++ location
12674 @subsubsection C++ @code{location}
12675
12676 @deftypeop {Constructor} {location} {} location (@code{const position&} @var{begin}, @code{const position&} @var{end})
12677 Create a @code{location} from the endpoints of the range.
12678 @end deftypeop
12679
12680 @deftypeop {Constructor} {location} {} location (@code{const position&} @var{pos} = position())
12681 @deftypeopx {Constructor} {location} {} location (@code{filename_type*} @var{file}, @code{counter_type} @var{line}, @code{counter_type} @var{col})
12682 Create a @code{location} denoting an empty range located at a given point.
12683 @end deftypeop
12684
12685 @deftypemethod {location} {void} initialize (@code{filename_type*} @var{file} = nullptr, @code{counter_type} @var{line} = 1, @code{counter_type} @var{col} = 1)
12686 Reset the location to an empty range at the given values.
12687 @end deftypemethod
12688
12689 @deftypeivar {location} {position} begin
12690 @deftypeivarx {location} {position} end
12691 The first, inclusive, position of the range, and the first beyond.
12692 @end deftypeivar
12693
12694 @deftypemethod {location} {void} columns (@code{counter_type} @var{width} = 1)
12695 @deftypemethodx {location} {void} lines (@code{counter_type} @var{height} = 1)
12696 Forwarded to the @code{end} position.
12697 @end deftypemethod
12698
12699 @deftypemethod  {location} {location} operator+  (@code{counter_type} @var{width})
12700 @deftypemethodx {location} {location} operator+= (@code{counter_type} @var{width})
12701 @deftypemethodx {location} {location} operator-  (@code{counter_type} @var{width})
12702 @deftypemethodx {location} {location} operator-= (@code{counter_type} @var{width})
12703 Various forms of syntactic sugar for @code{columns}.
12704 @end deftypemethod
12705
12706 @deftypemethod {location} {location} operator+ (@code{const location&} @var{end})
12707 @deftypemethodx {location} {location} operator+= (@code{const location&} @var{end})
12708 Join two locations: starts at the position of the first one, and ends at the
12709 position of the second.
12710 @end deftypemethod
12711
12712 @deftypemethod {location} {void} step ()
12713 Move @code{begin} onto @code{end}.
12714 @end deftypemethod
12715
12716 @deftypemethod {location} {bool} operator== (@code{const location&} @var{that})
12717 @deftypemethodx {location} {bool} operator!= (@code{const location&} @var{that})
12718 Whether @code{*this} and @code{that} denote equal/different ranges of
12719 positions.
12720 @end deftypemethod
12721
12722 @deftypefun {std::ostream&} operator<< (@code{std::ostream&} @var{o}, @code{const location&} @var{p})
12723 Report @var{p} on @var{o}, taking care of special cases such as: no
12724 @code{filename} defined, or equal filename/line or column.
12725 @end deftypefun
12726
12727 @node Exposing the Location Classes
12728 @subsubsection Exposing the Location Classes
12729
12730 When both @code{%header} and @code{%locations} are enabled, Bison generates
12731 an additional file: @file{location.hh}.  If you don't use locations outside
12732 of the parser, you may avoid its creation with @samp{%define
12733 api.location.file none}.
12734
12735 However this file is useful if, for instance, your parser builds an abstract
12736 syntax tree decorated with locations: you may use Bison's @code{location}
12737 type independently of Bison's parser.  You may name the file differently,
12738 e.g., @samp{%define api.location.file "include/ast/location.hh"}: this name
12739 can have directory components, or even be absolute.  The way the location
12740 file is included is controlled by @code{api.location.include}.
12741
12742 This way it is possible to have several parsers share the same location
12743 file.
12744
12745 For instance, in @file{src/foo/parser.yy}, generate the
12746 @file{include/ast/loc.hh} file:
12747
12748 @example
12749 // src/foo/parser.yy
12750 %locations
12751 %define api.namespace @{foo@}
12752 %define api.location.file "include/ast/loc.hh"
12753 %define api.location.include @{<ast/loc.hh>@}
12754 @end example
12755
12756 @noindent
12757 and use it in @file{src/bar/parser.yy}:
12758
12759 @example
12760 // src/bar/parser.yy
12761 %locations
12762 %define api.namespace @{bar@}
12763 %code requires @{#include <ast/loc.hh>@}
12764 %define api.location.type @{bar::location@}
12765 @end example
12766
12767 Absolute file names are supported; it is safe in your @file{Makefile} to
12768 pass the flag
12769 @option{-Dapi.location.file='"$(top_srcdir)/include/ast/loc.hh"'} to
12770 @command{bison} for @file{src/foo/parser.yy}.  The generated file will not
12771 have references to this absolute path, thanks to @samp{%define
12772 api.location.include @{<ast/loc.hh>@}}.  Adding @samp{-I
12773 $(top_srcdir)/include} to your @code{CPPFLAGS} will suffice for the compiler
12774 to find @file{ast/loc.hh}.
12775
12776 @node User Defined Location Type
12777 @subsubsection User Defined Location Type
12778 @findex %define api.location.type
12779
12780 Instead of using the built-in types you may use the @code{%define} variable
12781 @code{api.location.type} to specify your own type:
12782
12783 @example
12784 %define api.location.type @{@var{LocationType}@}
12785 @end example
12786
12787 The requirements over your @var{LocationType} are:
12788 @itemize
12789 @item
12790 it must be copyable;
12791
12792 @item
12793 in order to compute the (default) value of @code{@@$} in a reduction, the
12794 parser basically runs
12795 @example
12796 @@$.begin = @@1.begin;
12797 @@$.end   = @@@var{N}.end; // The location of last right-hand side symbol.
12798 @end example
12799 @noindent
12800 so there must be copyable @code{begin} and @code{end} members;
12801
12802 @item
12803 alternatively you may redefine the computation of the default location, in
12804 which case these members are not required (@pxref{Location Default Action});
12805
12806 @item
12807 if traces are enabled, then there must exist an @samp{std::ostream&
12808   operator<< (std::ostream& o, const @var{LocationType}& s)} function.
12809 @end itemize
12810
12811 @sp 1
12812
12813 In programs with several C++ parsers, you may also use the @code{%define}
12814 variable @code{api.location.type} to share a common set of built-in
12815 definitions for @code{position} and @code{location}.  For instance, one
12816 parser @file{master/parser.yy} might use:
12817
12818 @example
12819 %header
12820 %locations
12821 %define api.namespace @{master@}
12822 @end example
12823
12824 @noindent
12825 to generate the @file{master/position.hh} and @file{master/location.hh}
12826 files, reused by other parsers as follows:
12827
12828 @example
12829 %define api.location.type @{master::location@}
12830 %code requires @{ #include <master/location.hh> @}
12831 @end example
12832
12833
12834 @node C++ Parser Context
12835 @subsection C++ Parser Context
12836
12837 When @samp{%define parse.error custom} is used (@pxref{Syntax Error
12838 Reporting Function}), the user must define the following function.
12839
12840 @deftypemethod {parser} {void} report_syntax_error (@code{const context&} @var{ctx}) @code{const}
12841 Report a syntax error to the user.  Whether it uses @code{yyerror} is up to
12842 the user.
12843 @end deftypemethod
12844
12845 Use the following types and functions to build the error message.
12846
12847 @defcv {Type} {parser} {context}
12848 A type that captures the circumstances of the syntax error.
12849 @end defcv
12850
12851 @defcv {Type} {parser} {symbol_kind_type}
12852 An enum of all the grammar symbols, tokens and nonterminals.  Its
12853 enumerators are forged from the symbol names:
12854
12855 @example
12856 struct symbol_kind
12857 @{
12858   enum symbol_kind_type
12859   @{
12860     S_YYEMPTY = -2,      // No symbol.
12861     S_YYEOF = 0,         // "end of file"
12862     S_YYerror = 1,       // error
12863     S_YYUNDEF = 2,       // "invalid token"
12864     S_PLUS = 3,          // "+"
12865     S_MINUS = 4,         // "-"
12866     [...]
12867     S_VAR = 14,          // "variable"
12868     S_NEG = 15,          // NEG
12869     S_YYACCEPT = 16,     // $accept
12870     S_exp = 17,          // exp
12871     S_input = 18         // input
12872   @};
12873 @};
12874 typedef symbol_kind::symbol_kind_type symbol_kind_type;
12875 @end example
12876 @end defcv
12877
12878 @deftypemethod {context} {const symbol_type&} lookahead () @code{const}
12879 The ``unexpected'' token: the lookahead that caused the syntax error.
12880 @end deftypemethod
12881
12882 @deftypemethod {context} {symbol_kind_type} token () @code{const}
12883 The symbol kind of the lookahead token that caused the syntax error.  Returns
12884 @code{symbol_kind::S_YYEMPTY} if there is no lookahead.
12885 @end deftypemethod
12886
12887 @deftypemethod {context} {const location&} location () @code{const}
12888 The location of the syntax error (that of the lookahead).
12889 @end deftypemethod
12890
12891 @deftypemethod {context} int expected_tokens (@code{symbol_kind_type} @var{argv}@code{[]}, @code{int} @var{argc}) @code{const}
12892 Fill @var{argv} with the expected tokens, which never includes
12893 @code{symbol_kind::S_YYEMPTY}, @code{symbol_kind::S_YYerror}, or
12894 @code{symbol_kind::S_YYUNDEF}.
12895
12896 Never put more than @var{argc} elements into @var{argv}, and on success
12897 return the number of tokens stored in @var{argv}.  If there are more
12898 expected tokens than @var{argc}, fill @var{argv} up to @var{argc} and return
12899 0.  If there are no expected tokens, also return 0, but set @code{argv[0]}
12900 to @code{symbol_kind::S_YYEMPTY}.
12901
12902 If @var{argv} is null, return the size needed to store all the possible
12903 values, which is always less than @code{YYNTOKENS}.
12904 @end deftypemethod
12905
12906 @deftypemethod {parser} {const char *} symbol_name (@code{symbol_kind_type} @var{symbol}) @code{const}
12907 The name of the symbol whose kind is @var{symbol}, possibly translated.
12908
12909 Returns a @code{std::string} when @code{parse.error} is @code{verbose}.
12910 @end deftypemethod
12911
12912 A custom syntax error function looks as follows.  This implementation is
12913 inappropriate for internationalization, see the @file{c/bistromathic}
12914 example for a better alternative.
12915
12916 @example
12917 void
12918 yy::parser::report_syntax_error (const context& ctx) const
12919 @{
12920   int res = 0;
12921   std::cerr << ctx.location () << ": syntax error";
12922   // Report the tokens expected at this point.
12923   @{
12924     enum @{ TOKENMAX = 5 @};
12925     symbol_kind_type expected[TOKENMAX];
12926     int n = ctx.expected_tokens (expected, TOKENMAX);
12927     for (int i = 0; i < n; ++i)
12928       std::cerr << (i == 0 ? ": expected " : " or ")
12929                 << symbol_name (expected[i]);
12930   @}
12931   // Report the unexpected token.
12932   @{
12933     symbol_kind_type lookahead = ctx.token ();
12934     if (lookahead != symbol_kind::S_YYEMPTY)
12935       std::cerr << " before " << symbol_name (lookahead);
12936   @}
12937   std::cerr << '\n';
12938 @}
12939 @end example
12940
12941 You still must provide the @code{error} member function, used for instance
12942 to report memory exhaustion.
12943
12944
12945 @node C++ Scanner Interface
12946 @subsection C++ Scanner Interface
12947 @c - prefix for yylex.
12948 @c - Pure interface to yylex
12949 @c - %lex-param
12950
12951 The parser invokes the scanner by calling @code{yylex}.  Contrary to C
12952 parsers, C++ parsers are always pure: there is no point in using the
12953 @samp{%define api.pure} directive.  The actual interface with @code{yylex}
12954 depends on whether you use unions or variants.
12955
12956 @menu
12957 * Split Symbols::         Passing symbols as two/three components
12958 * Complete Symbols::      Making symbols a whole
12959 @end menu
12960
12961 @node Split Symbols
12962 @subsubsection Split Symbols
12963
12964 The generated parser expects @code{yylex} to have the following prototype.
12965
12966 @deftypefun {int} yylex (@code{value_type*} @var{yylval}, @code{location_type*} @var{yylloc}, @var{type1} @var{arg1}, @dots{})
12967 @deftypefunx {int} yylex (@code{value_type*} @var{yylval}, @var{type1} @var{arg1}, @dots{})
12968 Return the next token.  Its kind is the return value, its semantic value and
12969 location (if enabled) being @var{yylval} and @var{yylloc}.  Invocations of
12970 @samp{%lex-param @{@var{type1} @var{arg1}@}} yield additional arguments.
12971 @end deftypefun
12972
12973 Note that when using variants, the interface for @code{yylex} is the same,
12974 but @code{yylval} is handled differently.
12975
12976 Regular union-based code in a Lex scanner typically looks like:
12977
12978 @example
12979 [0-9]+   @{
12980            yylval->ival = text_to_int (yytext);
12981            return yy::parser::token::INTEGER;
12982          @}
12983 [a-z]+   @{
12984            yylval->sval = new std::string (yytext);
12985            return yy::parser::token::IDENTIFIER;
12986          @}
12987 @end example
12988
12989 Using variants, @code{yylval} is already constructed, but it is not
12990 initialized.  So the code would look like:
12991
12992 @example
12993 [0-9]+   @{
12994            yylval->emplace<int> () = text_to_int (yytext);
12995            return yy::parser::token::INTEGER;
12996          @}
12997 [a-z]+   @{
12998            yylval->emplace<std::string> () = yytext;
12999            return yy::parser::token::IDENTIFIER;
13000          @}
13001 @end example
13002
13003 @noindent
13004 or
13005
13006 @example
13007 [0-9]+   @{
13008            yylval->emplace (text_to_int (yytext));
13009            return yy::parser::token::INTEGER;
13010          @}
13011 [a-z]+   @{
13012            yylval->emplace (yytext);
13013            return yy::parser::token::IDENTIFIER;
13014          @}
13015 @end example
13016
13017
13018 @node Complete Symbols
13019 @subsubsection Complete Symbols
13020
13021 With both @code{%define api.value.type variant} and @code{%define
13022 api.token.constructor}, the parser defines the type @code{symbol_type}, and
13023 expects @code{yylex} to have the following prototype.
13024
13025 @deftypefun {parser::symbol_type} yylex ()
13026 @deftypefunx {parser::symbol_type} yylex (@var{type1} @var{arg1}, @dots{})
13027 Return a @emph{complete} symbol, aggregating its kind (i.e., the traditional
13028 value returned by @code{yylex}), its semantic value, and possibly its
13029 location.  Invocations of @samp{%lex-param @{@var{type1} @var{arg1}@}} yield
13030 additional arguments.
13031 @end deftypefun
13032
13033 @defcv {Type} {parser} {symbol_type}
13034 A ``complete symbol'', that binds together its kind, value and (when
13035 applicable) location.
13036 @end defcv
13037
13038 @deftypemethod {symbol_type} {symbol_kind_type} kind () @code{const}
13039 The kind of this symbol.
13040 @end deftypemethod
13041
13042 @deftypemethod {symbol_type} {const char *} name () @code{const}
13043 The name of the kind of this symbol.
13044
13045 Returns a @code{std::string} when @code{parse.error} is @code{verbose}.
13046 @end deftypemethod
13047
13048 @sp 1
13049
13050 Bison generates @code{symbol_type} constructors that take the token kind as an argument, as follows.
13051
13052 @deftypeop  {Constructor} {parser::symbol_type} {} {symbol_type} (@code{int} @var{token}, @code{const @var{value_type}&} @var{value}, @code{const location_type&} @var{location})
13053 @deftypeopx {Constructor} {parser::symbol_type} {} {symbol_type} (@code{int} @var{token}, @code{const location_type&} @var{location})
13054 @deftypeopx {Constructor} {parser::symbol_type} {} {symbol_type} (@code{int} @var{token}, @code{const @var{value_type}&} @var{value})
13055 @deftypeopx {Constructor} {parser::symbol_type} {} {symbol_type} (@code{int} @var{token})
13056 Build a complete terminal symbol for the token kind @var{token} (including
13057 the @code{api.token.prefix}), whose semantic value, if it has one, is
13058 @var{value} of adequate @var{value_type}.  Pass the @var{location} iff
13059 location tracking is enabled.
13060
13061 Consistency between @var{token} and @var{value_type} is checked via an
13062 @code{assert}.
13063 @end deftypeop
13064
13065 For instance, given the following declarations:
13066
13067 @example
13068 %define api.token.prefix @{TOK_@}
13069 %token <std::string> IDENTIFIER;
13070 %token <int> INTEGER;
13071 %token ':';
13072 @end example
13073
13074 @noindent
13075 you may use these constructors:
13076
13077 @example
13078 symbol_type (int token, const std::string&, const location_type&);
13079 symbol_type (int token, const int&, const location_type&);
13080 symbol_type (int token, const location_type&);
13081 @end example
13082
13083 Correct matching between token kinds and value types is checked via
13084 @code{assert}; for instance, @samp{symbol_type (ID, 42)} would abort.  Named
13085 constructors are preferable (see below), as they offer better type safety
13086 (for instance @samp{make_ID (42)} would not even compile), but @code{symbol_type}
13087 constructors may help when token kinds are discovered at run-time, e.g.,
13088
13089 @example
13090 @group
13091 [a-z]+   @{
13092            if (auto i = lookup_keyword (yytext))
13093              return yy::parser::symbol_type (i, loc);
13094            else
13095              return yy::parser::make_ID (yytext, loc);
13096          @}
13097 @end group
13098 @end example
13099
13100 @sp 1
13101
13102 Note that it is possible to generate and compile type incorrect code
13103 (e.g., @samp{symbol_type (':', yytext, loc)}).  It will fail at run time,
13104 provided the assertions are enabled (i.e., @option{-DNDEBUG} was not passed
13105 to the compiler).  Bison supports an alternative that guarantees that type
13106 incorrect code will not even compile.  Indeed, it generates @emph{named
13107 constructors} as follows.
13108
13109 @deftypemethod {parser} {symbol_type} {make_@var{token}} (@code{const @var{value_type}&} @var{value}, @code{const location_type&} @var{location})
13110 @deftypemethodx {parser} {symbol_type} {make_@var{token}} (@code{const location_type&} @var{location})
13111 @deftypemethodx {parser} {symbol_type} {make_@var{token}} (@code{const @var{value_type}&} @var{value})
13112 @deftypemethodx {parser} {symbol_type} {make_@var{token}} ()
13113 Build a complete terminal symbol for the token kind @var{token} (not
13114 including the @code{api.token.prefix}), whose semantic value, if it has one,
13115 is @var{value} of adequate @var{value_type}.  Pass the @var{location} iff
13116 location tracking is enabled.
13117 @end deftypemethod
13118
13119 For instance, given the following declarations:
13120
13121 @example
13122 %define api.token.prefix @{TOK_@}
13123 %token <std::string> IDENTIFIER;
13124 %token <int> INTEGER;
13125 %token COLON;
13126 %token EOF 0;
13127 @end example
13128
13129 @noindent
13130 Bison generates:
13131
13132 @example
13133 symbol_type make_IDENTIFIER (const std::string&, const location_type&);
13134 symbol_type make_INTEGER (const int&, const location_type&);
13135 symbol_type make_COLON (const location_type&);
13136 symbol_type make_EOF (const location_type&);
13137 @end example
13138
13139 @noindent
13140 which should be used in a scanner as follows.
13141
13142 @example
13143 [a-z]+   return yy::parser::make_IDENTIFIER (yytext, loc);
13144 [0-9]+   return yy::parser::make_INTEGER (text_to_int (yytext), loc);
13145 ":"      return yy::parser::make_COLON (loc);
13146 <<EOF>>  return yy::parser::make_EOF (loc);
13147 @end example
13148
13149 Tokens that do not have an identifier are not accessible: you cannot simply
13150 use characters such as @code{':'}, they must be declared with @code{%token},
13151 including the end-of-file token.
13152
13153
13154 @node A Complete C++ Example
13155 @subsection A Complete C++ Example
13156
13157 This section demonstrates the use of a C++ parser with a simple but complete
13158 example.  This example should be available on your system, ready to compile,
13159 in the directory @file{examples/c++/calc++}.  It focuses on
13160 the use of Bison, therefore the design of the various C++ classes is very
13161 naive: no accessors, no encapsulation of members etc.  We will use a Lex
13162 scanner, and more precisely, a Flex scanner, to demonstrate the various
13163 interactions.  A hand-written scanner is actually easier to interface with.
13164
13165 @menu
13166 * Calc++ --- C++ Calculator::   The specifications
13167 * Calc++ Parsing Driver::       An active parsing context
13168 * Calc++ Parser::               A parser class
13169 * Calc++ Scanner::              A pure C++ Flex scanner
13170 * Calc++ Top Level::            Conducting the band
13171 @end menu
13172
13173 @node Calc++ --- C++ Calculator
13174 @subsubsection Calc++ --- C++ Calculator
13175
13176 Of course the grammar is dedicated to arithmetic, a single expression,
13177 possibly preceded by variable assignments.  An environment containing
13178 possibly predefined variables such as @code{one} and @code{two}, is
13179 exchanged with the parser.  An example of valid input follows.
13180
13181 @example
13182 three := 3
13183 seven := one + two * three
13184 seven * seven
13185 @end example
13186
13187 @node Calc++ Parsing Driver
13188 @subsubsection Calc++ Parsing Driver
13189 @c - An env
13190 @c - A place to store error messages
13191 @c - A place for the result
13192
13193 To support a pure interface with the parser (and the scanner) the technique
13194 of the ``parsing context'' is convenient: a structure containing all the
13195 data to exchange.  Since, in addition to simply launching the parsing, there
13196 are several auxiliary tasks to execute (open the file for scanning,
13197 instantiate the parser etc.), we recommend transforming the simple parsing
13198 context structure into a fully blown @dfn{parsing driver} class.
13199
13200 The declaration of this driver class, in @file{driver.hh}, is as follows.
13201 The first part includes the CPP guard and imports the required standard
13202 library components, and the declaration of the parser class.
13203
13204 @ignore
13205 @comment file: c++/calc++/driver.hh
13206 @example
13207 /* Driver for calc++.   -*- C++ -*-
13208
13209    Copyright (C) 2005-2015, 2018-2021 Free Software Foundation, Inc.
13210
13211    This file is part of Bison, the GNU Compiler Compiler.
13212
13213    This program is free software: you can redistribute it and/or modify
13214    it under the terms of the GNU General Public License as published by
13215    the Free Software Foundation, either version 3 of the License, or
13216    (at your option) any later version.
13217
13218    This program is distributed in the hope that it will be useful,
13219    but WITHOUT ANY WARRANTY; without even the implied warranty of
13220    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13221    GNU General Public License for more details.
13222
13223    You should have received a copy of the GNU General Public License
13224    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
13225 @end example
13226 @end ignore
13227
13228 @comment file: c++/calc++/driver.hh
13229 @example
13230 #ifndef DRIVER_HH
13231 # define DRIVER_HH
13232 # include <string>
13233 # include <map>
13234 # include "parser.hh"
13235 @end example
13236
13237
13238 @noindent
13239 Then comes the declaration of the scanning function.  Flex expects the
13240 signature of @code{yylex} to be defined in the macro @code{YY_DECL}, and the
13241 C++ parser expects it to be declared.  We can factor both as follows.
13242
13243 @comment file: c++/calc++/driver.hh
13244 @example
13245 // Give Flex the prototype of yylex we want ...
13246 # define YY_DECL \
13247   yy::parser::symbol_type yylex (driver& drv)
13248 // ... and declare it for the parser's sake.
13249 YY_DECL;
13250 @end example
13251
13252 @noindent
13253 The @code{driver} class is then declared with its most obvious members.
13254
13255 @comment file: c++/calc++/driver.hh
13256 @example
13257 // Conducting the whole scanning and parsing of Calc++.
13258 class driver
13259 @{
13260 public:
13261   driver ();
13262
13263   std::map<std::string, int> variables;
13264
13265   int result;
13266 @end example
13267
13268 @noindent
13269 The main routine is of course calling the parser.
13270
13271 @comment file: c++/calc++/driver.hh
13272 @example
13273   // Run the parser on file F.  Return 0 on success.
13274   int parse (const std::string& f);
13275   // The name of the file being parsed.
13276   std::string file;
13277   // Whether to generate parser debug traces.
13278   bool trace_parsing;
13279 @end example
13280
13281 @noindent
13282 To encapsulate the coordination with the Flex scanner, it is useful to have
13283 member functions to open and close the scanning phase.
13284
13285 @comment file: c++/calc++/driver.hh
13286 @example
13287   // Handling the scanner.
13288   void scan_begin ();
13289   void scan_end ();
13290   // Whether to generate scanner debug traces.
13291   bool trace_scanning;
13292   // The token's location used by the scanner.
13293   yy::location location;
13294 @};
13295 #endif // ! DRIVER_HH
13296 @end example
13297
13298 The implementation of the driver (@file{driver.cc}) is straightforward.
13299
13300 @ignore
13301 @comment file: c++/calc++/driver.cc
13302 @example
13303 /* Driver for calc++.   -*- C++ -*-
13304
13305    Copyright (C) 2005-2015, 2018-2021 Free Software Foundation, Inc.
13306
13307    This file is part of Bison, the GNU Compiler Compiler.
13308
13309    This program is free software: you can redistribute it and/or modify
13310    it under the terms of the GNU General Public License as published by
13311    the Free Software Foundation, either version 3 of the License, or
13312    (at your option) any later version.
13313
13314    This program is distributed in the hope that it will be useful,
13315    but WITHOUT ANY WARRANTY; without even the implied warranty of
13316    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13317    GNU General Public License for more details.
13318
13319    You should have received a copy of the GNU General Public License
13320    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
13321 @end example
13322 @end ignore
13323
13324 @comment file: c++/calc++/driver.cc
13325 @example
13326 #include "driver.hh"
13327 #include "parser.hh"
13328
13329 @group
13330 driver::driver ()
13331   : trace_parsing (false), trace_scanning (false)
13332 @{
13333   variables["one"] = 1;
13334   variables["two"] = 2;
13335 @}
13336 @end group
13337 @end example
13338
13339 The @code{parse} member function deserves some attention.
13340
13341 @comment file: c++/calc++/driver.cc
13342 @example
13343 @group
13344 int
13345 driver::parse (const std::string &f)
13346 @{
13347   file = f;
13348   location.initialize (&file);
13349   scan_begin ();
13350   yy::parser parse (*this);
13351   parse.set_debug_level (trace_parsing);
13352   int res = parse ();
13353   scan_end ();
13354   return res;
13355 @}
13356 @end group
13357 @end example
13358
13359 @node Calc++ Parser
13360 @subsubsection Calc++ Parser
13361
13362 The grammar file @file{parser.yy} starts by asking for the C++ deterministic
13363 parser skeleton, the creation of the parser header file.  Because the C++
13364 skeleton changed several times, it is safer to require the version you
13365 designed the grammar for.
13366
13367 @ignore
13368 @comment file: c++/calc++/parser.yy
13369 @example
13370 /* Parser for calc++.   -*- C++ -*-
13371
13372    Copyright (C) 2005-2015, 2018-2021 Free Software Foundation, Inc.
13373
13374    This file is part of Bison, the GNU Compiler Compiler.
13375
13376    This program is free software: you can redistribute it and/or modify
13377    it under the terms of the GNU General Public License as published by
13378    the Free Software Foundation, either version 3 of the License, or
13379    (at your option) any later version.
13380
13381    This program is distributed in the hope that it will be useful,
13382    but WITHOUT ANY WARRANTY; without even the implied warranty of
13383    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13384    GNU General Public License for more details.
13385
13386    You should have received a copy of the GNU General Public License
13387    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
13388 @end example
13389 @end ignore
13390
13391 @comment file: c++/calc++/parser.yy
13392 @example
13393 %skeleton "lalr1.cc" // -*- C++ -*-
13394 %require "@value{VERSION}"
13395 %header
13396 @end example
13397
13398 @noindent
13399 @findex %define api.token.raw
13400 Because our scanner returns only genuine tokens and never simple characters
13401 (i.e., it returns @samp{PLUS}, not @samp{'+'}), we can avoid conversions.
13402
13403 @comment file: c++/calc++/parser.yy
13404 @example
13405 %define api.token.raw
13406 @end example
13407
13408 @noindent
13409 @findex %define api.token.constructor
13410 @findex %define api.value.type variant
13411 This example uses genuine C++ objects as semantic values, therefore, we
13412 require the variant-based storage of semantic values.  To make sure we
13413 properly use it, we enable assertions.  To fully benefit from type-safety
13414 and a more natural definition of ``symbol'', we enable
13415 @code{api.token.constructor}.
13416
13417 @comment file: c++/calc++/parser.yy
13418 @example
13419 %define api.token.constructor
13420 %define api.value.type variant
13421 %define parse.assert
13422 @end example
13423
13424 @noindent
13425 @findex %code requires
13426 Then come the declarations/inclusions needed by the semantic values.
13427 Because the parser uses the parsing driver and reciprocally, both would like
13428 to include the header of the other, which is, of course, insane.  This
13429 mutual dependency will be broken using forward declarations.  Because the
13430 driver's header needs detailed knowledge about the parser class (in
13431 particular its inner types), it is the parser's header which will use a
13432 forward declaration of the driver.  @xref{%code Summary}.
13433
13434 @comment file: c++/calc++/parser.yy
13435 @example
13436 @group
13437 %code requires @{
13438   # include <string>
13439   class driver;
13440 @}
13441 @end group
13442 @end example
13443
13444 @noindent
13445 The driver is passed by reference to the parser and to the scanner.
13446 This provides a simple but effective pure interface, not relying on
13447 global variables.
13448
13449 @comment file: c++/calc++/parser.yy
13450 @example
13451 // The parsing context.
13452 %param @{ driver& drv @}
13453 @end example
13454
13455 @noindent
13456 Then we request location tracking.
13457
13458 @comment file: c++/calc++/parser.yy
13459 @example
13460 %locations
13461 @end example
13462
13463 @noindent
13464 Use the following directives to enable parser tracing and detailed error
13465 messages.  Because detailed error messages can contain incorrect information
13466 if lookahead correction is not enabled (@pxref{LAC}), it is enabled too.
13467
13468 @comment file: c++/calc++/parser.yy
13469 @example
13470 %define parse.trace
13471 %define parse.error detailed
13472 %define parse.lac full
13473 @end example
13474
13475 @noindent
13476 @findex %code
13477 The code between @samp{%code @{} and @samp{@}} is output in the @file{*.cc}
13478 file; it needs detailed knowledge about the driver.
13479
13480 @comment file: c++/calc++/parser.yy
13481 @example
13482 @group
13483 %code @{
13484 # include "driver.hh"
13485 @}
13486 @end group
13487 @end example
13488
13489
13490 @noindent
13491 User-friendly names are provided for each symbol.  To avoid name clashes in
13492 the generated files (@pxref{Calc++ Scanner}), prefix tokens with @code{TOK_}
13493 (@pxref{%define Summary}).
13494
13495 @comment file: c++/calc++/parser.yy
13496 @example
13497 %define api.token.prefix @{TOK_@}
13498 %token
13499   ASSIGN  ":="
13500   MINUS   "-"
13501   PLUS    "+"
13502   STAR    "*"
13503   SLASH   "/"
13504   LPAREN  "("
13505   RPAREN  ")"
13506 ;
13507 @end example
13508
13509 @noindent
13510 Since we use variant-based semantic values, @code{%union} is not used, and
13511 @code{%token}, @code{%nterm} and @code{%type} expect genuine types, not type
13512 tags.
13513
13514 @comment file: c++/calc++/parser.yy
13515 @example
13516 %token <std::string> IDENTIFIER "identifier"
13517 %token <int> NUMBER "number"
13518 %nterm <int> exp
13519 @end example
13520
13521 @noindent
13522 No @code{%destructor} is needed to enable memory deallocation during error
13523 recovery; the memory, for strings for instance, will be reclaimed by the
13524 regular destructors.  All the values are printed using their
13525 @code{operator<<} (@pxref{Printer Decl}).
13526
13527 @comment file: c++/calc++/parser.yy
13528 @example
13529 %printer @{ yyo << $$; @} <*>;
13530 @end example
13531
13532 @noindent
13533 The grammar itself is straightforward (@pxref{Location Tracking Calc}).
13534
13535 @comment file: c++/calc++/parser.yy
13536 @example
13537 %%
13538 %start unit;
13539 unit: assignments exp  @{ drv.result = $2; @};
13540
13541 assignments:
13542   %empty                 @{@}
13543 | assignments assignment @{@};
13544
13545 assignment:
13546   "identifier" ":=" exp @{ drv.variables[$1] = $3; @};
13547
13548 %left "+" "-";
13549 %left "*" "/";
13550 exp:
13551   "number"
13552 | "identifier"  @{ $$ = drv.variables[$1]; @}
13553 | exp "+" exp   @{ $$ = $1 + $3; @}
13554 | exp "-" exp   @{ $$ = $1 - $3; @}
13555 | exp "*" exp   @{ $$ = $1 * $3; @}
13556 | exp "/" exp   @{ $$ = $1 / $3; @}
13557 | "(" exp ")"   @{ $$ = $2; @}
13558 %%
13559 @end example
13560
13561 @noindent
13562 Finally the @code{error} member function reports the errors.
13563
13564 @comment file: c++/calc++/parser.yy
13565 @example
13566 void
13567 yy::parser::error (const location_type& l, const std::string& m)
13568 @{
13569   std::cerr << l << ": " << m << '\n';
13570 @}
13571 @end example
13572
13573 @node Calc++ Scanner
13574 @subsubsection Calc++ Scanner
13575
13576 In addition to standard headers, the Flex scanner includes the driver's,
13577 then the parser's to get the set of defined tokens.
13578
13579 @ignore
13580 @comment file: c++/calc++/scanner.ll
13581 @example
13582 /* Scanner for calc++.   -*- C++ -*-
13583
13584    Copyright (C) 2005-2015, 2018-2021 Free Software Foundation, Inc.
13585
13586    This file is part of Bison, the GNU Compiler Compiler.
13587
13588    This program is free software: you can redistribute it and/or modify
13589    it under the terms of the GNU General Public License as published by
13590    the Free Software Foundation, either version 3 of the License, or
13591    (at your option) any later version.
13592
13593    This program is distributed in the hope that it will be useful,
13594    but WITHOUT ANY WARRANTY; without even the implied warranty of
13595    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13596    GNU General Public License for more details.
13597
13598    You should have received a copy of the GNU General Public License
13599    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
13600 @end example
13601 @end ignore
13602
13603 @comment file: c++/calc++/scanner.ll
13604 @example
13605 %@{ /* -*- C++ -*- */
13606 # include <cerrno>
13607 # include <climits>
13608 # include <cstdlib>
13609 # include <cstring> // strerror
13610 # include <string>
13611 # include "driver.hh"
13612 # include "parser.hh"
13613 %@}
13614 @end example
13615
13616 @ignore
13617 @comment file: c++/calc++/scanner.ll
13618 @example
13619 %@{
13620 #if defined __clang__
13621 # define CLANG_VERSION (__clang_major__ * 100 + __clang_minor__)
13622 #endif
13623
13624 // Clang and ICC like to pretend they are GCC.
13625 #if defined __GNUC__ && !defined __clang__ && !defined __ICC
13626 # define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
13627 #endif
13628
13629 // Pacify warnings in yy_init_buffer (observed with Flex 2.6.4)
13630 // and GCC 6.4.0, 7.3.0 with -O3.
13631 #if defined GCC_VERSION && 600 <= GCC_VERSION
13632 # pragma GCC diagnostic ignored "-Wnull-dereference"
13633 #endif
13634
13635 // This example uses Flex's C back end, yet compiles it as C++.
13636 // So expect warnings about C style casts and NULL.
13637 #if defined CLANG_VERSION && 500 <= CLANG_VERSION
13638 # pragma clang diagnostic ignored "-Wold-style-cast"
13639 # pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant"
13640 #elif defined GCC_VERSION && 407 <= GCC_VERSION
13641 # pragma GCC diagnostic ignored "-Wold-style-cast"
13642 # pragma GCC diagnostic ignored "-Wzero-as-null-pointer-constant"
13643 #endif
13644
13645 #define FLEX_VERSION (YY_FLEX_MAJOR_VERSION * 100 + YY_FLEX_MINOR_VERSION)
13646
13647 // Old versions of Flex (2.5.35) generate an incomplete documentation comment.
13648 //
13649 //  In file included from src/scan-code-c.c:3:
13650 //  src/scan-code.c:2198:21: error: empty paragraph passed to '@param' command
13651 //        [-Werror,-Wdocumentation]
13652 //   * @param line_number
13653 //     ~~~~~~~~~~~~~~~~~^
13654 //  1 error generated.
13655 #if FLEX_VERSION < 206 && defined CLANG_VERSION
13656 # pragma clang diagnostic ignored "-Wdocumentation"
13657 #endif
13658
13659 // Old versions of Flex (2.5.35) use 'register'.  Warnings introduced in
13660 // GCC 7 and Clang 6.
13661 #if FLEX_VERSION < 206
13662 # if defined CLANG_VERSION && 600 <= CLANG_VERSION
13663 #  pragma clang diagnostic ignored "-Wdeprecated-register"
13664 # elif defined GCC_VERSION && 700 <= GCC_VERSION
13665 #  pragma GCC diagnostic ignored "-Wregister"
13666 # endif
13667 #endif
13668
13669 #if FLEX_VERSION < 206
13670 # if defined CLANG_VERSION
13671 #  pragma clang diagnostic ignored "-Wconversion"
13672 #  pragma clang diagnostic ignored "-Wdocumentation"
13673 #  pragma clang diagnostic ignored "-Wshorten-64-to-32"
13674 #  pragma clang diagnostic ignored "-Wsign-conversion"
13675 # elif defined GCC_VERSION
13676 #  pragma GCC diagnostic ignored "-Wconversion"
13677 #  pragma GCC diagnostic ignored "-Wsign-conversion"
13678 # endif
13679 #endif
13680
13681 // Flex 2.6.4, GCC 9
13682 // warning: useless cast to type 'int' [-Wuseless-cast]
13683 // 1361 |   YY_CURRENT_BUFFER_LVALUE->yy_buf_size = (int) (new_size - 2);
13684 //      |                                                 ^
13685 #if defined GCC_VERSION && 900 <= GCC_VERSION
13686 # pragma GCC diagnostic ignored "-Wuseless-cast"
13687 #endif
13688 %@}
13689 @end example
13690 @end ignore
13691
13692 @noindent
13693 Since our calculator has no @code{#include}-like feature, we don't need
13694 @code{yywrap}.  We don't need the @code{unput} and @code{input} functions
13695 either, and we parse an actual file, this is not an interactive session with
13696 the user.  Finally, we enable scanner tracing.
13697
13698 @comment file: c++/calc++/scanner.ll
13699 @example
13700 %option noyywrap nounput noinput batch debug
13701 @end example
13702
13703 @noindent
13704 The following function will be handy to convert a string denoting a number
13705 into a @code{NUMBER} token.
13706
13707 @comment file: c++/calc++/scanner.ll
13708 @example
13709 %@{
13710   // A number symbol corresponding to the value in S.
13711   yy::parser::symbol_type
13712   make_NUMBER (const std::string &s, const yy::parser::location_type& loc);
13713 %@}
13714 @end example
13715
13716 @noindent
13717 Abbreviations allow for more readable rules.
13718
13719 @comment file: c++/calc++/scanner.ll
13720 @example
13721 id    [a-zA-Z][a-zA-Z_0-9]*
13722 int   [0-9]+
13723 blank [ \t\r]
13724 @end example
13725
13726 @noindent
13727 The following paragraph suffices to track locations accurately.  Each time
13728 @code{yylex} is invoked, the begin position is moved onto the end position.
13729 Then when a pattern is matched, its width is added to the end column.  When
13730 matching ends of lines, the end cursor is adjusted, and each time blanks are
13731 matched, the begin cursor is moved onto the end cursor to effectively ignore
13732 the blanks preceding tokens.  Comments would be treated equally.
13733
13734 @comment file: c++/calc++/scanner.ll
13735 @example
13736 @group
13737 %@{
13738   // Code run each time a pattern is matched.
13739   # define YY_USER_ACTION  loc.columns (yyleng);
13740 %@}
13741 @end group
13742 %%
13743 @group
13744 %@{
13745   // A handy shortcut to the location held by the driver.
13746   yy::location& loc = drv.location;
13747   // Code run each time yylex is called.
13748   loc.step ();
13749 %@}
13750 @end group
13751 @{blank@}+   loc.step ();
13752 \n+        loc.lines (yyleng); loc.step ();
13753 @end example
13754
13755 @noindent
13756 The rules are simple.  Invalid input is reported by throwing a @code{syntax_error}.
13757
13758 @comment file: c++/calc++/scanner.ll
13759 @example
13760 "-"        return yy::parser::make_MINUS  (loc);
13761 "+"        return yy::parser::make_PLUS   (loc);
13762 "*"        return yy::parser::make_STAR   (loc);
13763 "/"        return yy::parser::make_SLASH  (loc);
13764 "("        return yy::parser::make_LPAREN (loc);
13765 ")"        return yy::parser::make_RPAREN (loc);
13766 ":="       return yy::parser::make_ASSIGN (loc);
13767
13768 @{int@}      return make_NUMBER (yytext, loc);
13769 @{id@}       return yy::parser::make_IDENTIFIER (yytext, loc);
13770 @group
13771 .          @{
13772              throw yy::parser::syntax_error
13773                (loc, "invalid character: " + std::string(yytext));
13774 @}
13775 @end group
13776 <<EOF>>    return yy::parser::make_YYEOF (loc);
13777 %%
13778 @end example
13779
13780 @noindent
13781 You should keep your rules simple, both in the parser and in the scanner.
13782 Throwing from the auxiliary functions is then very handy to report errors.
13783
13784 @comment file: c++/calc++/scanner.ll
13785 @example
13786 @group
13787 yy::parser::symbol_type
13788 make_NUMBER (const std::string &s, const yy::parser::location_type& loc)
13789 @{
13790   errno = 0;
13791   long n = strtol (s.c_str(), NULL, 10);
13792   if (! (INT_MIN <= n && n <= INT_MAX && errno != ERANGE))
13793     throw yy::parser::syntax_error (loc, "integer is out of range: " + s);
13794   return yy::parser::make_NUMBER ((int) n, loc);
13795 @}
13796 @end group
13797 @end example
13798
13799 @noindent
13800 Finally, because the scanner-related driver's member-functions depend
13801 on the scanner's data, it is simpler to implement them in this file.
13802
13803 @comment file: c++/calc++/scanner.ll
13804 @example
13805 @group
13806 void
13807 driver::scan_begin ()
13808 @{
13809   yy_flex_debug = trace_scanning;
13810   if (file.empty () || file == "-")
13811     yyin = stdin;
13812   else if (!(yyin = fopen (file.c_str (), "r")))
13813     @{
13814       std::cerr << "cannot open " << file << ": " << strerror (errno) << '\n';
13815       exit (EXIT_FAILURE);
13816     @}
13817 @}
13818 @end group
13819
13820 @group
13821 void
13822 driver::scan_end ()
13823 @{
13824   fclose (yyin);
13825 @}
13826 @end group
13827 @end example
13828
13829 @node Calc++ Top Level
13830 @subsubsection Calc++ Top Level
13831
13832 The top level file, @file{calc++.cc}, poses no problem.
13833
13834 @ignore
13835 @comment file: c++/calc++/calc++.cc
13836 @example
13837 /* Main for calc++.   -*- C++ -*-
13838
13839    Copyright (C) 2005-2015, 2018-2021 Free Software Foundation, Inc.
13840
13841    This file is part of Bison, the GNU Compiler Compiler.
13842
13843    This program is free software: you can redistribute it and/or modify
13844    it under the terms of the GNU General Public License as published by
13845    the Free Software Foundation, either version 3 of the License, or
13846    (at your option) any later version.
13847
13848    This program is distributed in the hope that it will be useful,
13849    but WITHOUT ANY WARRANTY; without even the implied warranty of
13850    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13851    GNU General Public License for more details.
13852
13853    You should have received a copy of the GNU General Public License
13854    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
13855 @end example
13856 @end ignore
13857
13858 @comment file: c++/calc++/calc++.cc
13859 @example
13860 #include <iostream>
13861 #include "driver.hh"
13862
13863 @group
13864 int
13865 main (int argc, char *argv[])
13866 @{
13867   int res = 0;
13868   driver drv;
13869   for (int i = 1; i < argc; ++i)
13870     if (argv[i] == std::string ("-p"))
13871       drv.trace_parsing = true;
13872     else if (argv[i] == std::string ("-s"))
13873       drv.trace_scanning = true;
13874     else if (!drv.parse (argv[i]))
13875       std::cout << drv.result << '\n';
13876     else
13877       res = 1;
13878   return res;
13879 @}
13880 @end group
13881 @end example
13882
13883 @node D Parsers
13884 @section D Parsers
13885
13886 @menu
13887 * D Bison Interface::        Asking for D parser generation
13888 * D Semantic Values::        %token and %nterm vs. D
13889 * D Location Values::        The position and location classes
13890 * D Parser Interface::       Instantiating and running the parser
13891 * D Parser Context Interface:: Circumstances of a syntax error
13892 * D Scanner Interface::      Specifying the scanner for the parser
13893 * D Action Features::        Special features for use in actions
13894 * D Push Parser Interface::  Instantiating and running the push parser
13895 * D Complete Symbols::       Using token constructors
13896 @end menu
13897
13898 @node D Bison Interface
13899 @subsection D Bison Interface
13900 @c - %language "D"
13901
13902 The D parser skeletons are selected using the @code{%language "D"}
13903 directive or the @option{-L D}/@option{--language=D} option.
13904
13905 @c FIXME: Documented bug.
13906 When generating a D parser, @samp{bison @var{basename}.y} will create a
13907 single D source file named @file{@var{basename}.d} containing the
13908 parser implementation.  Using a grammar file without a @file{.y} suffix is
13909 currently broken.  The basename of the parser implementation file can be
13910 changed by the @code{%file-prefix} directive or the
13911 @option{-b}/@option{--file-prefix} option.  The entire parser implementation
13912 file name can be changed by the @code{%output} directive or the
13913 @option{-o}/@option{--output} option.  The parser implementation file
13914 contains a single class for the parser.
13915
13916 You can create documentation for generated parsers using Ddoc.
13917
13918 GLR parsers are currently unsupported in D.  Do not use the
13919 @code{%glr-parser} directive.
13920
13921 No header file can be generated for D parsers.  Do not use the
13922 @code{%header} directive or the @option{-d}/@option{--header} options.
13923
13924 @node D Semantic Values
13925 @subsection D Semantic Values
13926
13927 Semantic types are handled by @code{%union} and @samp{%define api.value.type
13928 union}, similar to C/C++ parsers. In the latter case, the union of the
13929 values is handled by the backend. In D, unions can hold classes, structs,
13930 etc., so this directive is more similar to @samp{%define api.value.type
13931 variant} from C++.
13932
13933 D parsers do not support @code{%destructor}, since the language
13934 adopts garbage collection.  The parser will try to hold references
13935 to semantic values for as little time as needed.
13936
13937 D parsers support @code{%printer}.  An example for the output of type
13938 @code{int}, where @code{yyo} is the parser's debug output:
13939
13940 @example
13941 %printer @{ yyo.write($$); @} <int>
13942 @end example
13943
13944
13945 @node D Location Values
13946 @subsection D Location Values
13947 @c - %locations
13948 @c - class Position
13949 @c - class Location
13950
13951 When the directive @code{%locations} is used, the D parser supports location
13952 tracking, see @ref{Tracking Locations}.  The position and the location
13953 structures are provided.
13954
13955 @deftypeivar {Location} {Position} begin
13956 @deftypeivarx {Location} {Position} end
13957 The first, inclusive, position of the range, and the first beyond.
13958 @end deftypeivar
13959
13960 @deftypeop {Constructor} {Location} {} this(@code{Position} @var{loc})
13961 Create a @code{Location} denoting an empty range located at a given point.
13962 @end deftypeop
13963
13964 @deftypeop {Constructor} {Location} {} this(@code{Position} @var{begin}, @code{Position} @var{end})
13965 Create a @code{Location} from the endpoints of the range.
13966 @end deftypeop
13967
13968 @deftypemethod {Location} {string} toString()
13969 The range represented by the location as a string.
13970 @end deftypemethod
13971
13972
13973 @node D Parser Interface
13974 @subsection D Parser Interface
13975
13976 The name of the generated parser class defaults to @code{YYParser}.  The
13977 @code{YY} prefix may be changed using the @samp{%define api.prefix} directive.
13978 Alternatively, use @samp{%define api.parser.class @{@var{name}@}} to give a
13979 custom name to the class.  The interface of this class is detailed below.
13980
13981 By default, the parser class has public visibility.  To add modifiers to the
13982 parser class, @code{%define} @code{api.parser.public},
13983 @code{api.parser.abstract} and/or @code{api.parser.final}.
13984
13985 The superclass and the implemented interfaces of the parser class can be
13986 specified with the @samp{%define api.parser.extends} and @samp{%define
13987 api.parser.implements} directives.
13988
13989 The parser class defines an interface, @code{Lexer} (@pxref{D Scanner
13990 Interface}).  Other than this interface and the members described in the
13991 interface below, all the other members and fields are preceded with a
13992 @code{yy} or @code{YY} prefix to avoid clashes with user code.
13993
13994 The parser class can be extended using the @code{%parse-param}
13995 directive.  Each occurrence of the directive adds a field (public by
13996 default) to the parser class, and an argument to its constructor, which
13997 initializes the field automatically.
13998
13999 @deftypeop {Constructor} {YYParser} {} this(@var{lex_param}, @dots{}, @var{parse_param}, @dots{})
14000 Build a new parser object with embedded @samp{%code lexer}.  There are no
14001 parameters, unless @code{%param}s and/or @code{%parse-param}s and/or
14002 @code{%lex-param}s are used.
14003 @end deftypeop
14004
14005 @deftypeop {Constructor} {YYParser} {} this(@code{Lexer} @var{lexer}, @var{parse_param}, @dots{})
14006 Build a new parser object using the specified scanner.  There are no
14007 additional parameters unless @code{%param}s and/or @code{%parse-param}s are
14008 used.
14009 @end deftypeop
14010
14011 @deftypemethod {YYParser} {bool} parse()
14012 Run the syntactic analysis, and return @code{true} on success,
14013 @code{false} otherwise.
14014 @end deftypemethod
14015
14016 @deftypemethod {YYParser} {bool} getErrorVerbose()
14017 @deftypemethodx {YYParser} {void} setErrorVerbose(bool @var{verbose})
14018 Get or set the option to produce verbose error messages.  These are only
14019 available with @samp{%define parse.error detailed},
14020 which also turns on verbose error messages.
14021 @end deftypemethod
14022
14023 @deftypemethod {YYParser} {void} yyerror(@code{string} @var{msg})
14024 @deftypemethodx {YYParser} {void} yyerror(@code{Location} @var{loc}, @code{string} @var{msg})
14025 Print an error message using the @code{yyerror} method of the scanner
14026 instance in use. The @code{Location} and @code{Position} parameters are
14027 available only if location tracking is active.
14028 @end deftypemethod
14029
14030 @deftypemethod {YYParser} {bool} recovering()
14031 During the syntactic analysis, return @code{true} if recovering
14032 from a syntax error.
14033 @xref{Error Recovery}.
14034 @end deftypemethod
14035
14036 @deftypemethod {YYParser} {File} getDebugStream()
14037 @deftypemethodx {YYParser} {void} setDebugStream(@code{File} @var{o})
14038 Get or set the stream used for tracing the parsing.  It defaults to
14039 @code{stderr}.
14040 @end deftypemethod
14041
14042 @deftypemethod {YYParser} {int} getDebugLevel()
14043 @deftypemethodx {YYParser} {void} setDebugLevel(@code{int} @var{l})
14044 Get or set the tracing level.  Currently its value is either 0, no trace,
14045 or nonzero, full tracing.
14046 @end deftypemethod
14047
14048 @deftypecv {Constant} {YYParser} {string} {bisonVersion}
14049 @deftypecvx {Constant} {YYParser} {string} {bisonSkeleton}
14050 Identify the Bison version and skeleton used to generate this parser.
14051 @end deftypecv
14052
14053 The internationalization in D is very similar to the one in C. The D
14054 parser uses @code{dgettext} for translating Bison messages.
14055
14056 To enable internationalization, compile using @samp{-version=ENABLE_NLS
14057 -version=YYENABLE_NLS} and import @code{bindtextdomain} and
14058 @code{textdomain} from C:
14059
14060 @example
14061 extern(C) char* bindtextdomain(const char* domainname, const char* dirname);
14062 extern(C) char* textdomain(const char* domainname);
14063 @end example
14064
14065 The main function should load the translation catalogs, similarly to the
14066 @file{c/bistromathic} example:
14067
14068 @example
14069 int main()
14070 @{
14071   import core.stdc.locale;
14072
14073   // Set up internationalization.
14074   setlocale(LC_ALL, "");
14075   // Use Bison's standard translation catalog for error messages
14076   // (the generated messages).
14077   bindtextdomain("bison-runtime", BISON_LOCALEDIR);
14078   // For the translation catalog of your own project, use the
14079   // name of your project.
14080   bindtextdomain("bison", LOCALEDIR);
14081   textdomain("bison");
14082
14083   // usual main content
14084   ...
14085 @}
14086 @end example
14087
14088 For user message translations, the user must implement the @samp{string
14089 _(const char* @var{msg})} function.  It is recommended to use
14090 @code{gettext}:
14091
14092 @example
14093 %code imports @{
14094   static if (!is(typeof(_)))
14095   @{
14096     version(ENABLE_NLS)
14097     @{
14098       extern(C) char* gettext(const char*);
14099       string _(const char* s)
14100       @{
14101         return to!string(gettext(s));
14102       @}
14103     @}
14104   @}
14105   static if (!is(typeof(_)))
14106   @{
14107     pragma(inline, true)
14108     string _(string msg) @{ return msg; @}
14109   @}
14110 @}
14111 @end example
14112
14113 @node D Parser Context Interface
14114 @subsection D Parser Context Interface
14115 The parser context provides information to build error reports when you
14116 invoke @samp{%define parse.error custom}.
14117
14118 @defcv {Type} {YYParser} {SymbolKind}
14119 A struct containing an enum of all the grammar symbols, tokens and
14120 nonterminals.  Its enumerators are forged from the symbol names.  Use
14121 @samp{void toString(W)(W sink)} to get the symbol names.
14122 @end defcv
14123
14124 @deftypemethod {YYParser.Context} {YYParser.SymbolKind} getToken()
14125 The kind of the lookahead.  Return @code{null} iff there is no lookahead.
14126 @end deftypemethod
14127
14128 @deftypemethod {YYParser.Context} {YYParser.Location} getLocation()
14129 The location of the lookahead.
14130 @end deftypemethod
14131
14132 @deftypemethod {YYParser.Context} {int} getExpectedTokens(@code{YYParser.SymbolKind[]} @var{argv}, @code{int} @var{argc})
14133 Fill @var{argv} with the expected tokens, which never include
14134 @code{SymbolKind.YYERROR} or @code{SymbolKind.YYUNDEF}.
14135
14136 Never put more than @var{argc} elements into @var{argv}, and on success
14137 return the number of tokens stored in @var{argv}.  If there are more
14138 expected tokens than @var{argc}, fill @var{argv} up to @var{argc} and return
14139 0.  If there are no expected tokens, also return 0, but set @code{argv[0]}
14140 to @code{null}.
14141
14142 If @var{argv} is null, return the size needed to store all the possible
14143 values, which is always less than @code{YYNTOKENS}.
14144 @end deftypemethod
14145
14146
14147 @node D Scanner Interface
14148 @subsection D Scanner Interface
14149 @c - %code lexer
14150 @c - %lex-param
14151 @c - Lexer interface
14152
14153 There are two possible ways to interface a Bison-generated D parser
14154 with a scanner: the scanner may be defined by @code{%code lexer}, or
14155 defined elsewhere.  In either case, the scanner has to implement the
14156 @code{Lexer} inner interface of the parser class.  This interface also
14157 contains constants for all user-defined token names and the predefined
14158 @code{YYEOF} token.
14159
14160 In the first case, the body of the scanner class is placed in
14161 @code{%code lexer} blocks.  If you want to pass parameters from the
14162 parser constructor to the scanner constructor, specify them with
14163 @code{%lex-param}; they are passed before @code{%parse-param}s to the
14164 constructor.
14165
14166 In the second case, the scanner has to implement the @code{Lexer} interface,
14167 which is defined within the parser class (e.g., @code{YYParser.Lexer}).
14168 The constructor of the parser object will then accept an object
14169 implementing the interface; @code{%lex-param} is not used in this
14170 case.
14171
14172 In both cases, the scanner has to implement the following methods.
14173
14174 @deftypemethod {Lexer} {void} yyerror(@code{Location} @var{loc}, @code{string} @var{msg})
14175 This method is defined by the user to emit an error message.  The first
14176 parameter is omitted if location tracking is not active.
14177 @end deftypemethod
14178
14179 @deftypemethod {Lexer} {Symbol} yylex()
14180 Return the next token. The return value is of type @code{Symbol}, which
14181 binds together the kind, the semantic value and the location.
14182 @end deftypemethod
14183
14184 @deftypemethod {Lexer} {void} reportSyntaxError(@code{YYParser.Context} @var{ctx})
14185 If you invoke @samp{%define parse.error custom} (@pxref{Bison
14186 Declarations}), then the parser no longer passes syntax error messages to
14187 @code{yyerror}, rather it delegates that task to the user by calling the
14188 @code{reportSyntaxError} function.
14189
14190 Whether it uses @code{yyerror} is up to the user.
14191
14192 Here is an example of a reporting function (@pxref{D Parser Context
14193 Interface}).
14194
14195 @example
14196 public void reportSyntaxError(YYParser.Context ctx)
14197 @{
14198   stderr.write(ctx.getLocation(), ": syntax error");
14199   // Report the expected tokens.
14200   @{
14201     immutable int TOKENMAX = 5;
14202     YYParser.SymbolKind[] arg = new YYParser.SymbolKind[TOKENMAX];
14203     int n = ctx.getExpectedTokens(arg, TOKENMAX);
14204     if (n < TOKENMAX)
14205       for (int i = 0; i < n; ++i)
14206         stderr.write((i == 0 ? ": expected " : " or "), arg[i]);
14207   @}
14208   // Report the unexpected token which triggered the error.
14209   @{
14210     YYParser.SymbolKind lookahead = ctx.getToken();
14211     stderr.writeln(" before ", lookahead);
14212   @}
14213 @}
14214 @end example
14215
14216 @noindent
14217 This implementation is inappropriate for internationalization, see
14218 the @file{c/bistromathic} example for a better alternative.
14219 @end deftypemethod
14220
14221 @node D Action Features
14222 @subsection Special Features for Use in D Actions
14223
14224 Here is a table of Bison constructs, variables and functions that are useful in
14225 actions.
14226
14227 @deffn {Variable} $$
14228 Acts like a variable that contains the semantic value for the
14229 grouping made by the current rule.  @xref{Actions}.
14230 @end deffn
14231
14232 @deffn {Variable} $@var{n}
14233 Acts like a variable that contains the semantic value for the
14234 @var{n}th component of the current rule.  @xref{Actions}.
14235 @end deffn
14236
14237 @deffn {Function} yyerrok
14238 Resume generating error messages immediately for subsequent syntax
14239 errors.  This is useful primarily in error rules.
14240 @xref{Error Recovery}.
14241 @end deffn
14242
14243 @node D Push Parser Interface
14244 @subsection D Push Parser Interface
14245 @c - define push_parse
14246 @findex %define api.push-pull
14247
14248 Normally, Bison generates a pull parser for D.
14249 The following Bison declaration says that you want the parser to be a push
14250 parser (@pxref{%define Summary}):
14251
14252 @example
14253 %define api.push-pull push
14254 @end example
14255
14256 Most of the discussion about the D pull parser interface (@pxref{D
14257 Parser Interface}) applies to the push parser interface as well.
14258
14259 When generating a push parser, the method @code{pushParse} is created with
14260 the following signature:
14261
14262 @deftypemethod {YYParser} {int} pushParse (@code{Symbol} @var{sym})
14263 @end deftypemethod
14264
14265 The primary difference with respect to a pull parser is that the parser
14266 method @code{pushParse} is invoked repeatedly to parse each token.  This
14267 function is available if either the @samp{%define api.push-pull push} or
14268 @samp{%define api.push-pull both} declaration is used (@pxref{%define
14269 Summary}).
14270
14271 The value returned by the @code{pushParse} method is one of the following:
14272 @code{ACCEPT}, @code{ABORT}, or @code{PUSH_MORE}.  This new value,
14273 @code{PUSH_MORE}, may be returned if more input is required to finish
14274 parsing the input.
14275
14276 If @code{api.push-pull} is defined as @code{both}, then the generated parser
14277 class will also implement the @code{parse} method. This method's body is a
14278 loop that repeatedly invokes the scanner and then passes the values obtained
14279 from the scanner to the @code{pushParse} method.
14280
14281 @node D Complete Symbols
14282 @subsection D Complete Symbols
14283
14284 To build return values for @code{yylex}, call the @code{Symbol} method of
14285 the same name as the token kind reported, adding the parameters for
14286 value and location if necessary.  These methods generate compile-time errors
14287 if the parameters are inconsistent.  Token constructors work with both
14288 @code{%union} and @samp{%define api.value.type union}.
14289
14290 The order of the parameters is the same as for the @code{Symbol}
14291 constructor.  An example for the token kind @code{NUM}, which has value
14292 @code{ival} and with location tracking activated:
14293
14294 @example
14295 Symbol.NUM(ival, location);
14296 @end example
14297
14298 @node Java Parsers
14299 @section Java Parsers
14300
14301 @menu
14302 * Java Bison Interface::        Asking for Java parser generation
14303 * Java Semantic Values::        %token and %nterm vs. Java
14304 * Java Location Values::        The position and location classes
14305 * Java Parser Interface::       Instantiating and running the parser
14306 * Java Parser Context Interface:: Circumstances of a syntax error
14307 * Java Scanner Interface::      Specifying the scanner for the parser
14308 * Java Action Features::        Special features for use in actions
14309 * Java Push Parser Interface::  Instantiating and running the push parser
14310 * Java Differences::            Differences between C/C++ and Java Grammars
14311 * Java Declarations Summary::   List of Bison declarations used with Java
14312 @end menu
14313
14314 @node Java Bison Interface
14315 @subsection Java Bison Interface
14316 @c - %language "Java"
14317
14318 The Java parser skeletons are selected using the @code{%language "Java"}
14319 directive or the @option{-L java}/@option{--language=java} option.
14320
14321 @c FIXME: Documented bug.
14322 When generating a Java parser, @samp{bison @var{basename}.y} will create a
14323 single Java source file named @file{@var{basename}.java} containing the
14324 parser implementation.  Using a grammar file without a @file{.y} suffix is
14325 currently broken.  The basename of the parser implementation file can be
14326 changed by the @code{%file-prefix} directive or the
14327 @option{-b}/@option{--file-prefix} option.  The entire parser implementation
14328 file name can be changed by the @code{%output} directive or the
14329 @option{-o}/@option{--output} option.  The parser implementation file
14330 contains a single class for the parser.
14331
14332 You can create documentation for generated parsers using Javadoc.
14333
14334 Contrary to C parsers, Java parsers do not use global variables; the state
14335 of the parser is always local to an instance of the parser class.
14336 Therefore, all Java parsers are ``pure'', and the @code{%define api.pure}
14337 directive does nothing when used in Java.
14338
14339 GLR parsers are currently unsupported in Java.  Do not use the
14340 @code{%glr-parser} directive.
14341
14342 No header file can be generated for Java parsers.  Do not use the
14343 @code{%header} directive or the @option{-d}/@option{-H}/@option{--header}
14344 options.
14345
14346 @c FIXME: Possible code change.
14347 Currently, support for tracing is always compiled in.  Thus the
14348 @samp{%define parse.trace} and @samp{%token-table} directives and the
14349 @option{-t}/@option{--debug} and @option{-k}/@option{--token-table} options
14350 have no effect.  This may change in the future to eliminate unused code in
14351 the generated parser, so use @samp{%define parse.trace} explicitly if
14352 needed.  Also, in the future the @code{%token-table} directive might enable
14353 a public interface to access the token names and codes.
14354
14355 Getting a ``code too large'' error from the Java compiler means the code hit
14356 the 64KB bytecode per method limitation of the Java class file.  Try
14357 reducing the amount of code in actions and static initializers; otherwise,
14358 report a bug so that the parser skeleton will be improved.
14359
14360
14361 @node Java Semantic Values
14362 @subsection Java Semantic Values
14363
14364 There is no @code{%union} directive in Java parsers.  Instead, the semantic
14365 values' types (class names) should be specified in the @code{%nterm} or
14366 @code{%token} directive:
14367
14368 @example
14369 %nterm <Expression> expr assignment_expr term factor
14370 %nterm <Integer> number
14371 @end example
14372
14373 By default, the semantic stack is declared to have @code{Object} members,
14374 which means that the class types you specify can be of any class.
14375 To improve the type safety of the parser, you can declare the common
14376 superclass of all the semantic values using the @samp{%define api.value.type}
14377 directive.  For example, after the following declaration:
14378
14379 @example
14380 %define api.value.type @{ASTNode@}
14381 @end example
14382
14383 @noindent
14384 any @code{%token}, @code{%nterm} or @code{%type} specifying a semantic type
14385 which is not a subclass of @code{ASTNode}, will cause a compile-time error.
14386
14387 @c FIXME: Documented bug.
14388 Types used in the directives may be qualified with a package name.
14389 Primitive data types are accepted for Java version 1.5 or later.  Note
14390 that in this case the autoboxing feature of Java 1.5 will be used.
14391 Generic types may not be used; this is due to a limitation in the
14392 implementation of Bison, and may change in future releases.
14393
14394 Java parsers do not support @code{%destructor}, since the language
14395 adopts garbage collection.  The parser will try to hold references
14396 to semantic values for as little time as needed.
14397
14398 Java parsers do not support @code{%printer}, as @code{toString()}
14399 can be used to print the semantic values.  This however may change
14400 (in a backwards-compatible way) in future versions of Bison.
14401
14402
14403 @node Java Location Values
14404 @subsection Java Location Values
14405 @c - %locations
14406 @c - class Position
14407 @c - class Location
14408
14409 When the directive @code{%locations} is used, the Java parser supports
14410 location tracking, see @ref{Tracking Locations}.  An auxiliary user-defined
14411 class defines a @dfn{position}, a single point in a file; Bison itself
14412 defines a class representing a @dfn{location}, a range composed of a pair of
14413 positions (possibly spanning several files).  The location class is an inner
14414 class of the parser; the name is @code{Location} by default, and may also be
14415 renamed using @code{%define api.location.type @{@var{class-name}@}}.
14416
14417 The location class treats the position as a completely opaque value.
14418 By default, the class name is @code{Position}, but this can be changed
14419 with @code{%define api.position.type @{@var{class-name}@}}.  This class must
14420 be supplied by the user.
14421
14422
14423 @deftypeivar {Location} {Position} begin
14424 @deftypeivarx {Location} {Position} end
14425 The first, inclusive, position of the range, and the first beyond.
14426 @end deftypeivar
14427
14428 @deftypeop {Constructor} {Location} {} Location (@code{Position} @var{loc})
14429 Create a @code{Location} denoting an empty range located at a given point.
14430 @end deftypeop
14431
14432 @deftypeop {Constructor} {Location} {} Location (@code{Position} @var{begin}, @code{Position} @var{end})
14433 Create a @code{Location} from the endpoints of the range.
14434 @end deftypeop
14435
14436 @deftypemethod {Location} {String} toString ()
14437 Prints the range represented by the location.  For this to work
14438 properly, the position class should override the @code{equals} and
14439 @code{toString} methods appropriately.
14440 @end deftypemethod
14441
14442
14443 @node Java Parser Interface
14444 @subsection Java Parser Interface
14445
14446 The name of the generated parser class defaults to @code{YYParser}.  The
14447 @code{YY} prefix may be changed using the @samp{%define api.prefix} directive.
14448 Alternatively, use @samp{%define api.parser.class @{@var{name}@}} to give a
14449 custom name to the class.  The interface of this class is detailed below.
14450
14451 By default, the parser class has package visibility.  A declaration
14452 @samp{%define api.parser.public} will change to public visibility.  Remember
14453 that, according to the Java language specification, the name of the
14454 @file{.java} file should match the name of the class in this case.
14455 Similarly, you can use @code{api.parser.abstract}, @code{api.parser.final}
14456 and @code{api.parser.strictfp} with the @code{%define} declaration to add
14457 other modifiers to the parser class.  A single @samp{%define
14458 api.parser.annotations @{@var{annotations}@}} directive can be used to add
14459 any number of annotations to the parser class.
14460
14461 The Java package name of the parser class can be specified using the
14462 @samp{%define package} directive.  The superclass and the implemented
14463 interfaces of the parser class can be specified with the @code{%define
14464 api.parser.extends} and @samp{%define api.parser.implements} directives.
14465
14466 The parser class defines an inner class, @code{Location}, that is used
14467 for location tracking (see @ref{Java Location Values}), and an inner
14468 interface, @code{Lexer} (see @ref{Java Scanner Interface}).  Other than
14469 this inner class and interface, and the members described in the interface
14470 below, all the other members and fields are preceded with a @code{yy} or
14471 @code{YY} prefix to avoid clashes with user code.
14472
14473 The parser class can be extended using the @code{%parse-param}
14474 directive. Each occurrence of the directive will add a @code{protected
14475 final} field to the parser class, and an argument to its constructor,
14476 which initializes them automatically.
14477
14478 @deftypeop {Constructor} {YYParser} {} YYParser (@var{lex_param}, @dots{}, @var{parse_param}, @dots{})
14479 Build a new parser object with embedded @code{%code lexer}.  There are
14480 no parameters, unless @code{%param}s and/or @code{%parse-param}s and/or
14481 @code{%lex-param}s are used.
14482
14483 Use @code{%code init} for code added to the start of the constructor
14484 body. This is especially useful to initialize superclasses. Use
14485 @samp{%define init_throws} to specify any uncaught exceptions.
14486 @end deftypeop
14487
14488 @deftypeop {Constructor} {YYParser} {} YYParser (@code{Lexer} @var{lexer}, @var{parse_param}, @dots{})
14489 Build a new parser object using the specified scanner.  There are no
14490 additional parameters unless @code{%param}s and/or @code{%parse-param}s are
14491 used.
14492
14493 If the scanner is defined by @code{%code lexer}, this constructor is
14494 declared @code{protected} and is called automatically with a scanner
14495 created with the correct @code{%param}s and/or @code{%lex-param}s.
14496
14497 Use @code{%code init} for code added to the start of the constructor
14498 body. This is especially useful to initialize superclasses. Use
14499 @samp{%define init_throws} to specify any uncaught exceptions.
14500 @end deftypeop
14501
14502 @deftypemethod {YYParser} {boolean} parse ()
14503 Run the syntactic analysis, and return @code{true} on success,
14504 @code{false} otherwise.
14505 @end deftypemethod
14506
14507 @deftypemethod {YYParser} {boolean} getErrorVerbose ()
14508 @deftypemethodx {YYParser} {void} setErrorVerbose (boolean @var{verbose})
14509 Get or set the option to produce verbose error messages.  These are only
14510 available with @samp{%define parse.error detailed} (or @samp{verbose}),
14511 which also turns on verbose error messages.
14512 @end deftypemethod
14513
14514 @deftypemethod {YYParser} {void} yyerror (@code{String} @var{msg})
14515 @deftypemethodx {YYParser} {void} yyerror (@code{Position} @var{pos}, @code{String} @var{msg})
14516 @deftypemethodx {YYParser} {void} yyerror (@code{Location} @var{loc}, @code{String} @var{msg})
14517 Print an error message using the @code{yyerror} method of the scanner
14518 instance in use. The @code{Location} and @code{Position} parameters are
14519 available only if location tracking is active.
14520 @end deftypemethod
14521
14522 @deftypemethod {YYParser} {boolean} recovering ()
14523 During the syntactic analysis, return @code{true} if recovering
14524 from a syntax error.
14525 @xref{Error Recovery}.
14526 @end deftypemethod
14527
14528 @deftypemethod {YYParser} {java.io.PrintStream} getDebugStream ()
14529 @deftypemethodx {YYParser} {void} setDebugStream (@code{java.io.PrintStream} @var{o})
14530 Get or set the stream used for tracing the parsing.  It defaults to
14531 @code{System.err}.
14532 @end deftypemethod
14533
14534 @deftypemethod {YYParser} {int} getDebugLevel ()
14535 @deftypemethodx {YYParser} {void} setDebugLevel (@code{int} @var{l})
14536 Get or set the tracing level.  Currently its value is either 0, no trace,
14537 or nonzero, full tracing.
14538 @end deftypemethod
14539
14540 @deftypecv {Constant} {YYParser} {String} {bisonVersion}
14541 @deftypecvx {Constant} {YYParser} {String} {bisonSkeleton}
14542 Identify the Bison version and skeleton used to generate this parser.
14543 @end deftypecv
14544
14545 If you enabled token internationalization (@pxref{Token I18n}), you must
14546 provide the parser with the following function:
14547
14548 @deftypecv {Static Method} {YYParser} {String} {i18n} (@code{String} @var{s})
14549 Return the translation of @var{s} in the user's language.  As an example:
14550
14551 @example
14552 %code @{
14553   static ResourceBundle myResources
14554     = ResourceBundle.getBundle("domain-name");
14555   static final String i18n(String s) @{
14556     return myResources.getString(s);
14557   @}
14558 @}
14559 @end example
14560 @end deftypecv
14561
14562 @node Java Parser Context Interface
14563 @subsection Java Parser Context Interface
14564
14565 The parser context provides information to build error reports when you
14566 invoke @samp{%define parse.error custom}.
14567
14568 @defcv {Type} {YYParser} {SymbolKind}
14569 An enum of all the grammar symbols, tokens and nonterminals.  Its
14570 enumerators are forged from the symbol names:
14571
14572 @example
14573 public enum SymbolKind
14574 @{
14575   S_YYEOF(0),          /* "end of file"  */
14576   S_YYERROR(1),        /* error  */
14577   S_YYUNDEF(2),        /* "invalid token"  */
14578   S_BANG(3),           /* "!"  */
14579   S_PLUS(4),           /* "+"  */
14580   S_MINUS(5),          /* "-"  */
14581   [...]
14582   S_NUM(13),           /* "number"  */
14583   S_NEG(14),           /* NEG  */
14584   S_YYACCEPT(15),      /* $accept  */
14585   S_input(16),         /* input  */
14586   S_line(17);          /* line  */
14587 @};
14588 @end example
14589 @end defcv
14590
14591 @deftypemethod {YYParser.SymbolKind} {String} getName ()
14592 The name of this symbol, possibly translated.
14593 @end deftypemethod
14594
14595 @deftypemethod {YYParser.Context} {YYParser.SymbolKind} getToken ()
14596 The kind of the lookahead.  Return @code{null} iff there is no lookahead.
14597 @end deftypemethod
14598
14599 @deftypemethod {YYParser.Context} {YYParser.Location} getLocation ()
14600 The location of the lookahead.
14601 @end deftypemethod
14602
14603 @deftypemethod {YYParser.Context} {int} getExpectedTokens (@code{YYParser.SymbolKind[]} @var{argv}, @code{int} @var{argc})
14604 Fill @var{argv} with the expected tokens, which never include
14605 @code{SymbolKind.S_YYERROR} or @code{SymbolKind.S_YYUNDEF}.
14606
14607 Never put more than @var{argc} elements into @var{argv}, and on success
14608 return the number of tokens stored in @var{argv}.  If there are more
14609 expected tokens than @var{argc}, fill @var{argv} up to @var{argc} and return
14610 0.  If there are no expected tokens, also return 0, but set @code{argv[0]}
14611 to @code{null}.
14612
14613 If @var{argv} is null, return the size needed to store all the possible
14614 values, which is always less than @code{YYNTOKENS}.
14615 @end deftypemethod
14616
14617
14618 @node Java Scanner Interface
14619 @subsection Java Scanner Interface
14620 @c - %code lexer
14621 @c - %lex-param
14622 @c - Lexer interface
14623
14624 There are two possible ways to interface a Bison-generated Java parser
14625 with a scanner: the scanner may be defined by @code{%code lexer}, or
14626 defined elsewhere.  In either case, the scanner has to implement the
14627 @code{Lexer} inner interface of the parser class.  This interface also
14628 contains constants for all user-defined token names and the predefined
14629 @code{YYEOF} token.
14630
14631 In the first case, the body of the scanner class is placed in
14632 @code{%code lexer} blocks.  If you want to pass parameters from the
14633 parser constructor to the scanner constructor, specify them with
14634 @code{%lex-param}; they are passed before @code{%parse-param}s to the
14635 constructor.
14636
14637 In the second case, the scanner has to implement the @code{Lexer} interface,
14638 which is defined within the parser class (e.g., @code{YYParser.Lexer}).
14639 The constructor of the parser object will then accept an object
14640 implementing the interface; @code{%lex-param} is not used in this
14641 case.
14642
14643 In both cases, the scanner has to implement the following methods.
14644
14645 @deftypemethod {Lexer} {void} yyerror (@code{Location} @var{loc}, @code{String} @var{msg})
14646 This method is defined by the user to emit an error message.  The first
14647 parameter is omitted if location tracking is not active.  Its type can be
14648 changed using @code{%define api.location.type @{@var{class-name}@}}.
14649 @end deftypemethod
14650
14651 @deftypemethod {Lexer} {int} yylex ()
14652 Return the next token.  Its type is the return value; its semantic value and
14653 location are saved and returned by their respective methods in the interface.
14654 Not needed for push-only parsers.
14655
14656 Use @samp{%define lex_throws} to specify any uncaught exceptions.
14657 Default is @code{java.io.IOException}.
14658 @end deftypemethod
14659
14660 @deftypemethod {Lexer} {Position} getStartPos ()
14661 @deftypemethodx {Lexer} {Position} getEndPos ()
14662 Return respectively the first position of the last token that @code{yylex}
14663 returned, and the first position beyond it.  These methods are not needed
14664 unless location tracking and pull parsing are active.
14665
14666 They should return new objects for each call, so that the symbols do not all
14667 share the same @code{Position} boundaries.
14668
14669 The return type can be changed using @code{%define api.position.type
14670 @{@var{class-name}@}}.
14671 @end deftypemethod
14672
14673 @deftypemethod {Lexer} {Object} getLVal ()
14674 Return the semantic value of the last token that @code{yylex} returned.  Not needed
14675 for push-only parsers.
14676
14677 The return type can be changed using @samp{%define api.value.type
14678 @{@var{class-name}@}}.
14679 @end deftypemethod
14680
14681 @deftypemethod {Lexer} {void} reportSyntaxError (@code{YYParser.Context} @var{ctx})
14682 If you invoke @samp{%define parse.error custom} (@pxref{Bison
14683 Declarations}), then the parser no longer passes syntax error messages to
14684 @code{yyerror}, rather it delegates that task to the user by calling the
14685 @code{reportSyntaxError} function.
14686
14687 Whether it uses @code{yyerror} is up to the user.
14688
14689 Here is an example of a reporting function (@pxref{Java Parser Context
14690 Interface}).
14691
14692 @example
14693 public void reportSyntaxError(YYParser.Context ctx) @{
14694   System.err.print(ctx.getLocation() + ": syntax error");
14695   // Report the expected tokens.
14696   @{
14697     final int TOKENMAX = 5;
14698     YYParser.SymbolKind[] arg = new YYParser.SymbolKind[TOKENMAX];
14699     int n = ctx.getExpectedTokens(arg, TOKENMAX);
14700     for (int i = 0; i < n; ++i)
14701       System.err.print((i == 0 ? ": expected " : " or ")
14702                        + arg[i].getName());
14703   @}
14704   // Report the unexpected token which triggered the error.
14705   @{
14706     YYParser.SymbolKind lookahead = ctx.getToken();
14707     if (lookahead != null)
14708       System.err.print(" before " + lookahead.getName());
14709   @}
14710   System.err.println("");
14711 @}
14712 @end example
14713
14714 @noindent
14715 This implementation is inappropriate for internationalization; see the
14716 @file{c/bistromathic} example for a better alternative.
14717 @end deftypemethod
14718
14719 @node Java Action Features
14720 @subsection Special Features for Use in Java Actions
14721
14722 The following special constructs can be used in Java actions.
14723 Other analogous C action features are currently unavailable for Java.
14724
14725 Use @samp{%define throws} to specify any uncaught exceptions from parser
14726 actions, and initial actions specified by @code{%initial-action}.
14727
14728 @defvar $@var{n}
14729 The semantic value for the @var{n}th component of the current rule.
14730 This may not be assigned to.
14731 @xref{Java Semantic Values}.
14732 @end defvar
14733
14734 @defvar $<@var{typealt}>@var{n}
14735 Like @code{$@var{n}} but specifies an alternative type @var{typealt}.
14736 @xref{Java Semantic Values}.
14737 @end defvar
14738
14739 @defvar $$
14740 The semantic value for the grouping made by the current rule.  As a
14741 value, this is in the base type (@code{Object} or as specified by
14742 @samp{%define api.value.type}), as it is not cast to the declared subtype because
14743 casts are not allowed on the left-hand side of Java assignments.
14744 Use an explicit Java cast if the correct subtype is needed.
14745 @xref{Java Semantic Values}.
14746 @end defvar
14747
14748 @defvar $<@var{typealt}>$
14749 Same as @code{$$} since Java always allows assigning to the base type.
14750 Perhaps we should use this and @code{$<>$} for the value and @code{$$}
14751 for setting the value but there is currently no easy way to distinguish
14752 these constructs.
14753 @xref{Java Semantic Values}.
14754 @end defvar
14755
14756 @defvar @@@var{n}
14757 The location information of the @var{n}th component of the current rule.
14758 This may not be assigned to.
14759 @xref{Java Location Values}.
14760 @end defvar
14761
14762 @defvar @@$
14763 The location information of the grouping made by the current rule.
14764 @xref{Java Location Values}.
14765 @end defvar
14766
14767 @deftypefn {Statement} return YYABORT @code{;}
14768 Return immediately from the parser, indicating failure.
14769 @xref{Java Parser Interface}.
14770 @end deftypefn
14771
14772 @deftypefn {Statement} return YYACCEPT @code{;}
14773 Return immediately from the parser, indicating success.
14774 @xref{Java Parser Interface}.
14775 @end deftypefn
14776
14777 @deftypefn {Statement} {return} YYERROR @code{;}
14778 Start error recovery (without printing an error message).
14779 @xref{Error Recovery}.
14780 @end deftypefn
14781
14782 @deftypefn {Function} {boolean} recovering ()
14783 Return whether error recovery is being done. In this state, the parser
14784 reads tokens until it reaches a known state, and then restarts normal
14785 operation.
14786 @xref{Error Recovery}.
14787 @end deftypefn
14788
14789 @deftypefn  {Function} {void} yyerror (@code{String} @var{msg})
14790 @deftypefnx {Function} {void} yyerror (@code{Position} @var{loc}, @code{String} @var{msg})
14791 @deftypefnx {Function} {void} yyerror (@code{Location} @var{loc}, @code{String} @var{msg})
14792 Print an error message using the @code{yyerror} method of the scanner
14793 instance in use. The @code{Location} and @code{Position} parameters are
14794 available only if location tracking is active.
14795 @end deftypefn
14796
14797 @node Java Push Parser Interface
14798 @subsection Java Push Parser Interface
14799 @c - define push_parse
14800 @findex %define api.push-pull
14801
14802 Normally, Bison generates a pull parser for Java.
14803 The following Bison declaration says that you want the parser to be a push
14804 parser (@pxref{%define Summary}):
14805
14806 @example
14807 %define api.push-pull push
14808 @end example
14809
14810 Most of the discussion about the Java pull parser interface (@pxref{Java
14811 Parser Interface}) applies to the push parser interface as well.
14812
14813 When generating a push parser, the method @code{push_parse} is created with
14814 the following signature (depending on whether locations are enabled).
14815
14816 @deftypemethod {YYParser} {int} push_parse (@code{int} @var{token}, @code{Object} @var{yylval})
14817 @deftypemethodx {YYParser} {int} push_parse (@code{int} @var{token}, @code{Object} @var{yylval}, @code{Location} @var{yyloc})
14818 @deftypemethodx {YYParser} {int} push_parse (@code{int} @var{token}, @code{Object} @var{yylval}, @code{Position} @var{yypos})
14819 @end deftypemethod
14820
14821 The primary difference with respect to a pull parser is that the parser
14822 method @code{push_parse} is invoked repeatedly to parse each token.  This
14823 function is available if either the @samp{%define api.push-pull push} or
14824 @samp{%define api.push-pull both} declaration is used (@pxref{%define
14825 Summary}).  The @code{Location} and @code{Position} parameters are available
14826 only if location tracking is active.
14827
14828 The value returned by the @code{push_parse} method is one of the following:
14829 0 (success), 1 (abort), 2 (memory exhaustion), or @code{YYPUSH_MORE}.  This
14830 new value, @code{YYPUSH_MORE}, may be returned if more input is required to
14831 finish parsing the grammar.
14832
14833 If @code{api.push-pull} is defined as @code{both}, then the generated parser
14834 class will also implement the @code{parse} method. This method's body is a
14835 loop that repeatedly invokes the scanner and then passes the values obtained
14836 from the scanner to the @code{push_parse} method.
14837
14838 There is one additional complication.  Technically, the push parser does not
14839 need to know about the scanner (i.e. an object implementing the
14840 @code{YYParser.Lexer} interface), but it does need access to the
14841 @code{yyerror} method.  Currently, the @code{yyerror} method is defined in
14842 the @code{YYParser.Lexer} interface. Hence, an implementation of that
14843 interface is still required in order to provide an implementation of
14844 @code{yyerror}.  The current approach (and subject to change) is to require
14845 the @code{YYParser} constructor to be given an object implementing the
14846 @code{YYParser.Lexer} interface. This object need only implement the
14847 @code{yyerror} method; the other methods can be stubbed since they will
14848 never be invoked.  The simplest way to do this is to add a trivial scanner
14849 implementation to your grammar file using whatever implementation of
14850 @code{yyerror} is desired. The following code sample shows a simple way to
14851 accomplish this.
14852
14853 @example
14854 %code lexer
14855 @{
14856   public Object getLVal () @{return null;@}
14857   public int yylex () @{return 0;@}
14858   public void yyerror (String s) @{System.err.println(s);@}
14859 @}
14860 @end example
14861
14862 @node Java Differences
14863 @subsection Differences between C/C++ and Java Grammars
14864
14865 The different structure of the Java language forces several differences
14866 between C/C++ grammars, and grammars designed for Java parsers.  This
14867 section summarizes these differences.
14868
14869 @itemize
14870 @item
14871 Java has no preprocessor, so obviously the @code{YYERROR},
14872 @code{YYACCEPT}, @code{YYABORT} symbols (@pxref{Table of Symbols}) cannot be
14873 macros.  Instead, they should be preceded by @code{return} when they appear
14874 in an action.  The actual definition of these symbols is opaque to the Bison
14875 grammar, and it might change in the future.  The only meaningful operation
14876 that you can do, is to return them.  @xref{Java Action Features}.
14877
14878 Note that of these three symbols, only @code{YYACCEPT} and
14879 @code{YYABORT} will cause a return from the @code{yyparse}
14880 method@footnote{Java parsers include the actions in a method
14881 separate from @code{yyparse} in order to have an intuitive syntax that
14882 corresponds to these C macros.}.
14883
14884 @item
14885 Java lacks unions, so @code{%union} has no effect.  Instead, semantic
14886 values have a common base type: @code{Object} or as specified by
14887 @samp{%define api.value.type}.  Angle brackets on @code{%token}, @code{%type},
14888 @code{$@var{n}} and @code{$$} specify subtypes rather than fields of
14889 a union.  The type of @code{$$}, even with angle brackets, is the base
14890 type since Java casts are not allowed on the left-hand side of assignments.
14891 Also, @code{$@var{n}} and @code{@@@var{n}} are not allowed on the
14892 left-hand side of assignments. @xref{Java Semantic Values}, and
14893 @ref{Java Action Features}.
14894
14895 @item
14896 The prologue declarations have a different meaning than in C/C++ code.
14897 @table @asis
14898 @item @code{%code imports}
14899 blocks are placed at the beginning of the Java source code.  They may
14900 include copyright notices.  For @code{package} declarations, use
14901 @samp{%define api.package} instead.
14902
14903 @item unqualified @code{%code}
14904 blocks are placed inside the parser class.
14905
14906 @item @code{%code lexer}
14907 blocks, if specified, should include the implementation of the
14908 scanner.  If there is no such block, the scanner can be any class
14909 that implements the appropriate interface (@pxref{Java Scanner
14910 Interface}).
14911 @end table
14912
14913 Other @code{%code} blocks are not supported in Java parsers.
14914 In particular, @code{%@{ @dots{} %@}} blocks should not be used
14915 and may give an error in future versions of Bison.
14916
14917 The epilogue has the same meaning as in C/C++ code and it can
14918 be used to define other classes used by the parser @emph{outside}
14919 the parser class.
14920 @end itemize
14921
14922
14923 @node Java Declarations Summary
14924 @subsection Java Declarations Summary
14925
14926 This summary only includes declarations that are specific to Java or that have
14927 a special meaning when used in a Java parser.
14928
14929 @deffn {Directive} {%language "Java"}
14930 Generate a Java class for the parser.
14931 @end deffn
14932
14933 @deffn {Directive} %lex-param @{@var{type} @var{name}@}
14934 A parameter for the lexer class defined by @code{%code lexer}
14935 @emph{only}, added as parameters to the lexer constructor and the parser
14936 constructor that @emph{creates} a lexer.  Default is none.
14937 @xref{Java Scanner Interface}.
14938 @end deffn
14939
14940 @deffn {Directive} %parse-param @{@var{type} @var{name}@}
14941 A parameter for the parser class added as parameters to constructor(s)
14942 and as fields initialized by the constructor(s).  Default is none.
14943 @xref{Java Parser Interface}.
14944 @end deffn
14945
14946 @deffn {Directive} %token <@var{type}> @var{token} @dots{}
14947 Declare tokens.  Note that the angle brackets enclose a Java @emph{type}.
14948 @xref{Java Semantic Values}.
14949 @end deffn
14950
14951 @deffn {Directive} %nterm <@var{type}> @var{nonterminal} @dots{}
14952 Declare the type of nonterminals.  Note that the angle brackets enclose
14953 a Java @emph{type}.
14954 @xref{Java Semantic Values}.
14955 @end deffn
14956
14957 @deffn {Directive} %code @{ @var{code} @dots{} @}
14958 Code appended to the inside of the parser class.
14959 @xref{Java Differences}.
14960 @end deffn
14961
14962 @deffn {Directive} {%code imports} @{ @var{code} @dots{} @}
14963 Code inserted just after the @code{package} declaration.
14964 @xref{Java Differences}.
14965 @end deffn
14966
14967 @deffn {Directive} {%code init} @{ @var{code} @dots{} @}
14968 Code inserted at the beginning of the parser constructor body.
14969 @xref{Java Parser Interface}.
14970 @end deffn
14971
14972 @deffn {Directive} {%code lexer} @{ @var{code} @dots{} @}
14973 Code added to the body of an inner lexer class within the parser class.
14974 @xref{Java Scanner Interface}.
14975 @end deffn
14976
14977 @deffn {Directive} %% @var{code} @dots{}
14978 Code (after the second @code{%%}) appended to the end of the file,
14979 @emph{outside} the parser class.
14980 @xref{Java Differences}.
14981 @end deffn
14982
14983 @deffn {Directive} %@{ @var{code} @dots{} %@}
14984 Not supported.  Use @code{%code imports} instead.
14985 @xref{Java Differences}.
14986 @end deffn
14987
14988 @deffn {Directive} {%define api.prefix} @{@var{prefix}@}
14989 The prefix of the parser class name @code{@var{prefix}Parser} if
14990 @samp{%define api.parser.class} is not used.  Default is @code{YY}.
14991 @xref{Java Bison Interface}.
14992 @end deffn
14993
14994 @deffn {Directive} {%define api.parser.abstract}
14995 Whether the parser class is declared @code{abstract}.  Default is false.
14996 @xref{Java Bison Interface}.
14997 @end deffn
14998
14999 @deffn {Directive} {%define api.parser.annotations} @{@var{annotations}@}
15000 The Java annotations for the parser class.  Default is none.
15001 @xref{Java Bison Interface}.
15002 @end deffn
15003
15004 @deffn {Directive} {%define api.parser.class} @{@var{name}@}
15005 The name of the parser class.  Default is @code{YYParser} or
15006 @code{@var{api.prefix}Parser}.  @xref{Java Bison Interface}.
15007 @end deffn
15008
15009 @deffn {Directive} {%define api.parser.extends} @{@var{superclass}@}
15010 The superclass of the parser class.  Default is none.
15011 @xref{Java Bison Interface}.
15012 @end deffn
15013
15014 @deffn {Directive} {%define api.parser.final}
15015 Whether the parser class is declared @code{final}.  Default is false.
15016 @xref{Java Bison Interface}.
15017 @end deffn
15018
15019 @deffn {Directive} {%define api.parser.implements} @{@var{interfaces}@}
15020 The implemented interfaces of the parser class, a comma-separated list.
15021 Default is none.
15022 @xref{Java Bison Interface}.
15023 @end deffn
15024
15025 @deffn {Directive} {%define api.parser.public}
15026 Whether the parser class is declared @code{public}.  Default is false.
15027 @xref{Java Bison Interface}.
15028 @end deffn
15029
15030 @deffn {Directive} {%define api.parser.strictfp}
15031 Whether the parser class is declared @code{strictfp}.  Default is false.
15032 @xref{Java Bison Interface}.
15033 @end deffn
15034
15035 @deffn {Directive} {%define init_throws} @{@var{exceptions}@}
15036 The exceptions thrown by @code{%code init} from the parser class
15037 constructor.  Default is none.
15038 @xref{Java Parser Interface}.
15039 @end deffn
15040
15041 @deffn {Directive} {%define lex_throws} @{@var{exceptions}@}
15042 The exceptions thrown by the @code{yylex} method of the lexer, a
15043 comma-separated list.  Default is @code{java.io.IOException}.
15044 @xref{Java Scanner Interface}.
15045 @end deffn
15046
15047 @deffn {Directive} {%define api.location.type} @{@var{class}@}
15048 The name of the class used for locations (a range between two
15049 positions).  This class is generated as an inner class of the parser
15050 class by @command{bison}.  Default is @code{Location}.
15051 Formerly named @code{location_type}.
15052 @xref{Java Location Values}.
15053 @end deffn
15054
15055 @deffn {Directive} {%define api.package} @{@var{package}@}
15056 The package to put the parser class in.  Default is none.
15057 @xref{Java Bison Interface}.
15058 Renamed from @code{package} in Bison 3.7.
15059 @end deffn
15060
15061 @deffn {Directive} {%define api.position.type} @{@var{class}@}
15062 The name of the class used for positions. This class must be supplied by
15063 the user.  Default is @code{Position}.
15064 Formerly named @code{position_type}.
15065 @xref{Java Location Values}.
15066 @end deffn
15067
15068 @deffn {Directive} {%define api.value.type} @{@var{class}@}
15069 The base type of semantic values.  Default is @code{Object}.
15070 @xref{Java Semantic Values}.
15071 @end deffn
15072
15073 @deffn {Directive} {%define throws} @{@var{exceptions}@}
15074 The exceptions thrown by user-supplied parser actions and
15075 @code{%initial-action}, a comma-separated list.  Default is none.
15076 @xref{Java Parser Interface}.
15077 @end deffn
15078
15079
15080 @c ================================================= History
15081
15082 @node History
15083 @chapter A Brief History of the Greater Ungulates
15084 @cindex history
15085 @cindex ungulates
15086
15087 @menu
15088 * Yacc::                        The original Yacc
15089 * yacchack::                    An obscure early implementation of reentrancy
15090 * Byacc::                       Berkeley Yacc
15091 * Bison::                       This program
15092 * Other Ungulates::             Similar programs
15093 @end menu
15094
15095 @node Yacc
15096 @section The ancestral Yacc
15097
15098 Bison originated as a workalike of a program called Yacc --- Yet Another
15099 Compiler Compiler.@footnote{Because of the acronym, the name is sometimes
15100 given as ``YACC'', but Johnson used ``Yacc'' in the descriptive paper
15101 included in the
15102 @url{https://s3.amazonaws.com/plan9-bell-labs/7thEdMan/v7vol2b.pdf, Version
15103 7 Unix Manual}.} Yacc was written at Bell Labs as part of the very early
15104 development of Unix; one of its first uses was to develop the original
15105 Portable C Compiler, pcc. The same person, Stephen C. Johnson, wrote Yacc and
15106 the original pcc.
15107
15108 According to the
15109 author@footnote{@url{https://lists.gnu.org/r/bison-patches/2019-02/msg00061.html}},
15110 Yacc was first invented in 1971 and reached a form recognizably similar to
15111 the C version in 1973.  Johnson published @cite{A Portable Compiler: Theory
15112 and Practice} @pcite{Johnson 1978}.
15113
15114 Yacc was not itself originally written in C but in its predecessor language,
15115 B.  This goes far to explain its odd interface, which exposes a large number
15116 of global variables rather than bundling them into a C struct.  All other
15117 Yacc-like programs are descended from the C port of Yacc.
15118
15119 Yacc, through both its deployment in pcc and as a standalone tool for
15120 generating other parsers, helped drive the early spread of Unix.  Yacc
15121 itself, however, passed out of use after around 1990 when workalikes
15122 with less restrictive licenses and more features became available.
15123
15124 Original Yacc became generally available when Caldera released the sources
15125 of old versions of Unix up to V7 and 32V in 2002.  By that time it had been
15126 long superseded in practical use by Bison even on Yacc's native Unix
15127 variants.
15128
15129 @node yacchack
15130 @section yacchack
15131 @cindex yacchack
15132
15133 One of the deficiencies of original Yacc was its inability to produce
15134 reentrant parsers.  This was first remedied by a set of drop-in
15135 modifications called ``yacchack'', published by Eric S. Raymond on USENET
15136 around 1983.  This code was quickly forgotten when zoo and Berkeley Yacc
15137 became available a few years later.
15138
15139 @node Byacc
15140 @section Berkeley Yacc
15141 @cindex byacc
15142
15143 Berkeley Yacc was created in 1985 by Robert Corbett @pcite{Corbett
15144 1984}.  It was originally named ``zoo'', but by October 1989 it became
15145 known as Berkeley Yacc or byacc.
15146
15147 Berkeley Yacc had three advantages over the ancestral Yacc: it generated
15148 faster parsers, it could generate reentrant parsers, and the source code was
15149 released to the public domain rather than being under an AT&T proprietary
15150 license.  The better performance came from implementing techniques from
15151 DeRemer and Pennello's seminal paper on LALR parsing @pcite{DeRemer 1982}.
15152
15153 Use of byacc spread rapidly due to its public domain license. However, once
15154 Bison became available, byacc itself passed out of general use.
15155
15156 @node Bison
15157 @section Bison
15158 @cindex zoo
15159
15160 Robert Corbett actually wrote two (closely related) LALR parsers in 1985,
15161 both using the DeRemer/Pennello techniques. One was ``zoo'', the other was
15162 ``Byson''. In 1987 Richard Stallman began working on Byson; the name changed
15163 to Bison and the interface became Yacc-compatible.
15164
15165 The main visible difference between Yacc and Byson/Bison at the time of
15166 Byson's first release is that Byson supported the @code{@@@var{n}} construct
15167 (giving access to the starting and ending line number and character number
15168 associated with any of the symbols in the current rule).
15169
15170 There was also the command @samp{%expect @var{n}} which said not to mention the
15171 conflicts if there are @var{n} shift/reduce conflicts and no reduce/reduce
15172 conflicts.  In more recent versions of Bison, @code{%expect} and its
15173 @code{%expect-rr} variant for reduce/reduce conflicts can be applied to
15174 individual rules.
15175
15176 Later versions of Bison added many more new features.
15177
15178 Bison error reporting has been improved in various ways. Notably, ancestral
15179 Yacc and Byson did not have carets in error messages.
15180
15181 Compared to Yacc, Bison uses a faster but less space-efficient encoding for
15182 the parse tables @pcite{Corbett 1984}, and more modern techniques for
15183 generating the lookahead sets @pcite{DeRemer 1982}.  This approach has been the
15184 standard one ever since.
15185
15186 (It has also been plausibly alleged that the differences in the algorithms stem
15187 mainly from the horrible kludges that Johnson had to perpetrate to make
15188 the original Yacc fit in a PDP-11.)
15189
15190 Named references, semantic predicates, @code{%locations},
15191 @code{%glr-parser}, @code{%printer}, @code{%destructor}, dumps to DOT,
15192 @code{%parse-param}, @code{%lex-param}, dumps to XSLT, LAC, and IELR(1)
15193 generation are new in Bison.
15194
15195 Bison also has many features to support C++ that were not present in the
15196 ancestral Yacc or Byson.
15197
15198 Bison obsolesced all previous Yacc variants and workalikes generating C by
15199 1995.
15200
15201 @node Other Ungulates
15202 @section Other Ungulates
15203
15204 The Yacc concept has frequently been ported to other languages. Some of the
15205 early ports are extinct along with the languages that hosted them; others
15206 have been superseded by parser skeletons shipped with Bison.
15207
15208 However, independent implementations persist. One of the best-known
15209 still in use is David Beazley's ``PLY'' (Python Lex-Yacc) for
15210 Python. Another is goyacc, supporting the Go language. An ``ocamlyacc''
15211 is shipped as part of the OCaml compiler suite.
15212
15213 @c ================================================= Version Compatibility
15214
15215 @node Versioning
15216 @chapter Bison Version Compatibility: Best Practices
15217 @cindex version
15218 @cindex compatibility
15219
15220 Bison provides a Yacc compatibility mode in which it strives to conform with
15221 the POSIX standard.  Grammar files which are written to the POSIX standard, and
15222 do not take advantage of any of the special capabilities of Bison, should
15223 work with many versions of Bison without modification.
15224
15225 All other features of Bison are particular to Bison, and are changing.  Bison
15226 is actively maintained and continuously evolving.  It should come as no
15227 surprise that an older version of Bison will not accept Bison source code which
15228 uses newer features that do not exist at all in the older Bison.
15229 Regrettably, in spite of reasonable effort to maintain compatibility, the
15230 reverse situation may also occur: it may happen that code developed using an
15231 older version of Bison does not build with a newer version of Bison without
15232 modifications.
15233
15234 Because Bison is a code generation tool, it is possible to retain its output
15235 and distribute that to the users of the program.  The users are then not
15236 required to have Bison installed at all, only an implementation of the
15237 programming language, such as C, which is required for processing the generated
15238 output.
15239
15240 It is the output of Bison that is intended to be of the utmost portability.
15241 So, that is to say, whereas the Bison grammar source code may have a dependency
15242 on specific versions of Bison, the generated parser from any version of Bison
15243 should work with a large number of implementations of C, or whatever
15244 language is applicable.
15245
15246 The recommended best practice for using Bison (in the context of software that
15247 is distributed in source code form) is to ship the generated parser to the
15248 downstream users.  Only those downstream users who engage in active development
15249 of the program who need to make changes to the grammar file need to have Bison
15250 installed at all, and those users can install the specific version of Bison
15251 which is required.
15252
15253 Following this recommended practice also makes it possible to use a more recent
15254 Bison than what is available to users through operating system distributions,
15255 thereby taking advantage of the latest techniques that Bison allows.
15256
15257 Some features of Bison have been, or are being adopted into other Yacc-like
15258 programs.  Therefore it might seem that it is a good idea to write grammar code
15259 which targets multiple implementations, similarly to the way C programs are
15260 often written to target multiple compilers and language versions.  Other than
15261 the Yacc subset described by POSIX, the Bison language is not rigorously
15262 standardized.  When a Bison feature is adopted by another parser generator, it
15263 may be initially compatible with that version of Bison on which it was based,
15264 but the compatibility may degrade going forward.  Developers who strive to make
15265 their Bison code simultaneously compatible with other parser generators are
15266 encouraged to nevertheless use specific versions of all generators, and still
15267 follow the recommended practice of shipping generated output.  For example,
15268 a project can internally maintain compatibility with multiple generators,
15269 and choose the output of a particular one to ship to the users.  Or else,
15270 the project could ship all of the outputs, arranging for a way for the user
15271 to specify which one is used to build the program.
15272
15273 @c ================================================= FAQ
15274
15275 @node FAQ
15276 @chapter Frequently Asked Questions
15277 @cindex frequently asked questions
15278 @cindex questions
15279
15280 Several questions about Bison come up occasionally.  Here some of them
15281 are addressed.
15282
15283 @menu
15284 * Memory Exhausted::            Breaking the Stack Limits
15285 * How Can I Reset the Parser::  @code{yyparse} Keeps some State
15286 * Strings are Destroyed::       @code{yylval} Loses Track of Strings
15287 * Implementing Gotos/Loops::    Control Flow in the Calculator
15288 * Multiple start-symbols::      Factoring closely related grammars
15289 * Secure?  Conform?::           Is Bison POSIX safe?
15290 * Enabling Relocatability::     Moving Bison/using it through network shares
15291 * I can't build Bison::         Troubleshooting
15292 * Where can I find help?::      Troubleshouting
15293 * Bug Reports::                 Troublereporting
15294 * More Languages::              Parsers in C++, Java, and so on
15295 * Beta Testing::                Experimenting with development versions
15296 * Mailing Lists::               Meeting other Bison users
15297 @end menu
15298
15299 @node Memory Exhausted
15300 @section Memory Exhausted
15301
15302 @quotation
15303 My parser returns with error with a @samp{memory exhausted}
15304 message.  What can I do?
15305 @end quotation
15306
15307 This question is already addressed elsewhere, see @ref{Recursion}.
15308
15309 @node How Can I Reset the Parser
15310 @section How Can I Reset the Parser
15311
15312 The following phenomenon has several symptoms, resulting in the
15313 following typical questions:
15314
15315 @quotation
15316 I invoke @code{yyparse} several times, and on correct input it works
15317 properly; but when a parse error is found, all the other calls fail
15318 too.  How can I reset the error flag of @code{yyparse}?
15319 @end quotation
15320
15321 @noindent
15322 or
15323
15324 @quotation
15325 My parser includes support for an @samp{#include}-like feature, in which
15326 case I run @code{yyparse} from @code{yyparse}.  This fails although I did
15327 specify @samp{%define api.pure full}.
15328 @end quotation
15329
15330 These problems typically come not from Bison itself, but from
15331 Lex-generated scanners.  Because these scanners use large buffers for
15332 speed, they might not notice a change of input file.  As a
15333 demonstration, consider the following source file,
15334 @file{first-line.l}:
15335
15336 @example
15337 @group
15338 %@{
15339 #include <stdio.h>
15340 #include <stdlib.h>
15341 %@}
15342 @end group
15343 %%
15344 .*\n    ECHO; return 1;
15345 %%
15346 @group
15347 int
15348 yyparse (char const *file)
15349 @{
15350   yyin = fopen (file, "r");
15351   if (!yyin)
15352     @{
15353       perror ("fopen");
15354       exit (EXIT_FAILURE);
15355     @}
15356 @end group
15357 @group
15358   /* One token only. */
15359   yylex ();
15360   if (fclose (yyin) != 0)
15361     @{
15362       perror ("fclose");
15363       exit (EXIT_FAILURE);
15364     @}
15365   return 0;
15366 @}
15367 @end group
15368
15369 @group
15370 int
15371 main (void)
15372 @{
15373   yyparse ("input");
15374   yyparse ("input");
15375   return 0;
15376 @}
15377 @end group
15378 @end example
15379
15380 @noindent
15381 If the file @file{input} contains
15382
15383 @example
15384 input:1: Hello,
15385 input:2: World!
15386 @end example
15387
15388 @noindent
15389 then instead of getting the first line twice, you get:
15390
15391 @example
15392 $ @kbd{flex -ofirst-line.c first-line.l}
15393 $ @kbd{gcc  -ofirst-line   first-line.c -ll}
15394 $ @kbd{./first-line}
15395 input:1: Hello,
15396 input:2: World!
15397 @end example
15398
15399 Therefore, whenever you change @code{yyin}, you must tell the
15400 Lex-generated scanner to discard its current buffer and switch to the
15401 new one.  This depends upon your implementation of Lex; see its
15402 documentation for more.  For Flex, it suffices to call
15403 @samp{YY_FLUSH_BUFFER} after each change to @code{yyin}.  If your
15404 Flex-generated scanner needs to read from several input streams to
15405 handle features like include files, you might consider using Flex
15406 functions like @samp{yy_switch_to_buffer} that manipulate multiple
15407 input buffers.
15408
15409 If your Flex-generated scanner uses start conditions (@pxref{Start
15410 conditions, , Start conditions, flex, The Flex Manual}), you might
15411 also want to reset the scanner's state, i.e., go back to the initial
15412 start condition, through a call to @samp{BEGIN (0)}.
15413
15414 @node Strings are Destroyed
15415 @section Strings are Destroyed
15416
15417 @quotation
15418 My parser seems to destroy old strings, or maybe it loses track of
15419 them.  Instead of reporting @samp{"foo", "bar"}, it reports
15420 @samp{"bar", "bar"}, or even @samp{"foo\nbar", "bar"}.
15421 @end quotation
15422
15423 This error is probably the single most frequent ``bug report'' sent to
15424 Bison lists, but it merely reflects a misunderstanding of the role
15425 of the scanner.  Consider the following Lex code:
15426
15427 @example
15428 @group
15429 %@{
15430 #include <stdio.h>
15431 char *yylval = NULL;
15432 %@}
15433 @end group
15434 @group
15435 %%
15436 .*    yylval = yytext; return 1;
15437 \n    continue;
15438 %%
15439 @end group
15440 @group
15441 int
15442 main ()
15443 @{
15444   /* Similar to using $1, $2 in a Bison action. */
15445   char *fst = (yylex (), yylval);
15446   char *snd = (yylex (), yylval);
15447   printf ("\"%s\", \"%s\"\n", fst, snd);
15448   return 0;
15449 @}
15450 @end group
15451 @end example
15452
15453 If you compile and run this code, you get:
15454
15455 @example
15456 $ @kbd{flex -osplit-lines.c split-lines.l}
15457 $ @kbd{gcc  -osplit-lines   split-lines.c -ll}
15458 $ @kbd{printf 'one\ntwo\n' | ./split-lines}
15459 "one
15460 two", "two"
15461 @end example
15462
15463 @noindent
15464 this is because @code{yytext} is a buffer provided for @emph{reading}
15465 in the action, but if you want to keep it, you have to duplicate it
15466 (e.g., using @code{strdup}).  Note that the output may depend on how
15467 your implementation of Lex handles @code{yytext}.  For instance, when
15468 given the Lex compatibility option @option{-l} (which triggers the
15469 option @samp{%array}) Flex generates a different behavior:
15470
15471 @example
15472 $ @kbd{flex -l -osplit-lines.c split-lines.l}
15473 $ @kbd{gcc     -osplit-lines   split-lines.c -ll}
15474 $ @kbd{printf 'one\ntwo\n' | ./split-lines}
15475 "two", "two"
15476 @end example
15477
15478
15479 @node Implementing Gotos/Loops
15480 @section Implementing Gotos/Loops
15481
15482 @quotation
15483 My simple calculator supports variables, assignments, and functions,
15484 but how can I implement gotos, or loops?
15485 @end quotation
15486
15487 Although very pedagogical, the examples included in the document blur
15488 the distinction to make between the parser---whose job is to recover
15489 the structure of a text and to transmit it to subsequent modules of
15490 the program---and the processing (such as the execution) of this
15491 structure.  This works well with so-called straight-line programs,
15492 i.e., precisely those that have a straightforward execution model:
15493 execute simple instructions one after the other.
15494
15495 @cindex abstract syntax tree
15496 @cindex AST
15497 If you want a richer model, you will probably need to use the parser
15498 to construct a tree that does represent the structure it has
15499 recovered; this tree is usually called the @dfn{abstract syntax tree},
15500 or @dfn{AST} for short.  Then, walking through this tree,
15501 traversing it in various ways, will enable treatments such as its
15502 execution or its translation, which will result in an interpreter or a
15503 compiler.
15504
15505 This topic is way beyond the scope of this manual, and the reader is
15506 invited to consult the dedicated literature.
15507
15508
15509 @node Multiple start-symbols
15510 @section Multiple start-symbols
15511
15512 @quotation
15513 I have several closely related grammars, and I would like to share their
15514 implementations.  In fact, I could use a single grammar but with multiple
15515 entry points.
15516 @end quotation
15517
15518 Bison does not support multiple start-symbols, but there is a very simple
15519 means to simulate them.  If @code{foo} and @code{bar} are the two pseudo
15520 start-symbols, then introduce two new tokens, say @code{START_FOO} and
15521 @code{START_BAR}, and use them as switches from the real start-symbol:
15522
15523 @example
15524 %token START_FOO START_BAR;
15525 %start start;
15526 start:
15527   START_FOO foo
15528 | START_BAR bar;
15529 @end example
15530
15531 These tokens prevent the introduction of new conflicts.  As far as the
15532 parser goes, that is all that is needed.
15533
15534 Now the difficult part is ensuring that the scanner will send these tokens
15535 first.  If your scanner is hand-written, that should be straightforward.  If
15536 your scanner is generated by Lex, then there is a simple means to do it:
15537 recall that anything between @samp{%@{ ... %@}} after the first @code{%%} is
15538 copied verbatim at the top of the generated @code{yylex} function.  Make
15539 sure a variable @code{start_token} is available in the scanner (e.g., a
15540 global variable or using @code{%lex-param} etc.), and use the following:
15541
15542 @example
15543   /* @r{Prologue.} */
15544 %%
15545 %@{
15546   if (start_token)
15547     @{
15548       int t = start_token;
15549       start_token = 0;
15550       return t;
15551     @}
15552 %@}
15553   /* @r{The rules.} */
15554 @end example
15555
15556
15557 @node Secure?  Conform?
15558 @section Secure?  Conform?
15559
15560 @quotation
15561 Is Bison secure?  Does it conform to POSIX?
15562 @end quotation
15563
15564 If you're looking for a guarantee or certification, we don't provide it.
15565 However, Bison is intended to be a reliable program that conforms to the
15566 POSIX specification for Yacc.  If you run into problems, please send us a
15567 bug report.
15568
15569 @include relocatable.texi
15570
15571 @node I can't build Bison
15572 @section I can't build Bison
15573
15574 @quotation
15575 I can't build Bison because @command{make} complains that
15576 @code{msgfmt} is not found.
15577 What should I do?
15578 @end quotation
15579
15580 Like most GNU packages with internationalization support, that feature
15581 is turned on by default.  If you have problems building in the @file{po}
15582 subdirectory, it indicates that your system's internationalization
15583 support is lacking.  You can re-configure Bison with
15584 @option{--disable-nls} to turn off this support, or you can install GNU
15585 gettext from @url{https://ftp.gnu.org/gnu/gettext/} and re-configure
15586 Bison.  See the file @file{ABOUT-NLS} for more information.
15587
15588 @quotation
15589 I can't build Bison because my C compiler is too old.
15590 @end quotation
15591
15592 Except for GLR parsers (which require C99), the C code that Bison generates
15593 requires only C89 or later.  However, Bison itself requires common C99
15594 features such as declarations after statements.  Bison's @code{configure}
15595 script attempts to enable C99 (or later) support on compilers that default
15596 to pre-C99.  If your compiler lacks these C99 features entirely, GCC may
15597 well be a better choice; or you can try upgrading to your compiler's latest
15598 version.
15599
15600 @node Where can I find help?
15601 @section Where can I find help?
15602
15603 @quotation
15604 I'm having trouble using Bison.  Where can I find help?
15605 @end quotation
15606
15607 First, read this fine manual.  Beyond that, you can send mail to
15608 @email{help-bison@@gnu.org}.  This mailing list is intended to be
15609 populated with people who are willing to answer questions about using
15610 and installing Bison.  Please keep in mind that (most of) the people on
15611 the list have aspects of their lives which are not related to Bison (!),
15612 so you may not receive an answer to your question right away.  This can
15613 be frustrating, but please try not to honk them off; remember that any
15614 help they provide is purely voluntary and out of the kindness of their
15615 hearts.
15616
15617 @node Bug Reports
15618 @section Bug Reports
15619
15620 @quotation
15621 I found a bug.  What should I include in the bug report?
15622 @end quotation
15623
15624 Before sending a bug report, make sure you are using the latest
15625 version.  Check @url{https://ftp.gnu.org/pub/gnu/bison/} or one of its
15626 mirrors.  Be sure to include the version number in your bug report.  If
15627 the bug is present in the latest version but not in a previous version,
15628 try to determine the most recent version which did not contain the bug.
15629
15630 If the bug is parser-related, you should include the smallest grammar
15631 you can which demonstrates the bug.  The grammar file should also be
15632 complete (i.e., I should be able to run it through Bison without having
15633 to edit or add anything).  The smaller and simpler the grammar, the
15634 easier it will be to fix the bug.
15635
15636 Include information about your compilation environment, including your
15637 operating system's name and version and your compiler's name and
15638 version.  If you have trouble compiling, you should also include a
15639 transcript of the build session, starting with the invocation of
15640 @code{configure}.  Depending on the nature of the bug, you may be asked to
15641 send additional files as well (such as @file{config.h} or @file{config.cache}).
15642
15643 Patches are most welcome, but not required.  That is, do not hesitate to
15644 send a bug report just because you cannot provide a fix.
15645
15646 Send bug reports to @email{bug-bison@@gnu.org}.
15647
15648 @node More Languages
15649 @section More Languages
15650
15651 @quotation
15652 Will Bison ever have C++ and Java support?  How about @var{insert your
15653 favorite language here}?
15654 @end quotation
15655
15656 C++, D and Java are supported.  We'd love to add other languages;
15657 contributions are welcome.
15658
15659 @node Beta Testing
15660 @section Beta Testing
15661
15662 @quotation
15663 What is involved in being a beta tester?
15664 @end quotation
15665
15666 It's not terribly involved.  Basically, you would download a test
15667 release, compile it, and use it to build and run a parser or two.  After
15668 that, you would submit either a bug report or a message saying that
15669 everything is okay.  It is important to report successes as well as
15670 failures because test releases eventually become mainstream releases,
15671 but only if they are adequately tested.  If no one tests, development is
15672 essentially halted.
15673
15674 Beta testers are particularly needed for operating systems to which the
15675 developers do not have easy access.  They currently have easy access to
15676 recent GNU/Linux and Solaris versions.  Reports about other operating
15677 systems are especially welcome.
15678
15679 @node Mailing Lists
15680 @section Mailing Lists
15681
15682 @quotation
15683 How do I join the help-bison and bug-bison mailing lists?
15684 @end quotation
15685
15686 See @url{https://lists.gnu.org/}.
15687
15688 @c ================================================= Table of Symbols
15689
15690 @node Table of Symbols
15691 @appendix Bison Symbols
15692 @cindex Bison symbols, table of
15693 @cindex symbols in Bison, table of
15694
15695 @deffn {Variable} @@$
15696 In an action, the location of the left-hand side of the rule.
15697 @xref{Tracking Locations}.
15698 @end deffn
15699
15700 @deffn {Variable} @@@var{n}
15701 @deffnx {Symbol} @@@var{n}
15702 In an action, the location of the @var{n}-th symbol of the right-hand side
15703 of the rule.  @xref{Tracking Locations}.
15704
15705 In a grammar, the Bison-generated nonterminal symbol for a midrule action
15706 with a semantic value.  @xref{Midrule Action Translation}.
15707 @end deffn
15708
15709 @deffn {Variable} @@@var{name}
15710 @deffnx {Variable} @@[@var{name}]
15711 In an action, the location of a symbol addressed by @var{name}.
15712 @xref{Tracking Locations}.
15713 @end deffn
15714
15715 @deffn {Symbol} $@@@var{n}
15716 In a grammar, the Bison-generated nonterminal symbol for a midrule action
15717 with no semantic value.  @xref{Midrule Action Translation}.
15718 @end deffn
15719
15720 @deffn {Variable} $$
15721 In an action, the semantic value of the left-hand side of the rule.
15722 @xref{Actions}.
15723 @end deffn
15724
15725 @deffn {Variable} $@var{n}
15726 In an action, the semantic value of the @var{n}-th symbol of the
15727 right-hand side of the rule.  @xref{Actions}.
15728 @end deffn
15729
15730 @deffn {Variable} $@var{name}
15731 @deffnx {Variable} $[@var{name}]
15732 In an action, the semantic value of a symbol addressed by @var{name}.
15733 @xref{Actions}.
15734 @end deffn
15735
15736 @deffn {Delimiter} %%
15737 Delimiter used to separate the grammar rule section from the
15738 Bison declarations section or the epilogue.
15739 @xref{Grammar Layout}.
15740 @end deffn
15741
15742 @c Don't insert spaces, or check the DVI output.
15743 @deffn {Delimiter} %@{@var{code}%@}
15744 All code listed between @samp{%@{} and @samp{%@}} is copied verbatim
15745 to the parser implementation file.  Such code forms the prologue of
15746 the grammar file.  @xref{Grammar Outline}.
15747 @end deffn
15748
15749 @deffn {Directive} %?@{@var{expression}@}
15750 Predicate actions.  This is a type of action clause that may appear in
15751 rules. The expression is evaluated, and if false, causes a syntax error.  In
15752 GLR parsers during nondeterministic operation,
15753 this silently causes an alternative parse to die.  During deterministic
15754 operation, it is the same as the effect of @code{YYERROR}.
15755 @xref{Semantic Predicates}.
15756 @end deffn
15757
15758 @deffn {Construct} /* @dots{} */
15759 @deffnx {Construct} // @dots{}
15760 Comments, as in C/C++.
15761 @end deffn
15762
15763 @deffn {Delimiter} :
15764 Separates a rule's result from its components.  @xref{Rules}.
15765 @end deffn
15766
15767 @deffn {Delimiter} ;
15768 Terminates a rule.  @xref{Rules}.
15769 @end deffn
15770
15771 @deffn {Delimiter} |
15772 Separates alternate rules for the same result nonterminal.
15773 @xref{Rules}.
15774 @end deffn
15775
15776 @deffn {Directive} <*>
15777 Used to define a default tagged @code{%destructor} or default tagged
15778 @code{%printer}.
15779
15780 @xref{Destructor Decl}.
15781 @end deffn
15782
15783 @deffn {Directive} <>
15784 Used to define a default tagless @code{%destructor} or default tagless
15785 @code{%printer}.
15786
15787 @xref{Destructor Decl}.
15788 @end deffn
15789
15790 @deffn {Symbol} $accept
15791 The predefined nonterminal whose only rule is @samp{$accept: @var{start}
15792 $end}, where @var{start} is the start symbol.  @xref{Start Decl}.  It cannot
15793 be used in the grammar.
15794 @end deffn
15795
15796 @deffn {Directive} %code @{@var{code}@}
15797 @deffnx {Directive} %code @var{qualifier} @{@var{code}@}
15798 Insert @var{code} verbatim into the output parser source at the
15799 default location or at the location specified by @var{qualifier}.
15800 @xref{%code Summary}.
15801 @end deffn
15802
15803 @deffn {Directive} %debug
15804 Equip the parser for debugging.  @xref{Decl Summary}.
15805 @end deffn
15806
15807 @ifset defaultprec
15808 @deffn {Directive} %default-prec
15809 Assign a precedence to rules that lack an explicit @samp{%prec}
15810 modifier.  @xref{Contextual Precedence}.
15811 @end deffn
15812 @end ifset
15813
15814 @deffn {Directive} %define @var{variable}
15815 @deffnx {Directive} %define @var{variable} @var{value}
15816 @deffnx {Directive} %define @var{variable} @{@var{value}@}
15817 @deffnx {Directive} %define @var{variable} "@var{value}"
15818 Define a variable to adjust Bison's behavior.  @xref{%define Summary}.
15819 @end deffn
15820
15821 @deffn {Directive} %defines
15822 @deffnx {Directive} %defines @var{defines-file}
15823 Historical name for @code{%header}.
15824 @xref{Decl Summary}.
15825 @end deffn
15826
15827 @deffn {Directive} %destructor
15828 Specify how the parser should reclaim the memory associated with
15829 discarded symbols.  @xref{Destructor Decl}.
15830 @end deffn
15831
15832 @deffn {Directive} %dprec
15833 Bison declaration to assign a precedence to a rule that is used at parse
15834 time to resolve reduce/reduce conflicts.  @xref{GLR Parsers}.
15835 @end deffn
15836
15837 @deffn {Directive} %empty
15838 Bison declaration to make explicit that a rule has an empty
15839 right-hand side.  @xref{Empty Rules}.
15840 @end deffn
15841
15842 @deffn {Symbol} $end
15843 The predefined token marking the end of the token stream.  It cannot be
15844 used in the grammar.
15845 @end deffn
15846
15847 @deffn {Symbol} error
15848 A token name reserved for error recovery.  This token may be used in
15849 grammar rules so as to allow the Bison parser to recognize an error in
15850 the grammar without halting the process.  In effect, a sentence
15851 containing an error may be recognized as valid.  On a syntax error, the
15852 token @code{error} becomes the current lookahead token.  Actions
15853 corresponding to @code{error} are then executed, and the lookahead
15854 token is reset to the token that originally caused the violation.
15855 @xref{Error Recovery}.
15856 @end deffn
15857
15858 @deffn {Directive} %error-verbose
15859 An obsolete directive standing for @samp{%define parse.error verbose}.
15860 @end deffn
15861
15862 @deffn {Directive} %file-prefix "@var{prefix}"
15863 Bison declaration to set the prefix of the output files.  @xref{Decl
15864 Summary}.
15865 @end deffn
15866
15867 @deffn {Directive} %glr-parser
15868 Bison declaration to produce a GLR parser.  @xref{GLR
15869 Parsers}.
15870 @end deffn
15871
15872 @deffn {Directive} %header
15873 Bison declaration to create a parser header file, which is usually
15874 meant for the scanner.  @xref{Decl Summary}.
15875 @end deffn
15876
15877 @deffn {Directive} %header @var{header-file}
15878 Same as above, but save in the file @var{header-file}.
15879 @xref{Decl Summary}.
15880 @end deffn
15881
15882 @deffn {Directive} %initial-action
15883 Run user code before parsing.  @xref{Initial Action Decl}.
15884 @end deffn
15885
15886 @deffn {Directive} %language
15887 Specify the programming language for the generated parser.
15888 @xref{Decl Summary}.
15889 @end deffn
15890
15891 @deffn {Directive} %left
15892 Bison declaration to assign precedence and left associativity to token(s).
15893 @xref{Precedence Decl}.
15894 @end deffn
15895
15896 @deffn {Directive} %lex-param @{@var{argument-declaration}@} @dots{}
15897 Bison declaration to specify additional arguments that
15898 @code{yylex} should accept.  @xref{Pure Calling}.
15899 @end deffn
15900
15901 @deffn {Directive} %merge
15902 Bison declaration to assign a merging function to a rule.  If there is a
15903 reduce/reduce conflict with a rule having the same merging function, the
15904 function is applied to the two semantic values to get a single result.
15905 @xref{GLR Parsers}.
15906 @end deffn
15907
15908 @deffn {Directive} %name-prefix "@var{prefix}"
15909 Obsoleted by the @code{%define} variable @code{api.prefix} (@pxref{Multiple
15910 Parsers}).
15911
15912 Rename the external symbols (variables and functions) used in the parser so
15913 that they start with @var{prefix} instead of @samp{yy}.  Contrary to
15914 @code{api.prefix}, do not rename types and macros.
15915
15916 The precise list of symbols renamed in C parsers is @code{yyparse},
15917 @code{yylex}, @code{yyerror}, @code{yynerrs}, @code{yylval}, @code{yychar},
15918 @code{yydebug}, and (if locations are used) @code{yylloc}.  If you use a
15919 push parser, @code{yypush_parse}, @code{yypull_parse}, @code{yypstate},
15920 @code{yypstate_new} and @code{yypstate_delete} will also be renamed.  For
15921 example, if you use @samp{%name-prefix "c_"}, the names become
15922 @code{c_parse}, @code{c_lex}, and so on.  For C++ parsers, see the
15923 @code{%define api.namespace} documentation in this section.
15924 @end deffn
15925
15926
15927 @ifset defaultprec
15928 @deffn {Directive} %no-default-prec
15929 Do not assign a precedence to rules that lack an explicit @samp{%prec}
15930 modifier.  @xref{Contextual Precedence}.
15931 @end deffn
15932 @end ifset
15933
15934 @deffn {Directive} %no-lines
15935 Bison declaration to avoid generating @code{#line} directives in the
15936 parser implementation file.  @xref{Decl Summary}.
15937 @end deffn
15938
15939 @deffn {Directive} %nonassoc
15940 Bison declaration to assign precedence and nonassociativity to token(s).
15941 @xref{Precedence Decl}.
15942 @end deffn
15943
15944 @deffn {Directive} %nterm
15945 Bison declaration to declare nonterminals.  @xref{Type Decl}.
15946 @end deffn
15947
15948 @deffn {Directive} %output "@var{file}"
15949 Bison declaration to set the name of the parser implementation file.
15950 @xref{Decl Summary}.
15951 @end deffn
15952
15953 @deffn {Directive} %param @{@var{argument-declaration}@} @dots{}
15954 Bison declaration to specify additional arguments that both
15955 @code{yylex} and @code{yyparse} should accept.  @xref{Parser Function}.
15956 @end deffn
15957
15958 @deffn {Directive} %parse-param @{@var{argument-declaration}@} @dots{}
15959 Bison declaration to specify additional arguments that @code{yyparse}
15960 should accept.  @xref{Parser Function}.
15961 @end deffn
15962
15963 @deffn {Directive} %prec
15964 Bison declaration to assign a precedence to a specific rule.
15965 @xref{Contextual Precedence}.
15966 @end deffn
15967
15968 @deffn {Directive} %precedence
15969 Bison declaration to assign precedence to token(s), but no associativity.
15970 @xref{Precedence Decl}.
15971 @end deffn
15972
15973 @deffn {Directive} %pure-parser
15974 Deprecated version of @samp{%define api.pure} (@pxref{%define
15975 Summary}), for which Bison is more careful to warn about
15976 unreasonable usage.
15977 @end deffn
15978
15979 @deffn {Directive} %require "@var{version}"
15980 Require version @var{version} or higher of Bison.  @xref{Require Decl}.
15981 @end deffn
15982
15983 @deffn {Directive} %right
15984 Bison declaration to assign precedence and right associativity to token(s).
15985 @xref{Precedence Decl}.
15986 @end deffn
15987
15988 @deffn {Directive} %skeleton
15989 Specify the skeleton to use; usually for development.
15990 @xref{Decl Summary}.
15991 @end deffn
15992
15993 @deffn {Directive} %start
15994 Bison declaration to specify the start symbol.  @xref{Start Decl}.
15995 @end deffn
15996
15997 @deffn {Directive} %token
15998 Bison declaration to declare token(s) without specifying precedence.
15999 @xref{Token Decl}.
16000 @end deffn
16001
16002 @deffn {Directive} %token-table
16003 Bison declaration to include a token name table in the parser implementation
16004 file.  @xref{Decl Summary}.
16005 @end deffn
16006
16007 @deffn {Directive} %type
16008 Bison declaration to declare symbol value types.  @xref{Type Decl}.
16009 @end deffn
16010
16011 @deffn {Symbol} $undefined
16012 The predefined token onto which all undefined values returned by
16013 @code{yylex} are mapped.  It cannot be used in the grammar, rather, use
16014 @code{error}.
16015 @end deffn
16016
16017 @deffn {Directive} %union
16018 Bison declaration to specify several possible data types for semantic
16019 values.  @xref{Union Decl}.
16020 @end deffn
16021
16022 @deffn {Macro} YYABORT
16023 Macro to pretend that an unrecoverable syntax error has occurred, by making
16024 @code{yyparse} return 1 immediately.  The error reporting function
16025 @code{yyerror} is not called.  @xref{Parser Function}.
16026
16027 For Java parsers, this functionality is invoked using @code{return YYABORT;}
16028 instead.
16029 @end deffn
16030
16031 @deffn {Macro} YYACCEPT
16032 Macro to pretend that a complete utterance of the language has been
16033 read, by making @code{yyparse} return 0 immediately.
16034 @xref{Parser Function}.
16035
16036 For Java parsers, this functionality is invoked using @code{return YYACCEPT;}
16037 instead.
16038 @end deffn
16039
16040 @deffn {Macro} YYBACKUP
16041 Macro to discard a value from the parser stack and fake a lookahead
16042 token.  @xref{Action Features}.
16043 @end deffn
16044
16045 @deffn {Macro} YYBISON
16046 The version of Bison as an integer, for instance 30704 for version 3.7.4.
16047 Defined in @file{yacc.c} only.  Before version 3.7.4, @code{YYBISON} was
16048 defined to 1.
16049 @end deffn
16050
16051 @deffn {Variable} yychar
16052 External integer variable that contains the integer value of the
16053 lookahead token.  (In a pure parser, it is a local variable within
16054 @code{yyparse}.)  Error-recovery rule actions may examine this variable.
16055 @xref{Action Features}.
16056 @end deffn
16057
16058 @deffn {Macro} yyclearin
16059 Macro used in error-recovery rule actions.  It clears the previous
16060 lookahead token.  @xref{Error Recovery}.
16061 @end deffn
16062
16063 @deffn {Macro} YYDEBUG
16064 Macro to define to equip the parser with tracing code.  @xref{Tracing}.
16065 @end deffn
16066
16067 @deffn {Variable} yydebug
16068 External integer variable set to zero by default.  If @code{yydebug}
16069 is given a nonzero value, the parser will output information on input
16070 symbols and parser action.  @xref{Tracing}.
16071 @end deffn
16072
16073 @deffn {Value} YYEMPTY
16074 The pseudo token kind when there is no lookahead token.
16075 @end deffn
16076
16077 @deffn {Value} YYEOF
16078 The token kind denoting the end of the input stream.
16079 @end deffn
16080
16081 @deffn {Macro} yyerrok
16082 Macro to cause parser to recover immediately to its normal mode
16083 after a syntax error.  @xref{Error Recovery}.
16084 @end deffn
16085
16086 @deffn {Macro} YYERROR
16087 Cause an immediate syntax error.  This statement initiates error
16088 recovery just as if the parser itself had detected an error; however, it
16089 does not call @code{yyerror}, and does not print any message.  If you
16090 want to print an error message, call @code{yyerror} explicitly before
16091 the @samp{YYERROR;} statement.  @xref{Error Recovery}.
16092
16093 For Java parsers, this functionality is invoked using @code{return YYERROR;}
16094 instead.
16095 @end deffn
16096
16097 @deffn {Function} yyerror
16098 User-supplied function to be called by @code{yyparse} on error.
16099 @xref{Error Reporting Function}.
16100 @end deffn
16101
16102 @deffn {Macro} YYFPRINTF
16103 Macro used to output run-time traces in C.
16104 @xref{Enabling Traces}.
16105 @end deffn
16106
16107 @deffn {Macro} YYINITDEPTH
16108 Macro for specifying the initial size of the parser stack.
16109 @xref{Memory Management}.
16110 @end deffn
16111
16112 @deffn {Function} yylex
16113 User-supplied lexical analyzer function, called with no arguments to get
16114 the next token.  @xref{Lexical}.
16115 @end deffn
16116
16117 @deffn {Variable} yylloc
16118 External variable in which @code{yylex} should place the line and column
16119 numbers associated with a token.  (In a pure parser, it is a local
16120 variable within @code{yyparse}, and its address is passed to
16121 @code{yylex}.)
16122 You can ignore this variable if you don't use the @samp{@@} feature in the
16123 grammar actions.
16124 @xref{Token Locations}.
16125 In semantic actions, it stores the location of the lookahead token.
16126 @xref{Actions and Locations}.
16127 @end deffn
16128
16129 @deffn {Type} YYLTYPE
16130 Data type of @code{yylloc}.  By default in C, a structure with four members
16131 (start/end line/column).  @xref{Location Type}.
16132 @end deffn
16133
16134 @deffn {Variable} yylval
16135 External variable in which @code{yylex} should place the semantic
16136 value associated with a token.  (In a pure parser, it is a local
16137 variable within @code{yyparse}, and its address is passed to
16138 @code{yylex}.)
16139 @xref{Token Values}.
16140 In semantic actions, it stores the semantic value of the lookahead token.
16141 @xref{Actions}.
16142 @end deffn
16143
16144 @deffn {Macro} YYMAXDEPTH
16145 Macro for specifying the maximum size of the parser stack.  @xref{Memory
16146 Management}.
16147 @end deffn
16148
16149 @deffn {Variable} yynerrs
16150 Global variable which Bison increments each time it reports a syntax error.
16151 (In a pure parser, it is a local variable within @code{yyparse}. In a
16152 pure push parser, it is a member of @code{yypstate}.)
16153 @xref{Error Reporting Function}.
16154 @end deffn
16155
16156 @deffn {Macro} YYNOMEM
16157 Macro to pretend that memory is exhausted, by making @code{yyparse} return 2
16158 immediately.  The error reporting function @code{yyerror} is called.
16159 @xref{Parser Function}.
16160 @end deffn
16161
16162 @deffn {Function} yyparse
16163 The parser function produced by Bison; call this function to start
16164 parsing.  @xref{Parser Function}.
16165 @end deffn
16166
16167 @deffn {Function} yypstate_delete
16168 The function to delete a parser instance, produced by Bison in push mode;
16169 call this function to delete the memory associated with a parser.
16170 @xref{yypstate_delete,,@code{yypstate_delete}}.  Does nothing when called
16171 with a null pointer.
16172 @end deffn
16173
16174 @deffn {Function} yypstate_new
16175 The function to create a parser instance, produced by Bison in push mode;
16176 call this function to create a new parser.
16177 @xref{yypstate_new,,@code{yypstate_new}}.
16178 @end deffn
16179
16180 @deffn {Function} yypull_parse
16181 The parser function produced by Bison in push mode; call this function to
16182 parse the rest of the input stream.
16183 @xref{yypull_parse,,@code{yypull_parse}}.
16184 @end deffn
16185
16186 @deffn {Function} yypush_parse
16187 The parser function produced by Bison in push mode; call this function to
16188 parse a single token.
16189 @xref{yypush_parse,,@code{yypush_parse}}.
16190 @end deffn
16191
16192 @deffn {Macro} YYRECOVERING
16193 The expression @code{YYRECOVERING ()} yields 1 when the parser
16194 is recovering from a syntax error, and 0 otherwise.
16195 @xref{Action Features}.
16196 @end deffn
16197
16198 @deffn {Macro} YYSTACK_USE_ALLOCA
16199 Macro used to control the use of @code{alloca} when the
16200 deterministic parser in C needs to extend its stacks.  If defined to 0,
16201 the parser will use @code{malloc} to extend its stacks and memory exhaustion
16202 occurs if @code{malloc} fails (@pxref{Memory Management}).  If defined to
16203 1, the parser will use @code{alloca}.  Values other than 0 and 1 are
16204 reserved for future Bison extensions.  If not defined,
16205 @code{YYSTACK_USE_ALLOCA} defaults to 0.
16206
16207 In the all-too-common case where your code may run on a host with a
16208 limited stack and with unreliable stack-overflow checking, you should
16209 set @code{YYMAXDEPTH} to a value that cannot possibly result in
16210 unchecked stack overflow on any of your target hosts when
16211 @code{alloca} is called.  You can inspect the code that Bison
16212 generates in order to determine the proper numeric values.  This will
16213 require some expertise in low-level implementation details.
16214 @end deffn
16215
16216 @deffn {Type} YYSTYPE
16217 In C, data type of semantic values; @code{int} by default.
16218 Deprecated in favor of the @code{%define} variable @code{api.value.type}.
16219 @xref{Value Type}.
16220 @end deffn
16221
16222 @deffn {Type} yysymbol_kind_t
16223 An enum of all the symbols, tokens and nonterminals, of the grammar.
16224 @xref{Syntax Error Reporting Function}.  The symbol kinds are used
16225 internally by the parser, and should not be confused with the token kinds:
16226 the symbol kind of a terminal symbol is not equal to its token kind! (Unless
16227 @samp{%define api.token.raw} was used.)
16228 @end deffn
16229
16230 @deffn {Type} yytoken_kind_t
16231 An enum of all the @dfn{token kinds} declared with @code{%token}
16232 (@pxref{Token Decl}).  These are the return values for @code{yylex}.  They
16233 should not be confused with the @emph{symbol kinds}, used internally by the
16234 parser.
16235 @end deffn
16236
16237 @deffn {Value} YYUNDEF
16238 The token kind denoting an unknown token.
16239 @end deffn
16240
16241
16242 @node Glossary
16243 @appendix Glossary
16244 @cindex glossary
16245
16246 @table @asis
16247 @item Accepting state
16248 A state whose only action is the accept action.
16249 The accepting state is thus a consistent state.
16250 @xref{Understanding}.
16251
16252 @item Backus-Naur Form (BNF; also called ``Backus Normal Form'')
16253 Formal method of specifying context-free grammars originally proposed
16254 by John Backus, and slightly improved by Peter Naur in his 1960-01-02
16255 committee document contributing to what became the Algol 60 report.
16256 @xref{Language and Grammar}.
16257
16258 @item Consistent state
16259 A state containing only one possible action.  @xref{Default Reductions}.
16260
16261 @item Context-free grammars
16262 Grammars specified as rules that can be applied regardless of context.
16263 Thus, if there is a rule which says that an integer can be used as an
16264 expression, integers are allowed @emph{anywhere} an expression is
16265 permitted.  @xref{Language and Grammar}.
16266
16267 @item Counterexample
16268 A sequence of tokens and/or nonterminals, with one dot, that demonstrates a
16269 conflict.  The dot marks the place where the conflict occurs.
16270
16271 @cindex unifying counterexample
16272 @cindex counterexample, unifying
16273 @cindex nonunifying counterexample
16274 @cindex counterexample, nonunifying
16275 A @emph{unifying} counterexample is a single string that has two different
16276 parses; its existence proves that the grammar is ambiguous.  When a unifying
16277 counterexample cannot be found in reasonable time, a @emph{nonunifying}
16278 counterexample is built: @emph{two} different strings sharing the prefix up
16279 to the dot.
16280
16281 @xref{Counterexamples}.
16282
16283 @item Default reduction
16284 The reduction that a parser should perform if the current parser state
16285 contains no other action for the lookahead token.  In permitted parser
16286 states, Bison declares the reduction with the largest lookahead set to be
16287 the default reduction and removes that lookahead set.  @xref{Default
16288 Reductions}.
16289
16290 @item Defaulted state
16291 A consistent state with a default reduction.  @xref{Default Reductions}.
16292
16293 @item Dynamic allocation
16294 Allocation of memory that occurs during execution, rather than at
16295 compile time or on entry to a function.
16296
16297 @item Empty string
16298 Analogous to the empty set in set theory, the empty string is a
16299 character string of length zero.
16300
16301 @item Finite-state stack machine
16302 A ``machine'' that has discrete states in which it is said to exist at
16303 each instant in time.  As input to the machine is processed, the
16304 machine moves from state to state as specified by the logic of the
16305 machine.  In the case of the parser, the input is the language being
16306 parsed, and the states correspond to various stages in the grammar
16307 rules.  @xref{Algorithm}.
16308
16309 @item Generalized LR (GLR)
16310 A parsing algorithm that can handle all context-free grammars, including those
16311 that are not LR(1).  It resolves situations that Bison's
16312 deterministic parsing
16313 algorithm cannot by effectively splitting off multiple parsers, trying all
16314 possible parsers, and discarding those that fail in the light of additional
16315 right context.  @xref{Generalized LR Parsing}.
16316
16317 @item Grouping
16318 A language construct that is (in general) grammatically divisible;
16319 for example, ``expression'' or ``declaration'' in C@.
16320 @xref{Language and Grammar}.
16321
16322 @item IELR(1) (Inadequacy Elimination LR(1))
16323 A minimal LR(1) parser table construction algorithm.  That is, given any
16324 context-free grammar, IELR(1) generates parser tables with the full
16325 language-recognition power of canonical LR(1) but with nearly the same
16326 number of parser states as LALR(1).  This reduction in parser states is
16327 often an order of magnitude.  More importantly, because canonical LR(1)'s
16328 extra parser states may contain duplicate conflicts in the case of non-LR(1)
16329 grammars, the number of conflicts for IELR(1) is often an order of magnitude
16330 less as well.  This can significantly reduce the complexity of developing a
16331 grammar.  @xref{LR Table Construction}.
16332
16333 @item Infix operator
16334 An arithmetic operator that is placed between the operands on which it
16335 performs some operation.
16336
16337 @item Input stream
16338 A continuous flow of data between devices or programs.
16339
16340 @item Kind
16341 ``Token'' and ``symbol'' are each overloaded to mean either a grammar symbol
16342 (kind) or all parse info (kind, value, location) associated with occurrences
16343 of that grammar symbol from the input.  To disambiguate,
16344
16345 @itemize
16346 @item
16347 we use ``token kind'' and ``symbol kind'' to mean both grammar symbols and
16348 the values that represent them in a base programming language (C, C++,
16349 etc.).  The names of the types of these values are typically
16350 @code{token_kind_t}, or @code{token_kind_type}, or @code{TokenKind},
16351 depending on the programming language.
16352
16353 @item
16354 we use ``token'' and ``symbol'' without the word ``kind'' to mean parsed
16355 occurrences, and we append the word ``type'' to refer to the types that
16356 represent them in a base programming language.
16357 @end itemize
16358
16359 In summary: When you see ``kind'', interpret ``symbol'' or ``token'' to mean
16360 a @emph{grammar symbol}.  When you don't see ``kind'' (including when you
16361 see ``type''), interpret ``symbol'' or ``token'' to mean a @emph{parsed
16362 symbol}.
16363
16364 @item LAC (Lookahead Correction)
16365 A parsing mechanism that fixes the problem of delayed syntax error
16366 detection, which is caused by LR state merging, default reductions, and the
16367 use of @code{%nonassoc}.  Delayed syntax error detection results in
16368 unexpected semantic actions, initiation of error recovery in the wrong
16369 syntactic context, and an incorrect list of expected tokens in a verbose
16370 syntax error message.  @xref{LAC}.
16371
16372 @item Language construct
16373 One of the typical usage schemas of the language.  For example, one of
16374 the constructs of the C language is the @code{if} statement.
16375 @xref{Language and Grammar}.
16376
16377 @item Left associativity
16378 Operators having left associativity are analyzed from left to right:
16379 @samp{a+b+c} first computes @samp{a+b} and then combines with
16380 @samp{c}.  @xref{Precedence}.
16381
16382 @item Left recursion
16383 A rule whose result symbol is also its first component symbol; for
16384 example, @samp{expseq1 : expseq1 ',' exp;}.  @xref{Recursion}.
16385
16386 @item Left-to-right parsing
16387 Parsing a sentence of a language by analyzing it token by token from
16388 left to right.  @xref{Algorithm}.
16389
16390 @item Lexical analyzer (scanner)
16391 A function that reads an input stream and returns tokens one by one.
16392 @xref{Lexical}.
16393
16394 @item Lexical tie-in
16395 A flag, set by actions in the grammar rules, which alters the way
16396 tokens are parsed.  @xref{Lexical Tie-ins}.
16397
16398 @item Literal string token
16399 A token which consists of two or more fixed characters.  @xref{Symbols}.
16400
16401 @item Lookahead token
16402 A token already read but not yet shifted.  @xref{Lookahead}.
16403
16404 @item LALR(1)
16405 The class of context-free grammars that Bison (like most other parser
16406 generators) can handle by default; a subset of LR(1).
16407 @xref{Mysterious Conflicts}.
16408
16409 @item LR(1)
16410 The class of context-free grammars in which at most one token of
16411 lookahead is needed to disambiguate the parsing of any piece of input.
16412
16413 @item Nonterminal symbol
16414 A grammar symbol standing for a grammatical construct that can
16415 be expressed through rules in terms of smaller constructs; in other
16416 words, a construct that is not a token.  @xref{Symbols}.
16417
16418 @item Parser
16419 A function that recognizes valid sentences of a language by analyzing
16420 the syntax structure of a set of tokens passed to it from a lexical
16421 analyzer.
16422
16423 @item Postfix operator
16424 An arithmetic operator that is placed after the operands upon which it
16425 performs some operation.
16426
16427 @item Reduction
16428 Replacing a string of nonterminals and/or terminals with a single
16429 nonterminal, according to a grammar rule.  @xref{Algorithm}.
16430
16431 @item Reentrant
16432 A reentrant subprogram is a subprogram which can be invoked any
16433 number of times in parallel, without interference between the various
16434 invocations.  @xref{Pure Decl}.
16435
16436 @item Reverse Polish Notation
16437 A language in which all operators are postfix operators.
16438
16439 @item Right recursion
16440 A rule whose result symbol is also its last component symbol; for
16441 example, @samp{expseq1: exp ',' expseq1;}.  @xref{Recursion}.
16442
16443 @item Semantics
16444 In computer languages, the semantics are specified by the actions
16445 taken for each instance of the language, i.e., the meaning of
16446 each statement.  @xref{Semantics}.
16447
16448 @item Shift
16449 A parser is said to shift when it makes the choice of analyzing
16450 further input from the stream rather than reducing immediately some
16451 already-recognized rule.  @xref{Algorithm}.
16452
16453 @item Single-character literal
16454 A single character that is recognized and interpreted as is.
16455 @xref{Grammar in Bison}.
16456
16457 @item Start symbol
16458 The nonterminal symbol that stands for a complete valid utterance in
16459 the language being parsed.  The start symbol is usually listed as the
16460 first nonterminal symbol in a language specification.
16461 @xref{Start Decl}.
16462
16463 @item Symbol kind
16464 A (finite) enumeration of the grammar symbols, as processed by the parser.
16465 @xref{Symbols}.
16466
16467 @item Symbol table
16468 A data structure where symbol names and associated data are stored during
16469 parsing to allow for recognition and use of existing information in repeated
16470 uses of a symbol.  @xref{Multi-function Calc}.
16471
16472 @item Syntax error
16473 An error encountered during parsing of an input stream due to invalid
16474 syntax.  @xref{Error Recovery}.
16475
16476 @item Terminal symbol
16477 A grammar symbol that has no rules in the grammar and therefore is
16478 grammatically indivisible.  The piece of text it represents is a token.
16479 @xref{Language and Grammar}.
16480
16481 @item Token
16482 A basic, grammatically indivisible unit of a language.  The symbol that
16483 describes a token in the grammar is a terminal symbol.  The input of the
16484 Bison parser is a stream of tokens which comes from the lexical analyzer.
16485 @xref{Symbols}.
16486
16487 @item Token kind
16488 A (finite) enumeration of the grammar terminals, as discriminated by the
16489 scanner.  @xref{Symbols}.
16490
16491 @item Unreachable state
16492 A parser state to which there does not exist a sequence of transitions from
16493 the parser's start state.  A state can become unreachable during conflict
16494 resolution.  @xref{Unreachable States}.
16495 @end table
16496
16497 @node GNU Free Documentation License
16498 @appendix GNU Free Documentation License
16499
16500 @include fdl.texi
16501
16502 @node Bibliography
16503 @unnumbered Bibliography
16504
16505 @c Please follow the template below when adding more references.
16506 @c And keep sorted alphabetically.
16507
16508 @table @asis
16509 @anchor{Corbett 1984}
16510 @item [Corbett 1984]
16511 @c author
16512 Robert Paul Corbett,
16513 @c title
16514 Static Semantics in Compiler Error Recovery,
16515 @c in
16516 Ph.D. Dissertation, Report No. UCB/CSD 85/251,
16517 @c where
16518 Department of Electrical Engineering and Computer Science, Computer Science
16519 Division, University of California, Berkeley, California
16520 @c when
16521 (June 1985).
16522 @c url
16523 @uref{https://digicoll.lib.berkeley.edu/record/135875}
16524
16525 @anchor{Denny 2008}
16526 @item [Denny 2008]
16527 Joel E. Denny and Brian A. Malloy, IELR(1): Practical LR(1) Parser Tables
16528 for Non-LR(1) Grammars with Conflict Resolution, in @cite{Proceedings of the
16529 2008 ACM Symposium on Applied Computing} (SAC'08), ACM, New York, NY, USA,
16530 pp.@: 240--245.  @uref{https://dx.doi.org/10.1145/1363686.1363747}
16531
16532 @anchor{Denny 2010 May}
16533 @item [Denny 2010 May]
16534 Joel E. Denny, PSLR(1): Pseudo-Scannerless Minimal LR(1) for the
16535 Deterministic Parsing of Composite Languages, Ph.D. Dissertation, Clemson
16536 University, Clemson, SC, USA (May 2010).
16537 @uref{https://tigerprints.clemson.edu/all_dissertations/519/}
16538
16539 @anchor{Denny 2010 November}
16540 @item [Denny 2010 November]
16541 Joel E. Denny and Brian A. Malloy, The IELR(1) Algorithm for Generating
16542 Minimal LR(1) Parser Tables for Non-LR(1) Grammars with Conflict Resolution,
16543 in @cite{Science of Computer Programming}, Vol.@: 75, Issue 11 (November
16544 2010), pp.@: 943--979.  @uref{https://dx.doi.org/10.1016/j.scico.2009.08.001}
16545
16546 @anchor{DeRemer 1982}
16547 @item [DeRemer 1982]
16548 Frank DeRemer and Thomas Pennello, Efficient Computation of LALR(1)
16549 Look-Ahead Sets, in @cite{ACM Transactions on Programming Languages and
16550 Systems}, Vol.@: 4, No.@: 4 (October 1982), pp.@:
16551 615--649. @uref{https://dx.doi.org/10.1145/69622.357187}
16552
16553 @anchor{Isradisaikul 2015}
16554 @item [Isradisaikul 2015]
16555 Chinawat Isradisaikul, Andrew Myers,
16556 Finding Counterexamples from Parsing Conflicts,
16557 in @cite{Proceedings of the 36th ACM SIGPLAN Conference on
16558 Programming Language Design and Implementation} (PLDI '15),
16559 ACM, pp.@: 555--564.
16560 @uref{https://www.cs.cornell.edu/andru/papers/cupex/cupex.pdf}
16561
16562 @anchor{Johnson 1978}
16563 @item [Johnson 1978]
16564 Steven C. Johnson,
16565 A portable compiler: theory and practice,
16566 in @cite{Proceedings of the 5th ACM SIGACT-SIGPLAN symposium on
16567 Principles of programming languages} (POPL '78),
16568 pp.@: 97--104.
16569 @uref{https://dx.doi.org/10.1145/512760.512771}.
16570
16571 @anchor{Knuth 1965}
16572 @item [Knuth 1965]
16573 Donald E. Knuth, On the Translation of Languages from Left to Right, in
16574 @cite{Information and Control}, Vol.@: 8, Issue 6 (December 1965), pp.@:
16575 607--639. @uref{https://dx.doi.org/10.1016/S0019-9958(65)90426-2}
16576
16577 @anchor{Scott 2000}
16578 @item [Scott 2000]
16579 Elizabeth Scott, Adrian Johnstone, and Shamsa Sadaf Hussain,
16580 @cite{Tomita-Style Generalised LR Parsers}, Royal Holloway, University of
16581 London, Department of Computer Science, TR-00-12 (December 2000).
16582 @uref{https://www.cs.rhul.ac.uk/research/languages/publications/tomita_style_1.ps}
16583 @end table
16584
16585 @node Index of Terms
16586 @unnumbered Index of Terms
16587
16588 @printindex cp
16589
16590 @bye
16591
16592 @c LocalWords: texinfo setfilename settitle setchapternewpage finalout texi FSF
16593 @c LocalWords: ifinfo smallbook shorttitlepage titlepage GPL FIXME iftex FSF's
16594 @c LocalWords: akim fn cp syncodeindex vr tp synindex dircategory direntry Naur
16595 @c LocalWords: ifset vskip pt filll insertcopying sp ISBN Etienne Suvasa Multi
16596 @c LocalWords: ifnottex yyparse detailmenu GLR RPN Calc var Decls Rpcalc multi
16597 @c LocalWords: rpcalc Lexer Expr ltcalc mfcalc yylex defaultprec Donnelly Gotos
16598 @c LocalWords: yyerror pxref LR yylval cindex dfn LALR samp gpl BNF xref yypush
16599 @c LocalWords: const int paren ifnotinfo AC noindent emph expr stmt findex lr
16600 @c LocalWords: glr YYSTYPE TYPENAME prog dprec printf decl init POSIX ODR
16601 @c LocalWords: pre STDC GNUC endif yy YY alloca lf stddef stdlib YYDEBUG yypull
16602 @c LocalWords: NUM exp subsubsection kbd Ctrl ctype EOF getchar isdigit nonfree
16603 @c LocalWords: ungetc stdin scanf sc calc ulator ls lm cc NEG prec yyerrok rr
16604 @c LocalWords: longjmp fprintf stderr yylloc YYLTYPE cos ln Stallman Destructor
16605 @c LocalWords: symrec val tptr FUN func struct sym enum IEC syntaxes Byacc
16606 @c LocalWords: fun putsym getsym arith funs atan ptr malloc sizeof Lex pcc
16607 @c LocalWords: strlen strcpy fctn strcmp isalpha symbuf realloc isalnum DOTDOT
16608 @c LocalWords: ptypes itype trigraphs yytname expseq vindex dtype Unary usr
16609 @c LocalWords: Rhs YYRHSLOC LE nonassoc op deffn typeless yynerrs nonterminal
16610 @c LocalWords: yychar yydebug msg YYNTOKENS YYNNTS YYNRULES YYNSTATES reentrant
16611 @c LocalWords: cparse clex deftypefun NE defmac YYACCEPT YYABORT param yypstate
16612 @c LocalWords: strncmp intval tindex lvalp locp llocp typealt YYBACKUP subrange
16613 @c LocalWords: YYEMPTY YYEOF YYRECOVERING yyclearin GE def UMINUS maybeword loc
16614 @c LocalWords: Johnstone Shamsa Sadaf Hussain Tomita TR uref YYMAXDEPTH inline
16615 @c LocalWords: YYINITDEPTH stmts ref initdcl maybeasm notype Lookahead ctx
16616 @c LocalWords: hexflag STR exdent itemset asis DYYDEBUG YYFPRINTF args Autoconf
16617 @c LocalWords: ypp yxx itemx tex leaderfill Troubleshouting sqrt Graphviz
16618 @c LocalWords: hbox hss hfill tt ly yyin fopen fclose ofirst gcc ll lookahead
16619 @c LocalWords: nbar yytext fst snd osplit ntwo strdup AST Troublereporting th
16620 @c LocalWords: YYSTACK DVI fdl printindex IELR nondeterministic nonterminals ps
16621 @c LocalWords: subexpressions declarator nondeferred config libintl postfix LAC
16622 @c LocalWords: preprocessor nonpositive unary nonnumeric typedef extern rhs sr
16623 @c LocalWords: yytokentype destructor multicharacter nonnull EBCDIC nterm LR's
16624 @c LocalWords: lvalue nonnegative XNUM CHR chr TAGLESS tagless stdout api TOK
16625 @c LocalWords: destructors Reentrancy nonreentrant subgrammar nonassociative Ph
16626 @c LocalWords: deffnx namespace xml goto lalr ielr runtime lex yacc yyps env
16627 @c LocalWords: yystate variadic Unshift NLS gettext po UTF Automake LOCALEDIR
16628 @c LocalWords: YYENABLE bindtextdomain Makefile DEFS CPPFLAGS DBISON DeRemer
16629 @c LocalWords: autoreconf Pennello multisets nondeterminism Generalised baz ACM
16630 @c LocalWords: redeclare automata Dparse localedir datadir XSLT midrule Wno
16631 @c LocalWords: multitable headitem hh basename Doxygen fno filename gdef de
16632 @c LocalWords: doxygen ival sval deftypemethod deallocate pos deftypemethodx
16633 @c LocalWords: Ctor defcv defcvx arg accessors CPP ifndef CALCXX YYerror
16634 @c LocalWords: lexer's calcxx bool LPAREN RPAREN deallocation cerrno climits
16635 @c LocalWords: cstdlib Debian undef yywrap unput noyywrap nounput zA yyleng
16636 @c LocalWords: errno strtol ERANGE str strerror iostream argc argv Javadoc PSLR
16637 @c LocalWords: bytecode initializers superclass stype ASTNode autoboxing nls
16638 @c LocalWords: toString deftypeivar deftypeivarx deftypeop YYParser strictfp
16639 @c LocalWords: superclasses boolean getErrorVerbose setErrorVerbose deftypecv
16640 @c LocalWords: getDebugStream setDebugStream getDebugLevel setDebugLevel url
16641 @c LocalWords: bisonVersion deftypecvx bisonSkeleton getStartPos getEndPos
16642 @c LocalWords: getLVal defvar deftypefn deftypefnx gotos msgfmt Corbett LALR's
16643 @c LocalWords: subdirectory Solaris nonassociativity perror schemas Malloy ints
16644 @c LocalWords: Scannerless ispell american ChangeLog smallexample CSTYPE CLTYPE
16645 @c LocalWords: clval CDEBUG cdebug deftypeopx yyterminate LocationType yyo
16646 @c LocalWords: parsers parser's documentencoding documentlanguage Wempty ss
16647 @c LocalWords: associativity subclasses precedences unresolvable runnable
16648 @c LocalWords: allocators subunit initializations unreferenced untyped dir
16649 @c LocalWords: errorVerbose subtype subtypes Wmidrule midrule's src rvalues
16650 @c LocalWords: automove evolutions Wother Wconflicts PNG lookaheads Acc sep
16651 @c LocalWords: xsltproc XSL xsl xhtml html num Wprecedence Werror fcaret gv
16652 @c LocalWords: fdiagnostics setlocale nullptr ast srcdir iff drv rgbWarning
16653 @c LocalWords: deftypefunx pragma Wnull dereference Wdocumentation elif ish
16654 @c LocalWords: Wdeprecated Wregister noinput yyloc yypos PODs sstream Wsign
16655 @c LocalWords: typename emplace Wconversion Wshorten yacchack reentrancy ou
16656 @c LocalWords: Relocatability exprs fixit Wyacc parseable fixits ffixit svg
16657 @c LocalWords: DNDEBUG cstring Wzero workalike POPL workalikes byacc UCB
16658 @c LocalWords: Penello's Penello Byson Byson's Corbett's CSD TOPLAS PDP cex
16659 @c LocalWords: Beazley's goyacc ocamlyacc SIGACT SIGPLAN colorWarning exVal
16660 @c LocalWords: setcolor rgbError colorError rgbNotice colorNotice derror
16661 @c LocalWords: colorOff maincolor inlineraw darkviolet darkcyan dwarning
16662 @c LocalWords: dnotice copyable stdint ptrdiff bufsize yyreport invariants
16663 @c LocalWords: xrefautomaticsectiontitle yysyntax yysymbol ARGMAX cond RTTI
16664 @c LocalWords: Wdangling yytoken erreur syntaxe inattendu attendait nombre
16665 @c LocalWords: YYUNDEF SymbolKind yypcontext YYENOMEM TOKENMAX getBundle
16666 @c LocalWords: ResourceBundle myResources getString getName getToken ylwrap
16667 @c LocalWords: getLocation getExpectedTokens reportSyntaxError bistromathic
16668 @c LocalWords: TokenKind Automake's rtti Wcounterexamples Chinawat PLDI buf
16669 @c LocalWords: Isradisaikul tcite pcite rgbGreen colorGreen rgbYellow Wcex
16670 @c LocalWords: colorYellow rgbRed colorRed rgbBlue colorBlue rgbPurple Ddoc
16671 @c LocalWords: colorPurple ifhtml ifnothtml situ rcex MERCHANTABILITY Wnone
16672 @c LocalWords: diagError diagNotice diagWarning diagOff danglingElseCex
16673 @c LocalWords: nonunifying YYNOMEM Wuseless dgettext textdomain domainname
16674 @c LocalWords: dirname typeof writeln YYBISON YYLOCATION backend structs
16675 @c LocalWords: pushParse
16676
16677 @c Local Variables:
16678 @c ispell-dictionary: "american"
16679 @c fill-column: 76
16680 @c End: