From 2ca6b719674228fa2f77f6627e78111a757b83cc Mon Sep 17 00:00:00 2001 From: Akim Demaille Date: Sat, 28 Sep 2019 13:48:35 +0200 Subject: [PATCH] yacc: use the most appropriate integral type for state numbers Currently we properly use the "best" integral type for tables, including those storing state numbers. However the variables for state numbers used in yyparse (and its dependencies such as yy_stack_print) still use int16_t invariably. As a consequence, very large models overflow these variables. Let's use the "best" type for these variables too. It turns out that we can still use 16 bits for twice larger automata: stick to unsigned types. However using 'unsigned' when 16 bits are not enough is troublesome and generates tons of warnings about signedness issues. Instead, let's use 'int'. Reported by Tom Kramer. https://lists.gnu.org/archive/html/bug-bison/2019-09/msg00018.html * data/skeletons/yacc.c (b4_state_num_type): New. (yy_state_num): Be computed from YYNSTATES. * tests/linear: New. * tests/torture.at (State number type): New. Use it. --- THANKS | 1 + data/skeletons/yacc.c | 44 ++++++++++++++---------- tests/linear | 94 +++++++++++++++++++++++++++++++++++++++++++++++++++ tests/local.mk | 2 +- tests/torture.at | 44 ++++++++++++++++++++---- 5 files changed, 160 insertions(+), 25 deletions(-) create mode 100755 tests/linear diff --git a/THANKS b/THANKS index 2cdd9b0b..569ba173 100644 --- a/THANKS +++ b/THANKS @@ -179,6 +179,7 @@ Tim Landscheidt tim@tim-landscheidt.de Tim Van Holder tim.van.holder@pandora.be Tobias Frost tobi@debian.org Todd Freed todd.freed@gmail.com +Tom Kramer kramer@nist.gov Tom Lane tgl@sss.pgh.pa.us Tom Tromey tromey@cygnus.com Tomasz Kłoczko kloczko.tomasz@gmail.com diff --git a/data/skeletons/yacc.c b/data/skeletons/yacc.c index 4f68f408..057cc7a0 100644 --- a/data/skeletons/yacc.c +++ b/data/skeletons/yacc.c @@ -127,6 +127,18 @@ m4_define([b4_int_type], [int])]) +# b4_state_num_type(MIN, MAX) +# --------------------------- +# Likewise, but prefer 'int' to 'unsigned' for large integers. +m4_define([b4_state_num_type], +[m4_if(b4_ints_in($@, [0], [255]), [1], [yytype_uint8], + b4_ints_in($@, [-128], [127]), [1], [yytype_int8], + + b4_ints_in($@, [0], [65535]), [1], [yytype_uint16], + b4_ints_in($@, [-32768], [32767]), [1], [yytype_int16], + + [int])]) + ## ----------------- ## ## Semantic Values. ## @@ -432,7 +444,7 @@ typedef short yytype_int16; /* State numbers. */ -typedef yytype_int16 yy_state_num; +typedef ]b4_state_num_type(0, m4_eval(b4_states_number - 1))[ yy_state_num; #ifndef YY_ @@ -740,10 +752,7 @@ do { \ { YYFPRINTF (stderr, "Stack now"); for (; yybottom <= yytop; yybottom++) - { - int yybot = *yybottom; - YYFPRINTF (stderr, " %d", yybot); - } + YYFPRINTF (stderr, " %u", (unsigned) *yybottom); YYFPRINTF (stderr, "\n"); } @@ -985,7 +994,7 @@ yy_lac (yy_state_num *yyesa, yy_state_num **yyes, } else { - yyrule = yytable[yyrule]; + yyrule = (int) yytable[yyrule]; if (yytable_value_is_error (yyrule)) { YYDPRINTF ((stderr, " Err\n")); @@ -1022,11 +1031,10 @@ yy_lac (yy_state_num *yyesa, yy_state_num **yyes, int yystate; { const int yylhs = yyr1[yyrule] - YYNTOKENS; - const int yyi = yypgoto[yylhs] + *yyesp; - yystate = ((yy_state_num) - (0 <= yyi && yyi <= YYLAST && yycheck[yyi] == *yyesp - ? yytable[yyi] - : yydefgoto[yylhs])); + const int yyi = yypgoto[yylhs] + (int) *yyesp; + yystate = (0 <= yyi && yyi <= YYLAST && yycheck[yyi] == *yyesp + ? (int) yytable[yyi] + : yydefgoto[yylhs]); } if (yyesp == yyes_prev) { @@ -1046,7 +1054,7 @@ yy_lac (yy_state_num *yyesa, yy_state_num **yyes, } *++yyesp = (yy_state_num) yystate; } - YYDPRINTF ((stderr, " G%d", (int) yystate)); + YYDPRINTF ((stderr, " G%d", yystate)); } } }]])[ @@ -1639,7 +1647,7 @@ yyread_pushed_token:]])[ goto yydefault; }]], [[ goto yydefault;]])[ - yyn = yytable[yyn]; + yyn = (int) yytable[yyn]; if (yyn <= 0) { if (yytable_value_is_error (yyn)) @@ -1661,7 +1669,7 @@ yyread_pushed_token:]])[ yychar = YYEMPTY;]b4_lac_if([[ YY_LAC_DISCARD ("shift");]])[ - yystate = (yy_state_num) yyn; + yystate = yyn; YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN *++yyvsp = yylval; YY_IGNORE_MAYBE_UNINITIALIZED_END]b4_locations_if([ @@ -1741,9 +1749,9 @@ yyreduce: number reduced by. */ { const int yylhs = yyr1[yyn] - YYNTOKENS; - const int yyi = yypgoto[yylhs] + *yyssp; + const int yyi = yypgoto[yylhs] + (int) *yyssp; yystate = (0 <= yyi && yyi <= YYLAST && yycheck[yyi] == *yyssp - ? yytable[yyi] + ? (int) yytable[yyi] : yydefgoto[yylhs]); } @@ -1859,7 +1867,7 @@ yyerrlab1: yyn += YYTERROR; if (0 <= yyn && yyn <= YYLAST && yycheck[yyn] == YYTERROR) { - yyn = yytable[yyn]; + yyn = (int) yytable[yyn]; if (0 < yyn) break; } @@ -1894,7 +1902,7 @@ yyerrlab1: /* Shift the error token. */ YY_SYMBOL_PRINT ("Shifting", yystos[yyn], yyvsp, yylsp); - yystate = (yy_state_num) yyn; + yystate = yyn; goto yynewstate; diff --git a/tests/linear b/tests/linear new file mode 100755 index 00000000..fb5512be --- /dev/null +++ b/tests/linear @@ -0,0 +1,94 @@ +#! /usr/bin/env ruby + +# Build a grammar whose LALR(1) parser has a given number of states. +# Useful to test edge cases (e.g., 256 and 257 states, etc.). + +class Linear + def initialize(states) + @states = states - 4 + @cols = Math.sqrt(@states).to_i + @lines = @states / @cols + @rest = @states % @cols + + @n = @lines * (@cols - 1) + (@rest == 0 ? 1 : @rest == 1 ? 2 : @rest) + end + + def nterms + last = @lines + ([0, 1].include?(@rest) ? 0 : 1) + (0...last).map { |i| "t#{i}" }.join(' ') + end + + def rules + res = (0...@lines).map { |i| "t#{i}:#{' N' * (@cols - 1)}" }.join("\n") + case @rest + when 0 + res += ' N' + when 1 + res += ' N N' + else + res += "\nt#{@lines}:#{" N" * @rest}" + end + res + end + + def to_s + puts <<~EOF + // states: #{@states} + // cols: #{@cols} + // lines: #{@lines} + // rest: #{@rest} + // n: #{@n} + + %code { + #include + #include + + static int yylex (void); + static void yyerror (const char *msg); + } + + %debug + %define api.value.type union + %define parse.lac full + %define parse.error verbose + %printer { fprintf (yyo, "%ld", $$); } + %token N + + %% + + exp: #{nterms} + + #{rules} + + %% + + static + int yylex (void) + { + static long count = 0; + if (count++ < #{@n}) + { + yylval.N = count; + return N; + } + else + return 0; + } + + static + void yyerror (const char *msg) + { + fprintf (stderr, "%s\\n", msg); + } + + int + main (void) + { + yydebug = !!getenv ("YYDEBUG"); + return yyparse (); + } + EOF + end +end + +puts Linear.new(ARGV[0].to_i).to_s diff --git a/tests/local.mk b/tests/local.mk index 9a2b4d51..9f55e552 100644 --- a/tests/local.mk +++ b/tests/local.mk @@ -15,7 +15,7 @@ ## You should have received a copy of the GNU General Public License ## along with this program. If not, see . -EXTRA_DIST += $(TESTSUITE_AT) %D%/testsuite %D%/testsuite.h +EXTRA_DIST += %D%/linear $(TESTSUITE_AT) %D%/testsuite %D%/testsuite.h DISTCLEANFILES += %D%/atconfig $(check_SCRIPTS) MAINTAINERCLEANFILES += $(TESTSUITE) diff --git a/tests/torture.at b/tests/torture.at index b91e059a..952add73 100644 --- a/tests/torture.at +++ b/tests/torture.at @@ -233,7 +233,7 @@ AT_DATA_HORIZONTAL_GRAMMAR([input.y], [1000]) # Ask for 200 MiB, which should be plenty even on a 64-bit host. AT_INCREASE_DATA_SIZE(204000) -AT_BISON_CHECK_NO_XML([-v -o input.c input.y]) +AT_BISON_CHECK_NO_XML([-o input.c input.y]) AT_COMPILE([input]) AT_PARSER_CHECK([input]) @@ -241,6 +241,43 @@ AT_CLEANUP +## ------------------- ## +## State number type. ## +## ------------------- ## + +# AT_TEST(NUM-STATES, TYPE) +# ------------------------- +# Check that automaton with NUM-STATES uses TYPE has state number type. +# Check that parser works. + +m4_pushdef([AT_TEST], +[AT_SETUP([State number type: $1 states]) + +AT_BISON_OPTION_PUSHDEFS + +AT_CHECK([ruby $abs_top_srcdir/tests/linear $1 >input.y || { echo "ruby does not work"; exit 77; }]) +# Old versions of GCC reject large values given to #line. +AT_FULL_COMPILE([input], [], [], [], [--no-line]) +AT_CHECK([grep 'define YYNSTATES *$1' input.c], [], [ignore]) +AT_CHECK([grep 'typedef $2 yy_state_num' input.c], [], [ignore]) +AT_PARSER_CHECK([input]) + +AT_BISON_OPTION_POPDEFS +AT_CLEANUP]) + +AT_TEST( [256], [yytype_uint8]) +AT_TEST( [257], [yytype_uint16]) +AT_TEST([65536], [yytype_uint16]) +AT_TEST([65537], [int]) + +m4_popdef([AT_TEST]) + + + +## ------------------------ ## +## Many lookahead tokens. ## +## ------------------------ ## + # AT_DATA_LOOKAHEAD_TOKENS_GRAMMAR(FILE-NAME, SIZE) # -------------------------------------------------- # Create FILE-NAME, containing a self checking parser for a grammar @@ -340,11 +377,6 @@ mv stdout $1 AT_BISON_OPTION_POPDEFS ]) - -## ------------------------ ## -## Many lookahead tokens. ## -## ------------------------ ## - AT_SETUP([Many lookahead tokens]) AT_DATA_LOOKAHEAD_TOKENS_GRAMMAR([input.y], [1000]) -- 2.11.4.GIT