From 1f36161119703e35796e9a70ace4df7ccb83d92c Mon Sep 17 00:00:00 2001 From: "D. Richard Hipp" Date: Fri, 26 Jan 2024 20:34:48 +0000 Subject: [PATCH] Experimental changes that prevent parser stack overflows by growing the parser stack with heap memory when it reaches its limit. --- doc/lemon.html | 18 ++++++++++ src/parse.y | 4 +++ tool/lemon.c | 23 ++++++++++++ tool/lempar.c | 109 ++++++++++++++++++++++++++++----------------------------- 4 files changed, 99 insertions(+), 55 deletions(-) diff --git a/doc/lemon.html b/doc/lemon.html index 66665f46f3..4147d9b31e 100644 --- a/doc/lemon.html +++ b/doc/lemon.html @@ -683,6 +683,7 @@ other than that, the order of directives in Lemon is arbitrary.

  • %endif
  • %extra_argument
  • %fallback +
  • %free
  • %if
  • %ifdef
  • %ifndef @@ -693,6 +694,7 @@ other than that, the order of directives in Lemon is arbitrary.

  • %parse_accept
  • %parse_failure
  • %right +
  • %realloc
  • %stack_overflow
  • %stack_size
  • %start_symbol @@ -1200,6 +1202,21 @@ match any input token.

    the wildcard token and some other token, the other token is always used. The wildcard token is only matched if there are no alternatives.

    + +

    4.4.26 The %realloc and %free directives

    + +

    The %realloc and %free directives defines function +that allocate and free heap memory. The signatures of these functions +should be the same as the realloc() and free() functions from the standard +C library. + +

    If both of these functions are defined +then these functions are used to allocate and free +memory for supplemental parser stack space, if the initial +parse stack space is exceeded. The initial parser stack size +is specified by either %stack_size or the +-DYYSTACKDEPTH compile-time flag. +

    5.0 Error Processing

    @@ -1224,6 +1241,7 @@ to begin parsing a new file. This is what will happen at the very first syntax error, of course, if there are no instances of the "error" non-terminal in your grammar.

    +

    6.0 History of Lemon

    diff --git a/src/parse.y b/src/parse.y index 19491192e3..0e41f54cf1 100644 --- a/src/parse.y +++ b/src/parse.y @@ -21,6 +21,10 @@ */ } +// Function used to enlarge the parser stack, if needed +%realloc sqlite3_realloc64 +%free sqlite3_free + // All token codes are small integers with #defines that begin with "TK_" %token_prefix TK_ diff --git a/tool/lemon.c b/tool/lemon.c index 7804837a06..c578b44bab 100644 --- a/tool/lemon.c +++ b/tool/lemon.c @@ -418,6 +418,8 @@ struct lemon { char *filename; /* Name of the input file */ char *outname; /* Name of the current output file */ char *tokenprefix; /* A prefix added to token names in the .h file */ + char *reallocFunc; /* Function to use to allocate stack space */ + char *freeFunc; /* Function to use to free stack space */ int nconflict; /* Number of parsing conflicts */ int nactiontab; /* Number of entries in the yy_action[] table */ int nlookaheadtab; /* Number of entries in yy_lookahead[] */ @@ -2531,6 +2533,12 @@ static void parseonetoken(struct pstate *psp) }else if( strcmp(x,"default_type")==0 ){ psp->declargslot = &(psp->gp->vartype); psp->insertLineMacro = 0; + }else if( strcmp(x,"realloc")==0 ){ + psp->declargslot = &(psp->gp->reallocFunc); + psp->insertLineMacro = 0; + }else if( strcmp(x,"free")==0 ){ + psp->declargslot = &(psp->gp->freeFunc); + psp->insertLineMacro = 0; }else if( strcmp(x,"stack_size")==0 ){ psp->declargslot = &(psp->gp->stacksize); psp->insertLineMacro = 0; @@ -4501,6 +4509,21 @@ void ReportTable( fprintf(out,"#define %sARG_FETCH\n",name); lineno++; fprintf(out,"#define %sARG_STORE\n",name); lineno++; } + if( lemp->reallocFunc ){ + fprintf(out,"#define YYREALLOC %s\n", lemp->reallocFunc); lineno++; + }else{ + fprintf(out,"#define YYREALLOC realloc\n"); lineno++; + } + if( lemp->freeFunc ){ + fprintf(out,"#define YYFREE %s\n", lemp->freeFunc); lineno++; + }else{ + fprintf(out,"#define YYFREE free\n"); lineno++; + } + if( lemp->reallocFunc && lemp->freeFunc ){ + fprintf(out,"#define YYDYNSTACK 1\n"); lineno++; + }else{ + fprintf(out,"#define YYDYNSTACK 0\n"); lineno++; + } if( lemp->ctx && lemp->ctx[0] ){ i = lemonStrlen(lemp->ctx); while( i>=1 && ISSPACE(lemp->ctx[i-1]) ) i--; diff --git a/tool/lempar.c b/tool/lempar.c index 8cc57897db..d8e64c40dd 100644 --- a/tool/lempar.c +++ b/tool/lempar.c @@ -67,6 +67,9 @@ ** ParseARG_STORE Code to store %extra_argument into yypParser ** ParseARG_FETCH Code to extract %extra_argument from yypParser ** ParseCTX_* As ParseARG_ except for %extra_context +** YYREALLOC Name of the realloc() function to use +** YYFREE Name of the free() function to use +** YYDYNSTACK True if stack space should be extended on heap ** YYERRORSYMBOL is the code number of the error symbol. If not ** defined, then do no error processing. ** YYNSTATE the combined number of states. @@ -101,6 +104,22 @@ # define yytestcase(X) #endif +/* Macro to determine if stack space has the ability to grow using +** heap memory. +*/ +#if YYSTACKDEPTH<=0 || YYDYNSTACK +# define YYGROWABLESTACK 1 +#else +# define YYGROWABLESTACK 0 +#endif + +/* Guarantee a minimum number of initial stack slots. +*/ +#if YYSTACKDEPTH<=0 +# undef YYSTACKDEPTH +# define YYSTACKDEPTH 2 /* Need a minimum stack size */ +#endif + /* Next are the tables used to determine what action to take based on the ** current state and lookahead token. These tables are used to implement @@ -212,14 +231,9 @@ struct yyParser { #endif ParseARG_SDECL /* A place to hold %extra_argument */ ParseCTX_SDECL /* A place to hold %extra_context */ -#if YYSTACKDEPTH<=0 - int yystksz; /* Current side of the stack */ - yyStackEntry *yystack; /* The parser's stack */ - yyStackEntry yystk0; /* First stack entry */ -#else - yyStackEntry yystack[YYSTACKDEPTH]; /* The parser's stack */ - yyStackEntry *yystackEnd; /* Last entry in the stack */ -#endif + yyStackEntry *yystackEnd; /* Last entry in the stack */ + yyStackEntry *yystack; /* The parser stack */ + yyStackEntry yystk0[YYSTACKDEPTH]; /* Initial stack space */ }; typedef struct yyParser yyParser; @@ -273,37 +287,45 @@ static const char *const yyRuleName[] = { #endif /* NDEBUG */ -#if YYSTACKDEPTH<=0 +#if YYGROWABLESTACK /* ** Try to increase the size of the parser stack. Return the number ** of errors. Return 0 on success. */ static int yyGrowStack(yyParser *p){ + int oldSize = 1 + (int)(p->yystackEnd - p->yystack)/sizeof(p->yystack[0]); int newSize; int idx; yyStackEntry *pNew; - newSize = p->yystksz*2 + 100; + newSize = oldSize*2 + 100; idx = p->yytos ? (int)(p->yytos - p->yystack) : 0; - if( p->yystack==&p->yystk0 ){ - pNew = malloc(newSize*sizeof(pNew[0])); - if( pNew ) pNew[0] = p->yystk0; + if( p->yystack==p->yystk0 ){ + pNew = YYREALLOC(0, newSize*sizeof(pNew[0])); + if( pNew==0 ) return 1; + memcpy(pNew, p->yystk0, sizeof(p->yystk0)); }else{ - pNew = realloc(p->yystack, newSize*sizeof(pNew[0])); + pNew = YYREALLOC(p->yystack, newSize*sizeof(pNew[0])); + if( pNew==0 ) return 1; } - if( pNew ){ - p->yystack = pNew; - p->yytos = &p->yystack[idx]; + p->yystack = pNew; + p->yytos = &p->yystack[idx]; #ifndef NDEBUG - if( yyTraceFILE ){ - fprintf(yyTraceFILE,"%sStack grows from %d to %d entries.\n", - yyTracePrompt, p->yystksz, newSize); - } -#endif - p->yystksz = newSize; + if( yyTraceFILE ){ + fprintf(yyTraceFILE,"%sStack grows from %d to %d entries.\n", + yyTracePrompt, oldSize, newSize); } - return pNew==0; +#endif + p->yystackEnd = &p->yystack[newSize-1]; + return 0; } +#endif /* YYGROWABLESTACK */ + +#if !YYGROWABLESTACK +/* For builds that do no have a growable stack, yyGrowStack always +** returns an error. +*/ +# define yyGrowStack(X) 1 #endif /* Datatype of the argument to the memory allocated passed as the @@ -323,24 +345,14 @@ void ParseInit(void *yypRawParser ParseCTX_PDECL){ #ifdef YYTRACKMAXSTACKDEPTH yypParser->yyhwm = 0; #endif -#if YYSTACKDEPTH<=0 - yypParser->yytos = NULL; - yypParser->yystack = NULL; - yypParser->yystksz = 0; - if( yyGrowStack(yypParser) ){ - yypParser->yystack = &yypParser->yystk0; - yypParser->yystksz = 1; - } -#endif + yypParser->yystack = yypParser->yystk0; + yypParser->yystackEnd = &yypParser->yystack[YYSTACKDEPTH-1]; #ifndef YYNOERRORRECOVERY yypParser->yyerrcnt = -1; #endif yypParser->yytos = yypParser->yystack; yypParser->yystack[0].stateno = 0; yypParser->yystack[0].major = 0; -#if YYSTACKDEPTH>0 - yypParser->yystackEnd = &yypParser->yystack[YYSTACKDEPTH-1]; -#endif } #ifndef Parse_ENGINEALWAYSONSTACK @@ -427,8 +439,8 @@ static void yy_pop_parser_stack(yyParser *pParser){ void ParseFinalize(void *p){ yyParser *pParser = (yyParser*)p; while( pParser->yytos>pParser->yystack ) yy_pop_parser_stack(pParser); -#if YYSTACKDEPTH<=0 - if( pParser->yystack!=&pParser->yystk0 ) free(pParser->yystack); +#if YYGROWABLESTACK + if( pParser->yystack!=pParser->yystk0 ) YYFREE(pParser->yystack); #endif } @@ -654,25 +666,19 @@ static void yy_shift( assert( yypParser->yyhwm == (int)(yypParser->yytos - yypParser->yystack) ); } #endif -#if YYSTACKDEPTH>0 - if( yypParser->yytos>yypParser->yystackEnd ){ - yypParser->yytos--; - yyStackOverflow(yypParser); - return; - } -#else - if( yypParser->yytos>=&yypParser->yystack[yypParser->yystksz] ){ + yytos = yypParser->yytos; + if( yytos>yypParser->yystackEnd ){ if( yyGrowStack(yypParser) ){ yypParser->yytos--; yyStackOverflow(yypParser); return; } + yytos = yypParser->yytos; + assert( yytos <= yypParser->yystackEnd ); } -#endif if( yyNewState > YY_MAX_SHIFT ){ yyNewState += YY_MIN_REDUCE - YY_MIN_SHIFTREDUCE; } - yytos = yypParser->yytos; yytos->stateno = yyNewState; yytos->major = yyMajor; yytos->minor.yy0 = yyMinor; @@ -911,19 +917,12 @@ void Parse( (int)(yypParser->yytos - yypParser->yystack)); } #endif -#if YYSTACKDEPTH>0 if( yypParser->yytos>=yypParser->yystackEnd ){ - yyStackOverflow(yypParser); - break; - } -#else - if( yypParser->yytos>=&yypParser->yystack[yypParser->yystksz-1] ){ if( yyGrowStack(yypParser) ){ yyStackOverflow(yypParser); break; } } -#endif } yyact = yy_reduce(yypParser,yyruleno,yymajor,yyminor ParseCTX_PARAM); }else if( yyact <= YY_MAX_SHIFTREDUCE ){ -- 2.11.4.GIT