From 1f36161119703e35796e9a70ace4df7ccb83d92c Mon Sep 17 00:00:00 2001
From: "D. Richard Hipp"
Date: Fri, 26 Jan 2024 20:34:48 +0000
Subject: [PATCH] Experimental changes that prevent parser stack overflows by
growing the parser stack with heap memory when it reaches its limit.
---
doc/lemon.html | 18 ++++++++++
src/parse.y | 4 +++
tool/lemon.c | 23 ++++++++++++
tool/lempar.c | 109 ++++++++++++++++++++++++++++-----------------------------
4 files changed, 99 insertions(+), 55 deletions(-)
diff --git a/doc/lemon.html b/doc/lemon.html
index 66665f46f3..4147d9b31e 100644
--- a/doc/lemon.html
+++ b/doc/lemon.html
@@ -683,6 +683,7 @@ other than that, the order of directives in Lemon is arbitrary.
%endif
%extra_argument
%fallback
+%free
%if
%ifdef
%ifndef
@@ -693,6 +694,7 @@ other than that, the order of directives in Lemon is arbitrary.
%parse_accept
%parse_failure
%right
+%realloc
%stack_overflow
%stack_size
%start_symbol
@@ -1200,6 +1202,21 @@ match any input token.
the wildcard token and some other token, the other token is always used.
The wildcard token is only matched if there are no alternatives.
+
+4.4.26 The %realloc and %free directives
+
+The %realloc and %free directives defines function
+that allocate and free heap memory. The signatures of these functions
+should be the same as the realloc() and free() functions from the standard
+C library.
+
+
If both of these functions are defined
+then these functions are used to allocate and free
+memory for supplemental parser stack space, if the initial
+parse stack space is exceeded. The initial parser stack size
+is specified by either %stack_size or the
+-DYYSTACKDEPTH compile-time flag.
+
5.0 Error Processing
@@ -1224,6 +1241,7 @@ to begin parsing a new file. This is what will happen at the very
first syntax error, of course, if there are no instances of the
"error" non-terminal in your grammar.
+
6.0 History of Lemon
diff --git a/src/parse.y b/src/parse.y
index 19491192e3..0e41f54cf1 100644
--- a/src/parse.y
+++ b/src/parse.y
@@ -21,6 +21,10 @@
*/
}
+// Function used to enlarge the parser stack, if needed
+%realloc sqlite3_realloc64
+%free sqlite3_free
+
// All token codes are small integers with #defines that begin with "TK_"
%token_prefix TK_
diff --git a/tool/lemon.c b/tool/lemon.c
index 7804837a06..c578b44bab 100644
--- a/tool/lemon.c
+++ b/tool/lemon.c
@@ -418,6 +418,8 @@ struct lemon {
char *filename; /* Name of the input file */
char *outname; /* Name of the current output file */
char *tokenprefix; /* A prefix added to token names in the .h file */
+ char *reallocFunc; /* Function to use to allocate stack space */
+ char *freeFunc; /* Function to use to free stack space */
int nconflict; /* Number of parsing conflicts */
int nactiontab; /* Number of entries in the yy_action[] table */
int nlookaheadtab; /* Number of entries in yy_lookahead[] */
@@ -2531,6 +2533,12 @@ static void parseonetoken(struct pstate *psp)
}else if( strcmp(x,"default_type")==0 ){
psp->declargslot = &(psp->gp->vartype);
psp->insertLineMacro = 0;
+ }else if( strcmp(x,"realloc")==0 ){
+ psp->declargslot = &(psp->gp->reallocFunc);
+ psp->insertLineMacro = 0;
+ }else if( strcmp(x,"free")==0 ){
+ psp->declargslot = &(psp->gp->freeFunc);
+ psp->insertLineMacro = 0;
}else if( strcmp(x,"stack_size")==0 ){
psp->declargslot = &(psp->gp->stacksize);
psp->insertLineMacro = 0;
@@ -4501,6 +4509,21 @@ void ReportTable(
fprintf(out,"#define %sARG_FETCH\n",name); lineno++;
fprintf(out,"#define %sARG_STORE\n",name); lineno++;
}
+ if( lemp->reallocFunc ){
+ fprintf(out,"#define YYREALLOC %s\n", lemp->reallocFunc); lineno++;
+ }else{
+ fprintf(out,"#define YYREALLOC realloc\n"); lineno++;
+ }
+ if( lemp->freeFunc ){
+ fprintf(out,"#define YYFREE %s\n", lemp->freeFunc); lineno++;
+ }else{
+ fprintf(out,"#define YYFREE free\n"); lineno++;
+ }
+ if( lemp->reallocFunc && lemp->freeFunc ){
+ fprintf(out,"#define YYDYNSTACK 1\n"); lineno++;
+ }else{
+ fprintf(out,"#define YYDYNSTACK 0\n"); lineno++;
+ }
if( lemp->ctx && lemp->ctx[0] ){
i = lemonStrlen(lemp->ctx);
while( i>=1 && ISSPACE(lemp->ctx[i-1]) ) i--;
diff --git a/tool/lempar.c b/tool/lempar.c
index 8cc57897db..d8e64c40dd 100644
--- a/tool/lempar.c
+++ b/tool/lempar.c
@@ -67,6 +67,9 @@
** ParseARG_STORE Code to store %extra_argument into yypParser
** ParseARG_FETCH Code to extract %extra_argument from yypParser
** ParseCTX_* As ParseARG_ except for %extra_context
+** YYREALLOC Name of the realloc() function to use
+** YYFREE Name of the free() function to use
+** YYDYNSTACK True if stack space should be extended on heap
** YYERRORSYMBOL is the code number of the error symbol. If not
** defined, then do no error processing.
** YYNSTATE the combined number of states.
@@ -101,6 +104,22 @@
# define yytestcase(X)
#endif
+/* Macro to determine if stack space has the ability to grow using
+** heap memory.
+*/
+#if YYSTACKDEPTH<=0 || YYDYNSTACK
+# define YYGROWABLESTACK 1
+#else
+# define YYGROWABLESTACK 0
+#endif
+
+/* Guarantee a minimum number of initial stack slots.
+*/
+#if YYSTACKDEPTH<=0
+# undef YYSTACKDEPTH
+# define YYSTACKDEPTH 2 /* Need a minimum stack size */
+#endif
+
/* Next are the tables used to determine what action to take based on the
** current state and lookahead token. These tables are used to implement
@@ -212,14 +231,9 @@ struct yyParser {
#endif
ParseARG_SDECL /* A place to hold %extra_argument */
ParseCTX_SDECL /* A place to hold %extra_context */
-#if YYSTACKDEPTH<=0
- int yystksz; /* Current side of the stack */
- yyStackEntry *yystack; /* The parser's stack */
- yyStackEntry yystk0; /* First stack entry */
-#else
- yyStackEntry yystack[YYSTACKDEPTH]; /* The parser's stack */
- yyStackEntry *yystackEnd; /* Last entry in the stack */
-#endif
+ yyStackEntry *yystackEnd; /* Last entry in the stack */
+ yyStackEntry *yystack; /* The parser stack */
+ yyStackEntry yystk0[YYSTACKDEPTH]; /* Initial stack space */
};
typedef struct yyParser yyParser;
@@ -273,37 +287,45 @@ static const char *const yyRuleName[] = {
#endif /* NDEBUG */
-#if YYSTACKDEPTH<=0
+#if YYGROWABLESTACK
/*
** Try to increase the size of the parser stack. Return the number
** of errors. Return 0 on success.
*/
static int yyGrowStack(yyParser *p){
+ int oldSize = 1 + (int)(p->yystackEnd - p->yystack)/sizeof(p->yystack[0]);
int newSize;
int idx;
yyStackEntry *pNew;
- newSize = p->yystksz*2 + 100;
+ newSize = oldSize*2 + 100;
idx = p->yytos ? (int)(p->yytos - p->yystack) : 0;
- if( p->yystack==&p->yystk0 ){
- pNew = malloc(newSize*sizeof(pNew[0]));
- if( pNew ) pNew[0] = p->yystk0;
+ if( p->yystack==p->yystk0 ){
+ pNew = YYREALLOC(0, newSize*sizeof(pNew[0]));
+ if( pNew==0 ) return 1;
+ memcpy(pNew, p->yystk0, sizeof(p->yystk0));
}else{
- pNew = realloc(p->yystack, newSize*sizeof(pNew[0]));
+ pNew = YYREALLOC(p->yystack, newSize*sizeof(pNew[0]));
+ if( pNew==0 ) return 1;
}
- if( pNew ){
- p->yystack = pNew;
- p->yytos = &p->yystack[idx];
+ p->yystack = pNew;
+ p->yytos = &p->yystack[idx];
#ifndef NDEBUG
- if( yyTraceFILE ){
- fprintf(yyTraceFILE,"%sStack grows from %d to %d entries.\n",
- yyTracePrompt, p->yystksz, newSize);
- }
-#endif
- p->yystksz = newSize;
+ if( yyTraceFILE ){
+ fprintf(yyTraceFILE,"%sStack grows from %d to %d entries.\n",
+ yyTracePrompt, oldSize, newSize);
}
- return pNew==0;
+#endif
+ p->yystackEnd = &p->yystack[newSize-1];
+ return 0;
}
+#endif /* YYGROWABLESTACK */
+
+#if !YYGROWABLESTACK
+/* For builds that do no have a growable stack, yyGrowStack always
+** returns an error.
+*/
+# define yyGrowStack(X) 1
#endif
/* Datatype of the argument to the memory allocated passed as the
@@ -323,24 +345,14 @@ void ParseInit(void *yypRawParser ParseCTX_PDECL){
#ifdef YYTRACKMAXSTACKDEPTH
yypParser->yyhwm = 0;
#endif
-#if YYSTACKDEPTH<=0
- yypParser->yytos = NULL;
- yypParser->yystack = NULL;
- yypParser->yystksz = 0;
- if( yyGrowStack(yypParser) ){
- yypParser->yystack = &yypParser->yystk0;
- yypParser->yystksz = 1;
- }
-#endif
+ yypParser->yystack = yypParser->yystk0;
+ yypParser->yystackEnd = &yypParser->yystack[YYSTACKDEPTH-1];
#ifndef YYNOERRORRECOVERY
yypParser->yyerrcnt = -1;
#endif
yypParser->yytos = yypParser->yystack;
yypParser->yystack[0].stateno = 0;
yypParser->yystack[0].major = 0;
-#if YYSTACKDEPTH>0
- yypParser->yystackEnd = &yypParser->yystack[YYSTACKDEPTH-1];
-#endif
}
#ifndef Parse_ENGINEALWAYSONSTACK
@@ -427,8 +439,8 @@ static void yy_pop_parser_stack(yyParser *pParser){
void ParseFinalize(void *p){
yyParser *pParser = (yyParser*)p;
while( pParser->yytos>pParser->yystack ) yy_pop_parser_stack(pParser);
-#if YYSTACKDEPTH<=0
- if( pParser->yystack!=&pParser->yystk0 ) free(pParser->yystack);
+#if YYGROWABLESTACK
+ if( pParser->yystack!=pParser->yystk0 ) YYFREE(pParser->yystack);
#endif
}
@@ -654,25 +666,19 @@ static void yy_shift(
assert( yypParser->yyhwm == (int)(yypParser->yytos - yypParser->yystack) );
}
#endif
-#if YYSTACKDEPTH>0
- if( yypParser->yytos>yypParser->yystackEnd ){
- yypParser->yytos--;
- yyStackOverflow(yypParser);
- return;
- }
-#else
- if( yypParser->yytos>=&yypParser->yystack[yypParser->yystksz] ){
+ yytos = yypParser->yytos;
+ if( yytos>yypParser->yystackEnd ){
if( yyGrowStack(yypParser) ){
yypParser->yytos--;
yyStackOverflow(yypParser);
return;
}
+ yytos = yypParser->yytos;
+ assert( yytos <= yypParser->yystackEnd );
}
-#endif
if( yyNewState > YY_MAX_SHIFT ){
yyNewState += YY_MIN_REDUCE - YY_MIN_SHIFTREDUCE;
}
- yytos = yypParser->yytos;
yytos->stateno = yyNewState;
yytos->major = yyMajor;
yytos->minor.yy0 = yyMinor;
@@ -911,19 +917,12 @@ void Parse(
(int)(yypParser->yytos - yypParser->yystack));
}
#endif
-#if YYSTACKDEPTH>0
if( yypParser->yytos>=yypParser->yystackEnd ){
- yyStackOverflow(yypParser);
- break;
- }
-#else
- if( yypParser->yytos>=&yypParser->yystack[yypParser->yystksz-1] ){
if( yyGrowStack(yypParser) ){
yyStackOverflow(yypParser);
break;
}
}
-#endif
}
yyact = yy_reduce(yypParser,yyruleno,yymajor,yyminor ParseCTX_PARAM);
}else if( yyact <= YY_MAX_SHIFTREDUCE ){
--
2.11.4.GIT