4 ** The author disclaims copyright to this source code. In place of
5 ** a legal notice, here is a blessing:
7 ** May you do good and not evil.
8 ** May you find forgiveness for yourself and forgive others.
9 ** May you share freely, never taking more than you give.
11 ******************************************************************************
13 ** This module contains code that implements a parser for fts3 query strings
14 ** (the right-hand argument to the MATCH operator). Because the supported
15 ** syntax is relatively simple, the whole tokenizer/parser system is
19 #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
22 ** By default, this module parses the legacy syntax that has been
23 ** traditionally used by fts3. Or, if SQLITE_ENABLE_FTS3_PARENTHESIS
24 ** is defined, then it uses the new syntax. The differences between
25 ** the new and the old syntaxes are:
27 ** a) The new syntax supports parenthesis. The old does not.
29 ** b) The new syntax supports the AND and NOT operators. The old does not.
31 ** c) The old syntax supports the "-" token qualifier. This is not
32 ** supported by the new syntax (it is replaced by the NOT operator).
34 ** d) When using the old syntax, the OR operator has a greater precedence
35 ** than an implicit AND. When using the new, both implicity and explicit
36 ** AND operators have a higher precedence than OR.
38 ** If compiled with SQLITE_TEST defined, then this module exports the
39 ** symbol "int sqlite3_fts3_enable_parentheses". Setting this variable
40 ** to zero causes the module to use the old syntax. If it is set to
41 ** non-zero the new syntax is activated. This is so both syntaxes can
42 ** be tested using a single build of testfixture.
44 ** The following describes the syntax supported by the fts3 MATCH
45 ** operator in a similar format to that used by the lemon parser
46 ** generator. This module does not use actually lemon, it uses a
49 ** query ::= andexpr (OR andexpr)*.
51 ** andexpr ::= notexpr (AND? notexpr)*.
53 ** notexpr ::= nearexpr (NOT nearexpr|-TOKEN)*.
54 ** notexpr ::= LP query RP.
56 ** nearexpr ::= phrase (NEAR distance_opt nearexpr)*.
59 ** distance_opt ::= / INTEGER.
62 ** phrase ::= COLUMN:TOKEN.
63 ** phrase ::= "TOKEN TOKEN TOKEN...".
67 int sqlite3_fts3_enable_parentheses
= 0;
69 # ifdef SQLITE_ENABLE_FTS3_PARENTHESIS
70 # define sqlite3_fts3_enable_parentheses 1
72 # define sqlite3_fts3_enable_parentheses 0
77 ** Default span for NEAR operators.
79 #define SQLITE_FTS3_DEFAULT_NEAR_PARAM 10
86 ** This variable is used by function getNextNode(). When getNextNode() is
87 ** called, it sets ParseContext.isNot to true if the 'next node' is a
88 ** FTSQUERY_PHRASE with a unary "-" attached to it. i.e. "mysql" in the
89 ** FTS3 query "sqlite -mysql". Otherwise, ParseContext.isNot is set to
92 typedef struct ParseContext ParseContext
;
94 sqlite3_tokenizer
*pTokenizer
; /* Tokenizer module */
95 int iLangid
; /* Language id used with tokenizer */
96 const char **azCol
; /* Array of column names for fts3 table */
97 int bFts4
; /* True to allow FTS4-only syntax */
98 int nCol
; /* Number of entries in azCol[] */
99 int iDefaultCol
; /* Default column to query */
100 int isNot
; /* True if getNextNode() sees a unary - */
101 sqlite3_context
*pCtx
; /* Write error message here */
102 int nNest
; /* Number of nested brackets */
106 ** This function is equivalent to the standard isspace() function.
108 ** The standard isspace() can be awkward to use safely, because although it
109 ** is defined to accept an argument of type int, its behavior when passed
110 ** an integer that falls outside of the range of the unsigned char type
111 ** is undefined (and sometimes, "undefined" means segfault). This wrapper
112 ** is defined to accept an argument of type char, and always returns 0 for
113 ** any values that fall outside of the range of the unsigned char type (i.e.
116 static int fts3isspace(char c
){
117 return c
==' ' || c
=='\t' || c
=='\n' || c
=='\r' || c
=='\v' || c
=='\f';
121 ** Allocate nByte bytes of memory using sqlite3_malloc(). If successful,
122 ** zero the memory before returning a pointer to it. If unsuccessful,
125 void *sqlite3Fts3MallocZero(sqlite3_int64 nByte
){
126 void *pRet
= sqlite3_malloc64(nByte
);
127 if( pRet
) memset(pRet
, 0, nByte
);
131 int sqlite3Fts3OpenTokenizer(
132 sqlite3_tokenizer
*pTokenizer
,
136 sqlite3_tokenizer_cursor
**ppCsr
138 sqlite3_tokenizer_module
const *pModule
= pTokenizer
->pModule
;
139 sqlite3_tokenizer_cursor
*pCsr
= 0;
142 rc
= pModule
->xOpen(pTokenizer
, z
, n
, &pCsr
);
143 assert( rc
==SQLITE_OK
|| pCsr
==0 );
145 pCsr
->pTokenizer
= pTokenizer
;
146 if( pModule
->iVersion
>=1 ){
147 rc
= pModule
->xLanguageid(pCsr
, iLangid
);
149 pModule
->xClose(pCsr
);
159 ** Function getNextNode(), which is called by fts3ExprParse(), may itself
160 ** call fts3ExprParse(). So this forward declaration is required.
162 static int fts3ExprParse(ParseContext
*, const char *, int, Fts3Expr
**, int *);
165 ** Extract the next token from buffer z (length n) using the tokenizer
166 ** and other information (column names etc.) in pParse. Create an Fts3Expr
167 ** structure of type FTSQUERY_PHRASE containing a phrase consisting of this
168 ** single token and set *ppExpr to point to it. If the end of the buffer is
169 ** reached before a token is found, set *ppExpr to zero. It is the
170 ** responsibility of the caller to eventually deallocate the allocated
171 ** Fts3Expr structure (if any) by passing it to sqlite3_free().
173 ** Return SQLITE_OK if successful, or SQLITE_NOMEM if a memory allocation
176 static int getNextToken(
177 ParseContext
*pParse
, /* fts3 query parse context */
178 int iCol
, /* Value for Fts3Phrase.iColumn */
179 const char *z
, int n
, /* Input string */
180 Fts3Expr
**ppExpr
, /* OUT: expression */
181 int *pnConsumed
/* OUT: Number of bytes consumed */
183 sqlite3_tokenizer
*pTokenizer
= pParse
->pTokenizer
;
184 sqlite3_tokenizer_module
const *pModule
= pTokenizer
->pModule
;
186 sqlite3_tokenizer_cursor
*pCursor
;
190 /* Set variable i to the maximum number of bytes of input to tokenize. */
192 if( sqlite3_fts3_enable_parentheses
&& (z
[i
]=='(' || z
[i
]==')') ) break;
193 if( z
[i
]=='"' ) break;
197 rc
= sqlite3Fts3OpenTokenizer(pTokenizer
, pParse
->iLangid
, z
, i
, &pCursor
);
200 int nToken
= 0, iStart
= 0, iEnd
= 0, iPosition
= 0;
201 sqlite3_int64 nByte
; /* total space to allocate */
203 rc
= pModule
->xNext(pCursor
, &zToken
, &nToken
, &iStart
, &iEnd
, &iPosition
);
205 nByte
= sizeof(Fts3Expr
) + sizeof(Fts3Phrase
) + nToken
;
206 pRet
= (Fts3Expr
*)sqlite3Fts3MallocZero(nByte
);
210 pRet
->eType
= FTSQUERY_PHRASE
;
211 pRet
->pPhrase
= (Fts3Phrase
*)&pRet
[1];
212 pRet
->pPhrase
->nToken
= 1;
213 pRet
->pPhrase
->iColumn
= iCol
;
214 pRet
->pPhrase
->aToken
[0].n
= nToken
;
215 pRet
->pPhrase
->aToken
[0].z
= (char *)&pRet
->pPhrase
[1];
216 memcpy(pRet
->pPhrase
->aToken
[0].z
, zToken
, nToken
);
218 if( iEnd
<n
&& z
[iEnd
]=='*' ){
219 pRet
->pPhrase
->aToken
[0].isPrefix
= 1;
224 if( !sqlite3_fts3_enable_parentheses
225 && iStart
>0 && z
[iStart
-1]=='-'
229 }else if( pParse
->bFts4
&& iStart
>0 && z
[iStart
-1]=='^' ){
230 pRet
->pPhrase
->aToken
[0].bFirst
= 1;
239 }else if( i
&& rc
==SQLITE_DONE
){
243 pModule
->xClose(pCursor
);
252 ** Enlarge a memory allocation. If an out-of-memory allocation occurs,
253 ** then free the old allocation.
255 static void *fts3ReallocOrFree(void *pOrig
, sqlite3_int64 nNew
){
256 void *pRet
= sqlite3_realloc64(pOrig
, nNew
);
264 ** Buffer zInput, length nInput, contains the contents of a quoted string
265 ** that appeared as part of an fts3 query expression. Neither quote character
266 ** is included in the buffer. This function attempts to tokenize the entire
267 ** input buffer and create an Fts3Expr structure of type FTSQUERY_PHRASE
268 ** containing the results.
270 ** If successful, SQLITE_OK is returned and *ppExpr set to point at the
271 ** allocated Fts3Expr structure. Otherwise, either SQLITE_NOMEM (out of memory
272 ** error) or SQLITE_ERROR (tokenization error) is returned and *ppExpr set
275 static int getNextString(
276 ParseContext
*pParse
, /* fts3 query parse context */
277 const char *zInput
, int nInput
, /* Input string */
278 Fts3Expr
**ppExpr
/* OUT: expression */
280 sqlite3_tokenizer
*pTokenizer
= pParse
->pTokenizer
;
281 sqlite3_tokenizer_module
const *pModule
= pTokenizer
->pModule
;
284 sqlite3_tokenizer_cursor
*pCursor
= 0;
288 const int nSpace
= sizeof(Fts3Expr
) + sizeof(Fts3Phrase
);
291 /* The final Fts3Expr data structure, including the Fts3Phrase,
292 ** Fts3PhraseToken structures token buffers are all stored as a single
293 ** allocation so that the expression can be freed with a single call to
294 ** sqlite3_free(). Setting this up requires a two pass approach.
296 ** The first pass, in the block below, uses a tokenizer cursor to iterate
297 ** through the tokens in the expression. This pass uses fts3ReallocOrFree()
298 ** to assemble data in two dynamic buffers:
300 ** Buffer p: Points to the Fts3Expr structure, followed by the Fts3Phrase
301 ** structure, followed by the array of Fts3PhraseToken
302 ** structures. This pass only populates the Fts3PhraseToken array.
304 ** Buffer zTemp: Contains copies of all tokens.
306 ** The second pass, in the block that begins "if( rc==SQLITE_DONE )" below,
307 ** appends buffer zTemp to buffer p, and fills in the Fts3Expr and Fts3Phrase
310 rc
= sqlite3Fts3OpenTokenizer(
311 pTokenizer
, pParse
->iLangid
, zInput
, nInput
, &pCursor
);
314 for(ii
=0; rc
==SQLITE_OK
; ii
++){
316 int nByte
= 0, iBegin
= 0, iEnd
= 0, iPos
= 0;
317 rc
= pModule
->xNext(pCursor
, &zByte
, &nByte
, &iBegin
, &iEnd
, &iPos
);
319 Fts3PhraseToken
*pToken
;
321 p
= fts3ReallocOrFree(p
, nSpace
+ ii
*sizeof(Fts3PhraseToken
));
322 if( !p
) goto no_mem
;
324 zTemp
= fts3ReallocOrFree(zTemp
, nTemp
+ nByte
);
325 if( !zTemp
) goto no_mem
;
327 assert( nToken
==ii
);
328 pToken
= &((Fts3Phrase
*)(&p
[1]))->aToken
[ii
];
329 memset(pToken
, 0, sizeof(Fts3PhraseToken
));
331 memcpy(&zTemp
[nTemp
], zByte
, nByte
);
335 pToken
->isPrefix
= (iEnd
<nInput
&& zInput
[iEnd
]=='*');
336 pToken
->bFirst
= (iBegin
>0 && zInput
[iBegin
-1]=='^');
341 pModule
->xClose(pCursor
);
345 if( rc
==SQLITE_DONE
){
349 p
= fts3ReallocOrFree(p
, nSpace
+ nToken
*sizeof(Fts3PhraseToken
) + nTemp
);
350 if( !p
) goto no_mem
;
351 memset(p
, 0, (char *)&(((Fts3Phrase
*)&p
[1])->aToken
[0])-(char *)p
);
352 p
->eType
= FTSQUERY_PHRASE
;
353 p
->pPhrase
= (Fts3Phrase
*)&p
[1];
354 p
->pPhrase
->iColumn
= pParse
->iDefaultCol
;
355 p
->pPhrase
->nToken
= nToken
;
357 zBuf
= (char *)&p
->pPhrase
->aToken
[nToken
];
359 memcpy(zBuf
, zTemp
, nTemp
);
365 for(jj
=0; jj
<p
->pPhrase
->nToken
; jj
++){
366 p
->pPhrase
->aToken
[jj
].z
= zBuf
;
367 zBuf
+= p
->pPhrase
->aToken
[jj
].n
;
377 pModule
->xClose(pCursor
);
386 ** The output variable *ppExpr is populated with an allocated Fts3Expr
387 ** structure, or set to 0 if the end of the input buffer is reached.
389 ** Returns an SQLite error code. SQLITE_OK if everything works, SQLITE_NOMEM
390 ** if a malloc failure occurs, or SQLITE_ERROR if a parse error is encountered.
391 ** If SQLITE_ERROR is returned, pContext is populated with an error message.
393 static int getNextNode(
394 ParseContext
*pParse
, /* fts3 query parse context */
395 const char *z
, int n
, /* Input string */
396 Fts3Expr
**ppExpr
, /* OUT: expression */
397 int *pnConsumed
/* OUT: Number of bytes consumed */
399 static const struct Fts3Keyword
{
400 char *z
; /* Keyword text */
401 unsigned char n
; /* Length of the keyword */
402 unsigned char parenOnly
; /* Only valid in paren mode */
403 unsigned char eType
; /* Keyword code */
405 { "OR" , 2, 0, FTSQUERY_OR
},
406 { "AND", 3, 1, FTSQUERY_AND
},
407 { "NOT", 3, 1, FTSQUERY_NOT
},
408 { "NEAR", 4, 0, FTSQUERY_NEAR
}
416 const char *zInput
= z
;
421 /* Skip over any whitespace before checking for a keyword, an open or
422 ** close bracket, or a quoted string.
424 while( nInput
>0 && fts3isspace(*zInput
) ){
432 /* See if we are dealing with a keyword. */
433 for(ii
=0; ii
<(int)(sizeof(aKeyword
)/sizeof(struct Fts3Keyword
)); ii
++){
434 const struct Fts3Keyword
*pKey
= &aKeyword
[ii
];
436 if( (pKey
->parenOnly
& ~sqlite3_fts3_enable_parentheses
)!=0 ){
440 if( nInput
>=pKey
->n
&& 0==memcmp(zInput
, pKey
->z
, pKey
->n
) ){
441 int nNear
= SQLITE_FTS3_DEFAULT_NEAR_PARAM
;
445 /* If this is a "NEAR" keyword, check for an explicit nearness. */
446 if( pKey
->eType
==FTSQUERY_NEAR
){
448 if( zInput
[4]=='/' && zInput
[5]>='0' && zInput
[5]<='9' ){
449 nKey
+= 1+sqlite3Fts3ReadInt(&zInput
[nKey
+1], &nNear
);
453 /* At this point this is probably a keyword. But for that to be true,
454 ** the next byte must contain either whitespace, an open or close
455 ** parenthesis, a quote character, or EOF.
457 cNext
= zInput
[nKey
];
458 if( fts3isspace(cNext
)
459 || cNext
=='"' || cNext
=='(' || cNext
==')' || cNext
==0
461 pRet
= (Fts3Expr
*)sqlite3Fts3MallocZero(sizeof(Fts3Expr
));
465 pRet
->eType
= pKey
->eType
;
468 *pnConsumed
= (int)((zInput
- z
) + nKey
);
472 /* Turns out that wasn't a keyword after all. This happens if the
473 ** user has supplied a token such as "ORacle". Continue.
478 /* See if we are dealing with a quoted phrase. If this is the case, then
479 ** search for the closing quote and pass the whole string to getNextString()
480 ** for processing. This is easy to do, as fts3 has no syntax for escaping
481 ** a quote character embedded in a string.
484 for(ii
=1; ii
<nInput
&& zInput
[ii
]!='"'; ii
++);
485 *pnConsumed
= (int)((zInput
- z
) + ii
+ 1);
489 return getNextString(pParse
, &zInput
[1], ii
-1, ppExpr
);
492 if( sqlite3_fts3_enable_parentheses
){
496 #if !defined(SQLITE_MAX_EXPR_DEPTH)
497 if( pParse
->nNest
>1000 ) return SQLITE_ERROR
;
498 #elif SQLITE_MAX_EXPR_DEPTH>0
499 if( pParse
->nNest
>SQLITE_MAX_EXPR_DEPTH
) return SQLITE_ERROR
;
501 rc
= fts3ExprParse(pParse
, zInput
+1, nInput
-1, ppExpr
, &nConsumed
);
502 *pnConsumed
= (int)(zInput
- z
) + 1 + nConsumed
;
504 }else if( *zInput
==')' ){
506 *pnConsumed
= (int)((zInput
- z
) + 1);
512 /* If control flows to this point, this must be a regular token, or
513 ** the end of the input. Read a regular token using the sqlite3_tokenizer
514 ** interface. Before doing so, figure out if there is an explicit
515 ** column specifier for the token.
517 ** TODO: Strangely, it is not possible to associate a column specifier
518 ** with a quoted phrase, only with a single token. Not sure if this was
519 ** an implementation artifact or an intentional decision when fts3 was
520 ** first implemented. Whichever it was, this module duplicates the
523 iCol
= pParse
->iDefaultCol
;
525 for(ii
=0; ii
<pParse
->nCol
; ii
++){
526 const char *zStr
= pParse
->azCol
[ii
];
527 int nStr
= (int)strlen(zStr
);
528 if( nInput
>nStr
&& zInput
[nStr
]==':'
529 && sqlite3_strnicmp(zStr
, zInput
, nStr
)==0
532 iColLen
= (int)((zInput
- z
) + nStr
+ 1);
536 rc
= getNextToken(pParse
, iCol
, &z
[iColLen
], n
-iColLen
, ppExpr
, pnConsumed
);
537 *pnConsumed
+= iColLen
;
542 ** The argument is an Fts3Expr structure for a binary operator (any type
543 ** except an FTSQUERY_PHRASE). Return an integer value representing the
544 ** precedence of the operator. Lower values have a higher precedence (i.e.
545 ** group more tightly). For example, in the C language, the == operator
546 ** groups more tightly than ||, and would therefore have a higher precedence.
548 ** When using the new fts3 query syntax (when SQLITE_ENABLE_FTS3_PARENTHESIS
549 ** is defined), the order of the operators in precedence from highest to
554 ** AND (including implicit ANDs)
557 ** Note that when using the old query syntax, the OR operator has a higher
558 ** precedence than the AND operator.
560 static int opPrecedence(Fts3Expr
*p
){
561 assert( p
->eType
!=FTSQUERY_PHRASE
);
562 if( sqlite3_fts3_enable_parentheses
){
564 }else if( p
->eType
==FTSQUERY_NEAR
){
566 }else if( p
->eType
==FTSQUERY_OR
){
569 assert( p
->eType
==FTSQUERY_AND
);
574 ** Argument ppHead contains a pointer to the current head of a query
575 ** expression tree being parsed. pPrev is the expression node most recently
576 ** inserted into the tree. This function adds pNew, which is always a binary
577 ** operator node, into the expression tree based on the relative precedence
578 ** of pNew and the existing nodes of the tree. This may result in the head
579 ** of the tree changing, in which case *ppHead is set to the new root node.
581 static void insertBinaryOperator(
582 Fts3Expr
**ppHead
, /* Pointer to the root node of a tree */
583 Fts3Expr
*pPrev
, /* Node most recently inserted into the tree */
584 Fts3Expr
*pNew
/* New binary node to insert into expression tree */
586 Fts3Expr
*pSplit
= pPrev
;
587 while( pSplit
->pParent
&& opPrecedence(pSplit
->pParent
)<=opPrecedence(pNew
) ){
588 pSplit
= pSplit
->pParent
;
591 if( pSplit
->pParent
){
592 assert( pSplit
->pParent
->pRight
==pSplit
);
593 pSplit
->pParent
->pRight
= pNew
;
594 pNew
->pParent
= pSplit
->pParent
;
598 pNew
->pLeft
= pSplit
;
599 pSplit
->pParent
= pNew
;
603 ** Parse the fts3 query expression found in buffer z, length n. This function
604 ** returns either when the end of the buffer is reached or an unmatched
605 ** closing bracket - ')' - is encountered.
607 ** If successful, SQLITE_OK is returned, *ppExpr is set to point to the
608 ** parsed form of the expression and *pnConsumed is set to the number of
609 ** bytes read from buffer z. Otherwise, *ppExpr is set to 0 and SQLITE_NOMEM
610 ** (out of memory error) or SQLITE_ERROR (parse error) is returned.
612 static int fts3ExprParse(
613 ParseContext
*pParse
, /* fts3 query parse context */
614 const char *z
, int n
, /* Text of MATCH query */
615 Fts3Expr
**ppExpr
, /* OUT: Parsed query structure */
616 int *pnConsumed
/* OUT: Number of bytes consumed */
620 Fts3Expr
*pNotBranch
= 0; /* Only used in legacy parse mode */
624 int isRequirePhrase
= 1;
626 while( rc
==SQLITE_OK
){
630 rc
= getNextNode(pParse
, zIn
, nIn
, &p
, &nByte
);
631 assert( nByte
>0 || (rc
!=SQLITE_OK
&& p
==0) );
636 if( !sqlite3_fts3_enable_parentheses
637 && p
->eType
==FTSQUERY_PHRASE
&& pParse
->isNot
639 /* Create an implicit NOT operator. */
640 Fts3Expr
*pNot
= sqlite3Fts3MallocZero(sizeof(Fts3Expr
));
642 sqlite3Fts3ExprFree(p
);
646 pNot
->eType
= FTSQUERY_NOT
;
650 pNot
->pLeft
= pNotBranch
;
651 pNotBranch
->pParent
= pNot
;
656 int eType
= p
->eType
;
657 isPhrase
= (eType
==FTSQUERY_PHRASE
|| p
->pLeft
);
659 /* The isRequirePhrase variable is set to true if a phrase or
660 ** an expression contained in parenthesis is required. If a
661 ** binary operator (AND, OR, NOT or NEAR) is encounted when
662 ** isRequirePhrase is set, this is a syntax error.
664 if( !isPhrase
&& isRequirePhrase
){
665 sqlite3Fts3ExprFree(p
);
670 if( isPhrase
&& !isRequirePhrase
){
671 /* Insert an implicit AND operator. */
673 assert( pRet
&& pPrev
);
674 pAnd
= sqlite3Fts3MallocZero(sizeof(Fts3Expr
));
676 sqlite3Fts3ExprFree(p
);
680 pAnd
->eType
= FTSQUERY_AND
;
681 insertBinaryOperator(&pRet
, pPrev
, pAnd
);
685 /* This test catches attempts to make either operand of a NEAR
686 ** operator something other than a phrase. For example, either of
689 ** (bracketed expression) NEAR phrase
690 ** phrase NEAR (bracketed expression)
692 ** Return an error in either case.
695 (eType
==FTSQUERY_NEAR
&& !isPhrase
&& pPrev
->eType
!=FTSQUERY_PHRASE
)
696 || (eType
!=FTSQUERY_PHRASE
&& isPhrase
&& pPrev
->eType
==FTSQUERY_NEAR
)
698 sqlite3Fts3ExprFree(p
);
705 assert( pPrev
&& pPrev
->pLeft
&& pPrev
->pRight
==0 );
712 insertBinaryOperator(&pRet
, pPrev
, p
);
714 isRequirePhrase
= !isPhrase
;
720 assert( rc
!=SQLITE_OK
|| (nByte
>0 && nByte
<=nIn
) );
725 if( rc
==SQLITE_DONE
&& pRet
&& isRequirePhrase
){
729 if( rc
==SQLITE_DONE
){
731 if( !sqlite3_fts3_enable_parentheses
&& pNotBranch
){
735 Fts3Expr
*pIter
= pNotBranch
;
736 while( pIter
->pLeft
){
737 pIter
= pIter
->pLeft
;
740 pRet
->pParent
= pIter
;
745 *pnConsumed
= n
- nIn
;
749 sqlite3Fts3ExprFree(pRet
);
750 sqlite3Fts3ExprFree(pNotBranch
);
758 ** Return SQLITE_ERROR if the maximum depth of the expression tree passed
759 ** as the only argument is more than nMaxDepth.
761 static int fts3ExprCheckDepth(Fts3Expr
*p
, int nMaxDepth
){
767 rc
= fts3ExprCheckDepth(p
->pLeft
, nMaxDepth
-1);
769 rc
= fts3ExprCheckDepth(p
->pRight
, nMaxDepth
-1);
777 ** This function attempts to transform the expression tree at (*pp) to
778 ** an equivalent but more balanced form. The tree is modified in place.
779 ** If successful, SQLITE_OK is returned and (*pp) set to point to the
780 ** new root expression node.
782 ** nMaxDepth is the maximum allowable depth of the balanced sub-tree.
784 ** Otherwise, if an error occurs, an SQLite error code is returned and
785 ** expression (*pp) freed.
787 static int fts3ExprBalance(Fts3Expr
**pp
, int nMaxDepth
){
788 int rc
= SQLITE_OK
; /* Return code */
789 Fts3Expr
*pRoot
= *pp
; /* Initial root node */
790 Fts3Expr
*pFree
= 0; /* List of free nodes. Linked by pParent. */
791 int eType
= pRoot
->eType
; /* Type of node in this tree */
798 if( (eType
==FTSQUERY_AND
|| eType
==FTSQUERY_OR
) ){
800 apLeaf
= (Fts3Expr
**)sqlite3_malloc64(sizeof(Fts3Expr
*) * nMaxDepth
);
804 memset(apLeaf
, 0, sizeof(Fts3Expr
*) * nMaxDepth
);
811 /* Set $p to point to the left-most leaf in the tree of eType nodes. */
812 for(p
=pRoot
; p
->eType
==eType
; p
=p
->pLeft
){
813 assert( p
->pParent
==0 || p
->pParent
->pLeft
==p
);
814 assert( p
->pLeft
&& p
->pRight
);
817 /* This loop runs once for each leaf in the tree of eType nodes. */
820 Fts3Expr
*pParent
= p
->pParent
; /* Current parent of p */
822 assert( pParent
==0 || pParent
->pLeft
==p
);
829 rc
= fts3ExprBalance(&p
, nMaxDepth
-1);
830 if( rc
!=SQLITE_OK
) break;
832 for(iLvl
=0; p
&& iLvl
<nMaxDepth
; iLvl
++){
833 if( apLeaf
[iLvl
]==0 ){
838 pFree
->pLeft
= apLeaf
[iLvl
];
840 pFree
->pLeft
->pParent
= pFree
;
841 pFree
->pRight
->pParent
= pFree
;
844 pFree
= pFree
->pParent
;
850 sqlite3Fts3ExprFree(p
);
855 /* If that was the last leaf node, break out of the loop */
856 if( pParent
==0 ) break;
858 /* Set $p to point to the next leaf in the tree of eType nodes */
859 for(p
=pParent
->pRight
; p
->eType
==eType
; p
=p
->pLeft
);
861 /* Remove pParent from the original tree. */
862 assert( pParent
->pParent
==0 || pParent
->pParent
->pLeft
==pParent
);
863 pParent
->pRight
->pParent
= pParent
->pParent
;
864 if( pParent
->pParent
){
865 pParent
->pParent
->pLeft
= pParent
->pRight
;
867 assert( pParent
==pRoot
);
868 pRoot
= pParent
->pRight
;
871 /* Link pParent into the free node list. It will be used as an
872 ** internal node of the new tree. */
873 pParent
->pParent
= pFree
;
879 for(i
=0; i
<nMaxDepth
; i
++){
887 pFree
->pLeft
= apLeaf
[i
];
888 pFree
->pLeft
->pParent
= pFree
;
889 pFree
->pRight
->pParent
= pFree
;
892 pFree
= pFree
->pParent
;
899 /* An error occurred. Delete the contents of the apLeaf[] array
900 ** and pFree list. Everything else is cleaned up by the call to
901 ** sqlite3Fts3ExprFree(pRoot) below. */
903 for(i
=0; i
<nMaxDepth
; i
++){
904 sqlite3Fts3ExprFree(apLeaf
[i
]);
906 while( (pDel
=pFree
)!=0 ){
907 pFree
= pDel
->pParent
;
913 sqlite3_free( apLeaf
);
915 }else if( eType
==FTSQUERY_NOT
){
916 Fts3Expr
*pLeft
= pRoot
->pLeft
;
917 Fts3Expr
*pRight
= pRoot
->pRight
;
924 rc
= fts3ExprBalance(&pLeft
, nMaxDepth
-1);
926 rc
= fts3ExprBalance(&pRight
, nMaxDepth
-1);
930 sqlite3Fts3ExprFree(pRight
);
931 sqlite3Fts3ExprFree(pLeft
);
933 assert( pLeft
&& pRight
);
934 pRoot
->pLeft
= pLeft
;
935 pLeft
->pParent
= pRoot
;
936 pRoot
->pRight
= pRight
;
937 pRight
->pParent
= pRoot
;
943 sqlite3Fts3ExprFree(pRoot
);
951 ** This function is similar to sqlite3Fts3ExprParse(), with the following
954 ** 1. It does not do expression rebalancing.
955 ** 2. It does not check that the expression does not exceed the
956 ** maximum allowable depth.
957 ** 3. Even if it fails, *ppExpr may still be set to point to an
958 ** expression tree. It should be deleted using sqlite3Fts3ExprFree()
961 static int fts3ExprParseUnbalanced(
962 sqlite3_tokenizer
*pTokenizer
, /* Tokenizer module */
963 int iLangid
, /* Language id for tokenizer */
964 char **azCol
, /* Array of column names for fts3 table */
965 int bFts4
, /* True to allow FTS4-only syntax */
966 int nCol
, /* Number of entries in azCol[] */
967 int iDefaultCol
, /* Default column to query */
968 const char *z
, int n
, /* Text of MATCH query */
969 Fts3Expr
**ppExpr
/* OUT: Parsed query structure */
975 memset(&sParse
, 0, sizeof(ParseContext
));
976 sParse
.pTokenizer
= pTokenizer
;
977 sParse
.iLangid
= iLangid
;
978 sParse
.azCol
= (const char **)azCol
;
980 sParse
.iDefaultCol
= iDefaultCol
;
981 sParse
.bFts4
= bFts4
;
989 rc
= fts3ExprParse(&sParse
, z
, n
, ppExpr
, &nParsed
);
990 assert( rc
==SQLITE_OK
|| *ppExpr
==0 );
992 /* Check for mismatched parenthesis */
993 if( rc
==SQLITE_OK
&& sParse
.nNest
){
1001 ** Parameters z and n contain a pointer to and length of a buffer containing
1002 ** an fts3 query expression, respectively. This function attempts to parse the
1003 ** query expression and create a tree of Fts3Expr structures representing the
1004 ** parsed expression. If successful, *ppExpr is set to point to the head
1005 ** of the parsed expression tree and SQLITE_OK is returned. If an error
1006 ** occurs, either SQLITE_NOMEM (out-of-memory error) or SQLITE_ERROR (parse
1007 ** error) is returned and *ppExpr is set to 0.
1009 ** If parameter n is a negative number, then z is assumed to point to a
1010 ** nul-terminated string and the length is determined using strlen().
1012 ** The first parameter, pTokenizer, is passed the fts3 tokenizer module to
1013 ** use to normalize query tokens while parsing the expression. The azCol[]
1014 ** array, which is assumed to contain nCol entries, should contain the names
1015 ** of each column in the target fts3 table, in order from left to right.
1016 ** Column names must be nul-terminated strings.
1018 ** The iDefaultCol parameter should be passed the index of the table column
1019 ** that appears on the left-hand-side of the MATCH operator (the default
1020 ** column to match against for tokens for which a column name is not explicitly
1021 ** specified as part of the query string), or -1 if tokens may by default
1022 ** match any table column.
1024 int sqlite3Fts3ExprParse(
1025 sqlite3_tokenizer
*pTokenizer
, /* Tokenizer module */
1026 int iLangid
, /* Language id for tokenizer */
1027 char **azCol
, /* Array of column names for fts3 table */
1028 int bFts4
, /* True to allow FTS4-only syntax */
1029 int nCol
, /* Number of entries in azCol[] */
1030 int iDefaultCol
, /* Default column to query */
1031 const char *z
, int n
, /* Text of MATCH query */
1032 Fts3Expr
**ppExpr
, /* OUT: Parsed query structure */
1033 char **pzErr
/* OUT: Error message (sqlite3_malloc) */
1035 int rc
= fts3ExprParseUnbalanced(
1036 pTokenizer
, iLangid
, azCol
, bFts4
, nCol
, iDefaultCol
, z
, n
, ppExpr
1039 /* Rebalance the expression. And check that its depth does not exceed
1040 ** SQLITE_FTS3_MAX_EXPR_DEPTH. */
1041 if( rc
==SQLITE_OK
&& *ppExpr
){
1042 rc
= fts3ExprBalance(ppExpr
, SQLITE_FTS3_MAX_EXPR_DEPTH
);
1043 if( rc
==SQLITE_OK
){
1044 rc
= fts3ExprCheckDepth(*ppExpr
, SQLITE_FTS3_MAX_EXPR_DEPTH
);
1048 if( rc
!=SQLITE_OK
){
1049 sqlite3Fts3ExprFree(*ppExpr
);
1051 if( rc
==SQLITE_TOOBIG
){
1052 sqlite3Fts3ErrMsg(pzErr
,
1053 "FTS expression tree is too large (maximum depth %d)",
1054 SQLITE_FTS3_MAX_EXPR_DEPTH
1057 }else if( rc
==SQLITE_ERROR
){
1058 sqlite3Fts3ErrMsg(pzErr
, "malformed MATCH expression: [%s]", z
);
1066 ** Free a single node of an expression tree.
1068 static void fts3FreeExprNode(Fts3Expr
*p
){
1069 assert( p
->eType
==FTSQUERY_PHRASE
|| p
->pPhrase
==0 );
1070 sqlite3Fts3EvalPhraseCleanup(p
->pPhrase
);
1071 sqlite3_free(p
->aMI
);
1076 ** Free a parsed fts3 query expression allocated by sqlite3Fts3ExprParse().
1078 ** This function would be simpler if it recursively called itself. But
1079 ** that would mean passing a sufficiently large expression to ExprParse()
1080 ** could cause a stack overflow.
1082 void sqlite3Fts3ExprFree(Fts3Expr
*pDel
){
1084 assert( pDel
==0 || pDel
->pParent
==0 );
1085 for(p
=pDel
; p
&& (p
->pLeft
||p
->pRight
); p
=(p
->pLeft
? p
->pLeft
: p
->pRight
)){
1086 assert( p
->pParent
==0 || p
==p
->pParent
->pRight
|| p
==p
->pParent
->pLeft
);
1089 Fts3Expr
*pParent
= p
->pParent
;
1090 fts3FreeExprNode(p
);
1091 if( pParent
&& p
==pParent
->pLeft
&& pParent
->pRight
){
1092 p
= pParent
->pRight
;
1093 while( p
&& (p
->pLeft
|| p
->pRight
) ){
1094 assert( p
==p
->pParent
->pRight
|| p
==p
->pParent
->pLeft
);
1095 p
= (p
->pLeft
? p
->pLeft
: p
->pRight
);
1103 /****************************************************************************
1104 *****************************************************************************
1105 ** Everything after this point is just test code.
1113 ** Return a pointer to a buffer containing a text representation of the
1114 ** expression passed as the first argument. The buffer is obtained from
1115 ** sqlite3_malloc(). It is the responsibility of the caller to use
1116 ** sqlite3_free() to release the memory. If an OOM condition is encountered,
1117 ** NULL is returned.
1119 ** If the second argument is not NULL, then its contents are prepended to
1120 ** the returned expression text and then freed using sqlite3_free().
1122 static char *exprToString(Fts3Expr
*pExpr
, char *zBuf
){
1124 return sqlite3_mprintf("");
1126 switch( pExpr
->eType
){
1127 case FTSQUERY_PHRASE
: {
1128 Fts3Phrase
*pPhrase
= pExpr
->pPhrase
;
1130 zBuf
= sqlite3_mprintf(
1131 "%zPHRASE %d 0", zBuf
, pPhrase
->iColumn
);
1132 for(i
=0; zBuf
&& i
<pPhrase
->nToken
; i
++){
1133 zBuf
= sqlite3_mprintf("%z %.*s%s", zBuf
,
1134 pPhrase
->aToken
[i
].n
, pPhrase
->aToken
[i
].z
,
1135 (pPhrase
->aToken
[i
].isPrefix
?"+":"")
1142 zBuf
= sqlite3_mprintf("%zNEAR/%d ", zBuf
, pExpr
->nNear
);
1145 zBuf
= sqlite3_mprintf("%zNOT ", zBuf
);
1148 zBuf
= sqlite3_mprintf("%zAND ", zBuf
);
1151 zBuf
= sqlite3_mprintf("%zOR ", zBuf
);
1155 if( zBuf
) zBuf
= sqlite3_mprintf("%z{", zBuf
);
1156 if( zBuf
) zBuf
= exprToString(pExpr
->pLeft
, zBuf
);
1157 if( zBuf
) zBuf
= sqlite3_mprintf("%z} {", zBuf
);
1159 if( zBuf
) zBuf
= exprToString(pExpr
->pRight
, zBuf
);
1160 if( zBuf
) zBuf
= sqlite3_mprintf("%z}", zBuf
);
1166 ** This is the implementation of a scalar SQL function used to test the
1167 ** expression parser. It should be called as follows:
1169 ** fts3_exprtest(<tokenizer>, <expr>, <column 1>, ...);
1171 ** The first argument, <tokenizer>, is the name of the fts3 tokenizer used
1172 ** to parse the query expression (see README.tokenizers). The second argument
1173 ** is the query expression to parse. Each subsequent argument is the name
1174 ** of a column of the fts3 table that the query expression may refer to.
1177 ** SELECT fts3_exprtest('simple', 'Bill col2:Bloggs', 'col1', 'col2');
1179 static void fts3ExprTestCommon(
1181 sqlite3_context
*context
,
1183 sqlite3_value
**argv
1185 sqlite3_tokenizer
*pTokenizer
= 0;
1194 Fts3Hash
*pHash
= (Fts3Hash
*)sqlite3_user_data(context
);
1195 const char *zTokenizer
= 0;
1199 sqlite3_result_error(context
,
1200 "Usage: fts3_exprtest(tokenizer, expr, col1, ...", -1
1205 zTokenizer
= (const char*)sqlite3_value_text(argv
[0]);
1206 rc
= sqlite3Fts3InitTokenizer(pHash
, zTokenizer
, &pTokenizer
, &zErr
);
1207 if( rc
!=SQLITE_OK
){
1208 if( rc
==SQLITE_NOMEM
){
1209 sqlite3_result_error_nomem(context
);
1211 sqlite3_result_error(context
, zErr
, -1);
1217 zExpr
= (const char *)sqlite3_value_text(argv
[1]);
1218 nExpr
= sqlite3_value_bytes(argv
[1]);
1220 azCol
= (char **)sqlite3_malloc64(nCol
*sizeof(char *));
1222 sqlite3_result_error_nomem(context
);
1225 for(ii
=0; ii
<nCol
; ii
++){
1226 azCol
[ii
] = (char *)sqlite3_value_text(argv
[ii
+2]);
1231 rc
= sqlite3Fts3ExprParse(
1232 pTokenizer
, 0, azCol
, 0, nCol
, nCol
, zExpr
, nExpr
, &pExpr
, &zDummy
1234 assert( rc
==SQLITE_OK
|| pExpr
==0 );
1235 sqlite3_free(zDummy
);
1237 rc
= fts3ExprParseUnbalanced(
1238 pTokenizer
, 0, azCol
, 0, nCol
, nCol
, zExpr
, nExpr
, &pExpr
1242 if( rc
!=SQLITE_OK
&& rc
!=SQLITE_NOMEM
){
1243 sqlite3Fts3ExprFree(pExpr
);
1244 sqlite3_result_error(context
, "Error parsing expression", -1);
1245 }else if( rc
==SQLITE_NOMEM
|| !(zBuf
= exprToString(pExpr
, 0)) ){
1246 sqlite3_result_error_nomem(context
);
1248 sqlite3_result_text(context
, zBuf
, -1, SQLITE_TRANSIENT
);
1252 sqlite3Fts3ExprFree(pExpr
);
1256 rc
= pTokenizer
->pModule
->xDestroy(pTokenizer
);
1258 sqlite3_free(azCol
);
1261 static void fts3ExprTest(
1262 sqlite3_context
*context
,
1264 sqlite3_value
**argv
1266 fts3ExprTestCommon(0, context
, argc
, argv
);
1268 static void fts3ExprTestRebalance(
1269 sqlite3_context
*context
,
1271 sqlite3_value
**argv
1273 fts3ExprTestCommon(1, context
, argc
, argv
);
1277 ** Register the query expression parser test function fts3_exprtest()
1278 ** with database connection db.
1280 int sqlite3Fts3ExprInitTestInterface(sqlite3
*db
, Fts3Hash
*pHash
){
1281 int rc
= sqlite3_create_function(
1282 db
, "fts3_exprtest", -1, SQLITE_UTF8
, (void*)pHash
, fts3ExprTest
, 0, 0
1284 if( rc
==SQLITE_OK
){
1285 rc
= sqlite3_create_function(db
, "fts3_exprtest_rebalance",
1286 -1, SQLITE_UTF8
, (void*)pHash
, fts3ExprTestRebalance
, 0, 0
1293 #endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */