4 ** The author disclaims copyright to this source code. In place of
5 ** a legal notice, here is a blessing:
7 ** May you do good and not evil.
8 ** May you find forgiveness for yourself and forgive others.
9 ** May you share freely, never taking more than you give.
11 ******************************************************************************
15 #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
21 ** Characters that may appear in the second argument to matchinfo().
23 #define FTS3_MATCHINFO_NPHRASE 'p' /* 1 value */
24 #define FTS3_MATCHINFO_NCOL 'c' /* 1 value */
25 #define FTS3_MATCHINFO_NDOC 'n' /* 1 value */
26 #define FTS3_MATCHINFO_AVGLENGTH 'a' /* nCol values */
27 #define FTS3_MATCHINFO_LENGTH 'l' /* nCol values */
28 #define FTS3_MATCHINFO_LCS 's' /* nCol values */
29 #define FTS3_MATCHINFO_HITS 'x' /* 3*nCol*nPhrase values */
30 #define FTS3_MATCHINFO_LHITS 'y' /* nCol*nPhrase values */
31 #define FTS3_MATCHINFO_LHITS_BM 'b' /* nPhrase*((nCol+31)/32) values: a bitmask with one bit per column, 32 columns per value */
34 ** The default value for the second argument to matchinfo().
36 #define FTS3_MATCHINFO_DEFAULT "pcx"
40 ** Used as an fts3ExprIterate() context when loading phrase doclists to
41 ** Fts3Expr.aDoclist[]/nDoclist.
43 typedef struct LoadDoclistCtx LoadDoclistCtx
;
44 struct LoadDoclistCtx
{
45 Fts3Cursor
*pCsr
; /* FTS3 Cursor */
46 int nPhrase
; /* Number of phrases seen so far */
47 int nToken
; /* Number of tokens seen so far */
51 ** The following types are used as part of the implementation of the
52 ** fts3BestSnippet() routine.
54 typedef struct SnippetIter SnippetIter
;
55 typedef struct SnippetPhrase SnippetPhrase
;
56 typedef struct SnippetFragment SnippetFragment
;
59 Fts3Cursor
*pCsr
; /* Cursor snippet is being generated from */
60 int iCol
; /* Extract snippet from this column */
61 int nSnippet
; /* Requested snippet length (in tokens) */
62 int nPhrase
; /* Number of phrases in query */
63 SnippetPhrase
*aPhrase
; /* Array of size nPhrase */
64 int iCurrent
; /* First token of current snippet */
67 struct SnippetPhrase
{
68 int nToken
; /* Number of tokens in phrase */
69 char *pList
; /* Pointer to start of phrase position list */
70 int iHead
; /* Next value in position list */
71 char *pHead
; /* Position list data following iHead */
72 int iTail
; /* Next value in trailing position list */
73 char *pTail
; /* Position list data following iTail */
76 struct SnippetFragment
{
77 int iCol
; /* Column snippet is extracted from */
78 int iPos
; /* Index of first token in snippet */
79 u64 covered
; /* Mask of query phrases covered */
80 u64 hlmask
; /* Mask of snippet terms to highlight */
84 ** This type is used as an fts3ExprIterate() context object while
85 ** accumulating the data returned by the matchinfo() function.
87 typedef struct MatchInfo MatchInfo
;
89 Fts3Cursor
*pCursor
; /* FTS3 Cursor */
90 int nCol
; /* Number of columns in table */
91 int nPhrase
; /* Number of matchable phrases in query */
92 sqlite3_int64 nDoc
; /* Number of docs in database */
94 u32
*aMatchinfo
; /* Pre-allocated buffer */
98 ** An instance of this structure is used to manage a pair of buffers, each
99 ** (nElem * sizeof(u32)) bytes in size. See the MatchinfoBuffer code below
102 struct MatchinfoBuffer
{
105 int bGlobal
; /* Set if global data is loaded */
112 ** The snippet() and offsets() functions both return text values. An instance
113 ** of the following structure is used to accumulate those values while the
114 ** functions are running. See fts3StringAppend() for details.
116 typedef struct StrBuffer StrBuffer
;
118 char *z
; /* Pointer to buffer containing string */
119 int n
; /* Length of z in bytes (excl. nul-term) */
120 int nAlloc
; /* Allocated size of buffer z in bytes */
124 /*************************************************************************
125 ** Start of MatchinfoBuffer code.
129 ** Allocate a two-slot MatchinfoBuffer object.
131 static MatchinfoBuffer
*fts3MIBufferNew(int nElem
, const char *zMatchinfo
){
132 MatchinfoBuffer
*pRet
;
133 int nByte
= sizeof(u32
) * (2*nElem
+ 1) + sizeof(MatchinfoBuffer
);
134 int nStr
= (int)strlen(zMatchinfo
);
136 pRet
= sqlite3_malloc(nByte
+ nStr
+1);
138 memset(pRet
, 0, nByte
);
139 pRet
->aMatchinfo
[0] = (u8
*)(&pRet
->aMatchinfo
[1]) - (u8
*)pRet
;
140 pRet
->aMatchinfo
[1+nElem
] = pRet
->aMatchinfo
[0] + sizeof(u32
)*(nElem
+1);
142 pRet
->zMatchinfo
= ((char*)pRet
) + nByte
;
143 memcpy(pRet
->zMatchinfo
, zMatchinfo
, nStr
+1);
150 static void fts3MIBufferFree(void *p
){
151 MatchinfoBuffer
*pBuf
= (MatchinfoBuffer
*)((u8
*)p
- ((u32
*)p
)[-1]);
153 assert( (u32
*)p
==&pBuf
->aMatchinfo
[1]
154 || (u32
*)p
==&pBuf
->aMatchinfo
[pBuf
->nElem
+2]
156 if( (u32
*)p
==&pBuf
->aMatchinfo
[1] ){
162 if( pBuf
->aRef
[0]==0 && pBuf
->aRef
[1]==0 && pBuf
->aRef
[2]==0 ){
167 static void (*fts3MIBufferAlloc(MatchinfoBuffer
*p
, u32
**paOut
))(void*){
168 void (*xRet
)(void*) = 0;
173 aOut
= &p
->aMatchinfo
[1];
174 xRet
= fts3MIBufferFree
;
176 else if( p
->aRef
[2]==0 ){
178 aOut
= &p
->aMatchinfo
[p
->nElem
+2];
179 xRet
= fts3MIBufferFree
;
181 aOut
= (u32
*)sqlite3_malloc(p
->nElem
* sizeof(u32
));
184 if( p
->bGlobal
) memcpy(aOut
, &p
->aMatchinfo
[1], p
->nElem
*sizeof(u32
));
192 static void fts3MIBufferSetGlobal(MatchinfoBuffer
*p
){
194 memcpy(&p
->aMatchinfo
[2+p
->nElem
], &p
->aMatchinfo
[1], p
->nElem
*sizeof(u32
));
198 ** Free a MatchinfoBuffer object allocated using fts3MIBufferNew()
200 void sqlite3Fts3MIBufferFree(MatchinfoBuffer
*p
){
202 assert( p
->aRef
[0]==1 );
204 if( p
->aRef
[0]==0 && p
->aRef
[1]==0 && p
->aRef
[2]==0 ){
211 ** End of MatchinfoBuffer code.
212 *************************************************************************/
216 ** This function is used to help iterate through a position-list. A position
217 ** list is a list of unique integers, sorted from smallest to largest. Each
218 ** element of the list is represented by an FTS3 varint that takes the value
219 ** of the difference between the current element and the previous one plus
220 ** two. For example, to store the position-list:
224 ** the three varints:
230 ** When this function is called, *pp points to the start of an element of
231 ** the list. *piPos contains the value of the previous entry in the list.
232 ** After it returns, *piPos contains the value of the next element of the
233 ** list and *pp is advanced to the following varint.
235 static void fts3GetDeltaPosition(char **pp
, int *piPos
){
237 *pp
+= fts3GetVarint32(*pp
, &iVal
);
242 ** Helper function for fts3ExprIterate() (see below).
244 static int fts3ExprIterate2(
245 Fts3Expr
*pExpr
, /* Expression to iterate phrases of */
246 int *piPhrase
, /* Pointer to phrase counter */
247 int (*x
)(Fts3Expr
*,int,void*), /* Callback function to invoke for phrases */
248 void *pCtx
/* Second argument to pass to callback */
250 int rc
; /* Return code */
251 int eType
= pExpr
->eType
; /* Type of expression node pExpr */
253 if( eType
!=FTSQUERY_PHRASE
){
254 assert( pExpr
->pLeft
&& pExpr
->pRight
);
255 rc
= fts3ExprIterate2(pExpr
->pLeft
, piPhrase
, x
, pCtx
);
256 if( rc
==SQLITE_OK
&& eType
!=FTSQUERY_NOT
){
257 rc
= fts3ExprIterate2(pExpr
->pRight
, piPhrase
, x
, pCtx
);
260 rc
= x(pExpr
, *piPhrase
, pCtx
);
267 ** Iterate through all phrase nodes in an FTS3 query, except those that
268 ** are part of a sub-tree that is the right-hand-side of a NOT operator.
269 ** For each phrase node found, the supplied callback function is invoked.
271 ** If the callback function returns anything other than SQLITE_OK,
272 ** the iteration is abandoned and the error code returned immediately.
273 ** Otherwise, SQLITE_OK is returned after a callback has been made for
274 ** all eligible phrase nodes.
276 static int fts3ExprIterate(
277 Fts3Expr
*pExpr
, /* Expression to iterate phrases of */
278 int (*x
)(Fts3Expr
*,int,void*), /* Callback function to invoke for phrases */
279 void *pCtx
/* Second argument to pass to callback */
281 int iPhrase
= 0; /* Variable used as the phrase counter */
282 return fts3ExprIterate2(pExpr
, &iPhrase
, x
, pCtx
);
287 ** This is an fts3ExprIterate() callback used while loading the doclists
288 ** for each phrase into Fts3Expr.aDoclist[]/nDoclist. See also
289 ** fts3ExprLoadDoclists().
291 static int fts3ExprLoadDoclistsCb(Fts3Expr
*pExpr
, int iPhrase
, void *ctx
){
293 Fts3Phrase
*pPhrase
= pExpr
->pPhrase
;
294 LoadDoclistCtx
*p
= (LoadDoclistCtx
*)ctx
;
296 UNUSED_PARAMETER(iPhrase
);
299 p
->nToken
+= pPhrase
->nToken
;
305 ** Load the doclists for each phrase in the query associated with FTS3 cursor
308 ** If pnPhrase is not NULL, then *pnPhrase is set to the number of matchable
309 ** phrases in the expression (all phrases except those directly or
310 ** indirectly descended from the right-hand-side of a NOT operator). If
311 ** pnToken is not NULL, then it is set to the number of tokens in all
312 ** matchable phrases of the expression.
314 static int fts3ExprLoadDoclists(
315 Fts3Cursor
*pCsr
, /* Fts3 cursor for current query */
316 int *pnPhrase
, /* OUT: Number of phrases in query */
317 int *pnToken
/* OUT: Number of tokens in query */
319 int rc
; /* Return Code */
320 LoadDoclistCtx sCtx
= {0,0,0}; /* Context for fts3ExprIterate() */
322 rc
= fts3ExprIterate(pCsr
->pExpr
, fts3ExprLoadDoclistsCb
, (void *)&sCtx
);
323 if( pnPhrase
) *pnPhrase
= sCtx
.nPhrase
;
324 if( pnToken
) *pnToken
= sCtx
.nToken
;
328 static int fts3ExprPhraseCountCb(Fts3Expr
*pExpr
, int iPhrase
, void *ctx
){
330 pExpr
->iPhrase
= iPhrase
;
333 static int fts3ExprPhraseCount(Fts3Expr
*pExpr
){
335 (void)fts3ExprIterate(pExpr
, fts3ExprPhraseCountCb
, (void *)&nPhrase
);
340 ** Advance the position list iterator specified by the first two
341 ** arguments so that it points to the first element with a value greater
342 ** than or equal to parameter iNext.
344 static void fts3SnippetAdvance(char **ppIter
, int *piIter
, int iNext
){
345 char *pIter
= *ppIter
;
349 while( iIter
<iNext
){
350 if( 0==(*pIter
& 0xFE) ){
355 fts3GetDeltaPosition(&pIter
, &iIter
);
364 ** Advance the snippet iterator to the next candidate snippet.
366 static int fts3SnippetNextCandidate(SnippetIter
*pIter
){
367 int i
; /* Loop counter */
369 if( pIter
->iCurrent
<0 ){
370 /* The SnippetIter object has just been initialized. The first snippet
371 ** candidate always starts at offset 0 (even if this candidate has a
376 /* Advance the 'head' iterator of each phrase to the first offset that
377 ** is greater than or equal to (iNext+nSnippet).
379 for(i
=0; i
<pIter
->nPhrase
; i
++){
380 SnippetPhrase
*pPhrase
= &pIter
->aPhrase
[i
];
381 fts3SnippetAdvance(&pPhrase
->pHead
, &pPhrase
->iHead
, pIter
->nSnippet
);
385 int iEnd
= 0x7FFFFFFF;
387 for(i
=0; i
<pIter
->nPhrase
; i
++){
388 SnippetPhrase
*pPhrase
= &pIter
->aPhrase
[i
];
389 if( pPhrase
->pHead
&& pPhrase
->iHead
<iEnd
){
390 iEnd
= pPhrase
->iHead
;
393 if( iEnd
==0x7FFFFFFF ){
397 pIter
->iCurrent
= iStart
= iEnd
- pIter
->nSnippet
+ 1;
398 for(i
=0; i
<pIter
->nPhrase
; i
++){
399 SnippetPhrase
*pPhrase
= &pIter
->aPhrase
[i
];
400 fts3SnippetAdvance(&pPhrase
->pHead
, &pPhrase
->iHead
, iEnd
+1);
401 fts3SnippetAdvance(&pPhrase
->pTail
, &pPhrase
->iTail
, iStart
);
409 ** Retrieve information about the current candidate snippet of snippet
412 static void fts3SnippetDetails(
413 SnippetIter
*pIter
, /* Snippet iterator */
414 u64 mCovered
, /* Bitmask of phrases already covered */
415 int *piToken
, /* OUT: First token of proposed snippet */
416 int *piScore
, /* OUT: "Score" for this snippet */
417 u64
*pmCover
, /* OUT: Bitmask of phrases covered */
418 u64
*pmHighlight
/* OUT: Bitmask of terms to highlight */
420 int iStart
= pIter
->iCurrent
; /* First token of snippet */
421 int iScore
= 0; /* Score of this snippet */
422 int i
; /* Loop counter */
423 u64 mCover
= 0; /* Mask of phrases covered by this snippet */
424 u64 mHighlight
= 0; /* Mask of tokens to highlight in snippet */
426 for(i
=0; i
<pIter
->nPhrase
; i
++){
427 SnippetPhrase
*pPhrase
= &pIter
->aPhrase
[i
];
428 if( pPhrase
->pTail
){
429 char *pCsr
= pPhrase
->pTail
;
430 int iCsr
= pPhrase
->iTail
;
432 while( iCsr
<(iStart
+pIter
->nSnippet
) ){
434 u64 mPhrase
= (u64
)1 << i
;
435 u64 mPos
= (u64
)1 << (iCsr
- iStart
);
436 assert( iCsr
>=iStart
);
437 if( (mCover
|mCovered
)&mPhrase
){
444 for(j
=0; j
<pPhrase
->nToken
; j
++){
445 mHighlight
|= (mPos
>>j
);
448 if( 0==(*pCsr
& 0x0FE) ) break;
449 fts3GetDeltaPosition(&pCsr
, &iCsr
);
454 /* Set the output variables before returning. */
458 *pmHighlight
= mHighlight
;
462 ** This function is an fts3ExprIterate() callback used by fts3BestSnippet().
463 ** Each invocation populates an element of the SnippetIter.aPhrase[] array.
465 static int fts3SnippetFindPositions(Fts3Expr
*pExpr
, int iPhrase
, void *ctx
){
466 SnippetIter
*p
= (SnippetIter
*)ctx
;
467 SnippetPhrase
*pPhrase
= &p
->aPhrase
[iPhrase
];
471 pPhrase
->nToken
= pExpr
->pPhrase
->nToken
;
472 rc
= sqlite3Fts3EvalPhrasePoslist(p
->pCsr
, pExpr
, p
->iCol
, &pCsr
);
473 assert( rc
==SQLITE_OK
|| pCsr
==0 );
476 pPhrase
->pList
= pCsr
;
477 fts3GetDeltaPosition(&pCsr
, &iFirst
);
479 pPhrase
->pHead
= pCsr
;
480 pPhrase
->pTail
= pCsr
;
481 pPhrase
->iHead
= iFirst
;
482 pPhrase
->iTail
= iFirst
;
484 assert( rc
!=SQLITE_OK
|| (
485 pPhrase
->pList
==0 && pPhrase
->pHead
==0 && pPhrase
->pTail
==0
493 ** Select the fragment of text consisting of nFragment contiguous tokens
494 ** from column iCol that represent the "best" snippet. The best snippet
495 ** is the snippet with the highest score, where scores are calculated
498 ** (a) +1 point for each occurrence of a matchable phrase in the snippet.
500 ** (b) +1000 points for the first occurrence of each matchable phrase in
501 ** the snippet for which the corresponding mCovered bit is not set.
503 ** The selected snippet parameters are stored in structure *pFragment before
504 ** returning. The score of the selected snippet is stored in *piScore
507 static int fts3BestSnippet(
508 int nSnippet
, /* Desired snippet length */
509 Fts3Cursor
*pCsr
, /* Cursor to create snippet for */
510 int iCol
, /* Index of column to create snippet from */
511 u64 mCovered
, /* Mask of phrases already covered */
512 u64
*pmSeen
, /* IN/OUT: Mask of phrases seen */
513 SnippetFragment
*pFragment
, /* OUT: Best snippet found */
514 int *piScore
/* OUT: Score of snippet pFragment */
516 int rc
; /* Return Code */
517 int nList
; /* Number of phrases in expression */
518 SnippetIter sIter
; /* Iterates through snippet candidates */
519 int nByte
; /* Number of bytes of space to allocate */
520 int iBestScore
= -1; /* Best snippet score found so far */
521 int i
; /* Loop counter */
523 memset(&sIter
, 0, sizeof(sIter
));
525 /* Iterate through the phrases in the expression to count them. The same
526 ** callback makes sure the doclists are loaded for each phrase.
528 rc
= fts3ExprLoadDoclists(pCsr
, &nList
, 0);
533 /* Now that it is known how many phrases there are, allocate and zero
534 ** the required space using malloc().
536 nByte
= sizeof(SnippetPhrase
) * nList
;
537 sIter
.aPhrase
= (SnippetPhrase
*)sqlite3_malloc(nByte
);
538 if( !sIter
.aPhrase
){
541 memset(sIter
.aPhrase
, 0, nByte
);
543 /* Initialize the contents of the SnippetIter object. Then iterate through
544 ** the set of phrases in the expression to populate the aPhrase[] array.
548 sIter
.nSnippet
= nSnippet
;
549 sIter
.nPhrase
= nList
;
551 rc
= fts3ExprIterate(pCsr
->pExpr
, fts3SnippetFindPositions
, (void*)&sIter
);
554 /* Set the *pmSeen output variable. */
555 for(i
=0; i
<nList
; i
++){
556 if( sIter
.aPhrase
[i
].pHead
){
557 *pmSeen
|= (u64
)1 << i
;
561 /* Loop through all candidate snippets. Store the best snippet in
562 ** *pFragment. Store its associated 'score' in iBestScore.
564 pFragment
->iCol
= iCol
;
565 while( !fts3SnippetNextCandidate(&sIter
) ){
570 fts3SnippetDetails(&sIter
, mCovered
, &iPos
, &iScore
, &mCover
,&mHighlite
);
572 if( iScore
>iBestScore
){
573 pFragment
->iPos
= iPos
;
574 pFragment
->hlmask
= mHighlite
;
575 pFragment
->covered
= mCover
;
580 *piScore
= iBestScore
;
582 sqlite3_free(sIter
.aPhrase
);
588 ** Append a string to the string-buffer passed as the first argument.
590 ** If nAppend is negative, then the length of the string zAppend is
591 ** determined using strlen().
593 static int fts3StringAppend(
594 StrBuffer
*pStr
, /* Buffer to append to */
595 const char *zAppend
, /* Pointer to data to append to buffer */
596 int nAppend
/* Size of zAppend in bytes (or -1) */
599 nAppend
= (int)strlen(zAppend
);
602 /* If there is insufficient space allocated at StrBuffer.z, use realloc()
603 ** to grow the buffer so that it is big enough to accommodate the
606 if( pStr
->n
+nAppend
+1>=pStr
->nAlloc
){
607 int nAlloc
= pStr
->nAlloc
+nAppend
+100;
608 char *zNew
= sqlite3_realloc(pStr
->z
, nAlloc
);
613 pStr
->nAlloc
= nAlloc
;
615 assert( pStr
->z
!=0 && (pStr
->nAlloc
>= pStr
->n
+nAppend
+1) );
617 /* Append the data to the string buffer. */
618 memcpy(&pStr
->z
[pStr
->n
], zAppend
, nAppend
);
620 pStr
->z
[pStr
->n
] = '\0';
626 ** The fts3BestSnippet() function often selects snippets that end with a
627 ** query term. That is, the final term of the snippet is always a term
628 ** that requires highlighting. For example, if 'X' is a highlighted term
629 ** and '.' is a non-highlighted term, BestSnippet() may select:
633 ** This function "shifts" the beginning of the snippet forward in the
634 ** document so that there are approximately the same number of
635 ** non-highlighted terms to the right of the final highlighted term as there
636 ** are to the left of the first highlighted term. For example, to this:
640 ** This is done as part of extracting the snippet text, not when selecting
641 ** the snippet. Snippet selection is done based on doclists only, so there
642 ** is no way for fts3BestSnippet() to know whether or not the document
643 ** actually contains terms that follow the final highlighted term.
645 static int fts3SnippetShift(
646 Fts3Table
*pTab
, /* FTS3 table snippet comes from */
647 int iLangid
, /* Language id to use in tokenizing */
648 int nSnippet
, /* Number of tokens desired for snippet */
649 const char *zDoc
, /* Document text to extract snippet from */
650 int nDoc
, /* Size of buffer zDoc in bytes */
651 int *piPos
, /* IN/OUT: First token of snippet */
652 u64
*pHlmask
/* IN/OUT: Mask of tokens to highlight */
654 u64 hlmask
= *pHlmask
; /* Local copy of initial highlight-mask */
657 int nLeft
; /* Tokens to the left of first highlight */
658 int nRight
; /* Tokens to the right of last highlight */
659 int nDesired
; /* Ideal number of tokens to shift forward */
661 for(nLeft
=0; !(hlmask
& ((u64
)1 << nLeft
)); nLeft
++);
662 for(nRight
=0; !(hlmask
& ((u64
)1 << (nSnippet
-1-nRight
))); nRight
++);
663 nDesired
= (nLeft
-nRight
)/2;
665 /* Ideally, the start of the snippet should be pushed forward in the
666 ** document nDesired tokens. This block checks if there are actually
667 ** nDesired tokens to the right of the snippet. If so, *piPos and
668 ** *pHlMask are updated to shift the snippet nDesired tokens to the
669 ** right. Otherwise, the snippet is shifted by the number of tokens
673 int nShift
; /* Number of tokens to shift snippet by */
674 int iCurrent
= 0; /* Token counter */
675 int rc
; /* Return Code */
676 sqlite3_tokenizer_module
*pMod
;
677 sqlite3_tokenizer_cursor
*pC
;
678 pMod
= (sqlite3_tokenizer_module
*)pTab
->pTokenizer
->pModule
;
680 /* Open a cursor on zDoc/nDoc. Check if there are (nSnippet+nDesired)
681 ** or more tokens in zDoc/nDoc.
683 rc
= sqlite3Fts3OpenTokenizer(pTab
->pTokenizer
, iLangid
, zDoc
, nDoc
, &pC
);
687 while( rc
==SQLITE_OK
&& iCurrent
<(nSnippet
+nDesired
) ){
688 const char *ZDUMMY
; int DUMMY1
= 0, DUMMY2
= 0, DUMMY3
= 0;
689 rc
= pMod
->xNext(pC
, &ZDUMMY
, &DUMMY1
, &DUMMY2
, &DUMMY3
, &iCurrent
);
692 if( rc
!=SQLITE_OK
&& rc
!=SQLITE_DONE
){ return rc
; }
694 nShift
= (rc
==SQLITE_DONE
)+iCurrent
-nSnippet
;
695 assert( nShift
<=nDesired
);
698 *pHlmask
= hlmask
>> nShift
;
706 ** Extract the snippet text for fragment pFragment from cursor pCsr and
707 ** append it to string buffer pOut.
709 static int fts3SnippetText(
710 Fts3Cursor
*pCsr
, /* FTS3 Cursor */
711 SnippetFragment
*pFragment
, /* Snippet to extract */
712 int iFragment
, /* Fragment number */
713 int isLast
, /* True for final fragment in snippet */
714 int nSnippet
, /* Number of tokens in extracted snippet */
715 const char *zOpen
, /* String inserted before highlighted term */
716 const char *zClose
, /* String inserted after highlighted term */
717 const char *zEllipsis
, /* String inserted between snippets */
718 StrBuffer
*pOut
/* Write output here */
720 Fts3Table
*pTab
= (Fts3Table
*)pCsr
->base
.pVtab
;
721 int rc
; /* Return code */
722 const char *zDoc
; /* Document text to extract snippet from */
723 int nDoc
; /* Size of zDoc in bytes */
724 int iCurrent
= 0; /* Current token number of document */
725 int iEnd
= 0; /* Byte offset of end of current token */
726 int isShiftDone
= 0; /* True after snippet is shifted */
727 int iPos
= pFragment
->iPos
; /* First token of snippet */
728 u64 hlmask
= pFragment
->hlmask
; /* Highlight-mask for snippet */
729 int iCol
= pFragment
->iCol
+1; /* Query column to extract text from */
730 sqlite3_tokenizer_module
*pMod
; /* Tokenizer module methods object */
731 sqlite3_tokenizer_cursor
*pC
; /* Tokenizer cursor open on zDoc/nDoc */
733 zDoc
= (const char *)sqlite3_column_text(pCsr
->pStmt
, iCol
);
735 if( sqlite3_column_type(pCsr
->pStmt
, iCol
)!=SQLITE_NULL
){
740 nDoc
= sqlite3_column_bytes(pCsr
->pStmt
, iCol
);
742 /* Open a token cursor on the document. */
743 pMod
= (sqlite3_tokenizer_module
*)pTab
->pTokenizer
->pModule
;
744 rc
= sqlite3Fts3OpenTokenizer(pTab
->pTokenizer
, pCsr
->iLangid
, zDoc
,nDoc
,&pC
);
749 while( rc
==SQLITE_OK
){
750 const char *ZDUMMY
; /* Dummy argument used with tokenizer */
751 int DUMMY1
= -1; /* Dummy argument used with tokenizer */
752 int iBegin
= 0; /* Offset in zDoc of start of token */
753 int iFin
= 0; /* Offset in zDoc of end of token */
754 int isHighlight
= 0; /* True for highlighted terms */
756 /* Variable DUMMY1 is initialized to a negative value above. Elsewhere
757 ** in the FTS code the variable that the third argument to xNext points to
758 ** is initialized to zero before the first (*but not necessarily
759 ** subsequent*) call to xNext(). This is done for a particular application
760 ** that needs to know whether or not the tokenizer is being used for
761 ** snippet generation or for some other purpose.
763 ** Extreme care is required when writing code to depend on this
764 ** initialization. It is not a documented part of the tokenizer interface.
765 ** If a tokenizer is used directly by any code outside of FTS, this
766 ** convention might not be respected. */
767 rc
= pMod
->xNext(pC
, &ZDUMMY
, &DUMMY1
, &iBegin
, &iFin
, &iCurrent
);
769 if( rc
==SQLITE_DONE
){
770 /* Special case - the last token of the snippet is also the last token
771 ** of the column. Append any punctuation that occurred between the end
772 ** of the previous token and the end of the document to the output.
773 ** Then break out of the loop. */
774 rc
= fts3StringAppend(pOut
, &zDoc
[iEnd
], -1);
778 if( iCurrent
<iPos
){ continue; }
781 int n
= nDoc
- iBegin
;
782 rc
= fts3SnippetShift(
783 pTab
, pCsr
->iLangid
, nSnippet
, &zDoc
[iBegin
], n
, &iPos
, &hlmask
787 /* Now that the shift has been done, check if the initial "..." are
788 ** required. They are required if (a) this is not the first fragment,
789 ** or (b) this fragment does not begin at position 0 of its column.
792 if( iPos
>0 || iFragment
>0 ){
793 rc
= fts3StringAppend(pOut
, zEllipsis
, -1);
795 rc
= fts3StringAppend(pOut
, zDoc
, iBegin
);
798 if( rc
!=SQLITE_OK
|| iCurrent
<iPos
) continue;
801 if( iCurrent
>=(iPos
+nSnippet
) ){
803 rc
= fts3StringAppend(pOut
, zEllipsis
, -1);
808 /* Set isHighlight to true if this term should be highlighted. */
809 isHighlight
= (hlmask
& ((u64
)1 << (iCurrent
-iPos
)))!=0;
811 if( iCurrent
>iPos
) rc
= fts3StringAppend(pOut
, &zDoc
[iEnd
], iBegin
-iEnd
);
812 if( rc
==SQLITE_OK
&& isHighlight
) rc
= fts3StringAppend(pOut
, zOpen
, -1);
813 if( rc
==SQLITE_OK
) rc
= fts3StringAppend(pOut
, &zDoc
[iBegin
], iFin
-iBegin
);
814 if( rc
==SQLITE_OK
&& isHighlight
) rc
= fts3StringAppend(pOut
, zClose
, -1);
825 ** This function is used to count the entries in a column-list (a
826 ** delta-encoded list of term offsets within a single column of a single
827 ** row). When this function is called, *ppCollist should point to the
828 ** beginning of the first varint in the column-list (the varint that
829 ** contains the position of the first matching term in the column data).
830 ** Before returning, *ppCollist is set to point to the first byte after
831 ** the last varint in the column-list (either the 0x00 signifying the end
832 ** of the position-list, or the 0x01 that precedes the column number of
833 ** the next column in the position-list).
835 ** The number of elements in the column-list is returned.
837 static int fts3ColumnlistCount(char **ppCollist
){
838 char *pEnd
= *ppCollist
;
842 /* A column-list is terminated by either a 0x01 or 0x00. */
843 while( 0xFE & (*pEnd
| c
) ){
853 ** This function gathers 'y' or 'b' data for a single phrase.
855 static void fts3ExprLHits(
856 Fts3Expr
*pExpr
, /* Phrase expression node */
857 MatchInfo
*p
/* Matchinfo context */
859 Fts3Table
*pTab
= (Fts3Table
*)p
->pCursor
->base
.pVtab
;
861 Fts3Phrase
*pPhrase
= pExpr
->pPhrase
;
862 char *pIter
= pPhrase
->doclist
.pList
;
865 assert( p
->flag
==FTS3_MATCHINFO_LHITS_BM
|| p
->flag
==FTS3_MATCHINFO_LHITS
);
866 if( p
->flag
==FTS3_MATCHINFO_LHITS
){
867 iStart
= pExpr
->iPhrase
* p
->nCol
;
869 iStart
= pExpr
->iPhrase
* ((p
->nCol
+ 31) / 32);
873 int nHit
= fts3ColumnlistCount(&pIter
);
874 if( (pPhrase
->iColumn
>=pTab
->nColumn
|| pPhrase
->iColumn
==iCol
) ){
875 if( p
->flag
==FTS3_MATCHINFO_LHITS
){
876 p
->aMatchinfo
[iStart
+ iCol
] = (u32
)nHit
;
878 p
->aMatchinfo
[iStart
+ (iCol
+1)/32] |= (1 << (iCol
&0x1F));
881 assert( *pIter
==0x00 || *pIter
==0x01 );
882 if( *pIter
!=0x01 ) break;
884 pIter
+= fts3GetVarint32(pIter
, &iCol
);
889 ** Gather the results for matchinfo directives 'y' and 'b'.
891 static void fts3ExprLHitGather(
895 assert( (pExpr
->pLeft
==0)==(pExpr
->pRight
==0) );
896 if( pExpr
->bEof
==0 && pExpr
->iDocid
==p
->pCursor
->iPrevId
){
898 fts3ExprLHitGather(pExpr
->pLeft
, p
);
899 fts3ExprLHitGather(pExpr
->pRight
, p
);
901 fts3ExprLHits(pExpr
, p
);
907 ** fts3ExprIterate() callback used to collect the "global" matchinfo stats
908 ** for a single query.
910 ** fts3ExprIterate() callback to load the 'global' elements of a
911 ** FTS3_MATCHINFO_HITS matchinfo array. The global stats are those elements
912 ** of the matchinfo array that are constant for all rows returned by the
915 ** Argument pCtx is actually a pointer to a struct of type MatchInfo. This
916 ** function populates Matchinfo.aMatchinfo[] as follows:
918 ** for(iCol=0; iCol<nCol; iCol++){
919 ** aMatchinfo[3*iPhrase*nCol + 3*iCol + 1] = X;
920 ** aMatchinfo[3*iPhrase*nCol + 3*iCol + 2] = Y;
923 ** where X is the number of matches for phrase iPhrase in column iCol of all
924 ** rows of the table. Y is the number of rows for which column iCol contains
925 ** at least one instance of phrase iPhrase.
927 ** If the phrase pExpr consists entirely of deferred tokens, then all X and
928 ** Y values are set to nDoc, where nDoc is the number of documents in the
929 ** file system. This is done because the full-text index doclist is required
930 ** to calculate these values properly, and the full-text index doclist is
931 ** not available for deferred tokens.
933 static int fts3ExprGlobalHitsCb(
934 Fts3Expr
*pExpr
, /* Phrase expression node */
935 int iPhrase
, /* Phrase number (numbered from zero) */
936 void *pCtx
/* Pointer to MatchInfo structure */
938 MatchInfo
*p
= (MatchInfo
*)pCtx
;
939 return sqlite3Fts3EvalPhraseStats(
940 p
->pCursor
, pExpr
, &p
->aMatchinfo
[3*iPhrase
*p
->nCol
]
945 ** fts3ExprIterate() callback used to collect the "local" part of the
946 ** FTS3_MATCHINFO_HITS array. The local stats are those elements of the
947 ** array that are different for each row returned by the query.
949 static int fts3ExprLocalHitsCb(
950 Fts3Expr
*pExpr
, /* Phrase expression node */
951 int iPhrase
, /* Phrase number */
952 void *pCtx
/* Pointer to MatchInfo structure */
955 MatchInfo
*p
= (MatchInfo
*)pCtx
;
956 int iStart
= iPhrase
* p
->nCol
* 3;
959 for(i
=0; i
<p
->nCol
&& rc
==SQLITE_OK
; i
++){
961 rc
= sqlite3Fts3EvalPhrasePoslist(p
->pCursor
, pExpr
, i
, &pCsr
);
963 p
->aMatchinfo
[iStart
+i
*3] = fts3ColumnlistCount(&pCsr
);
965 p
->aMatchinfo
[iStart
+i
*3] = 0;
972 static int fts3MatchinfoCheck(
977 if( (cArg
==FTS3_MATCHINFO_NPHRASE
)
978 || (cArg
==FTS3_MATCHINFO_NCOL
)
979 || (cArg
==FTS3_MATCHINFO_NDOC
&& pTab
->bFts4
)
980 || (cArg
==FTS3_MATCHINFO_AVGLENGTH
&& pTab
->bFts4
)
981 || (cArg
==FTS3_MATCHINFO_LENGTH
&& pTab
->bHasDocsize
)
982 || (cArg
==FTS3_MATCHINFO_LCS
)
983 || (cArg
==FTS3_MATCHINFO_HITS
)
984 || (cArg
==FTS3_MATCHINFO_LHITS
)
985 || (cArg
==FTS3_MATCHINFO_LHITS_BM
)
989 sqlite3Fts3ErrMsg(pzErr
, "unrecognized matchinfo request: %c", cArg
);
993 static int fts3MatchinfoSize(MatchInfo
*pInfo
, char cArg
){
994 int nVal
; /* Number of integers output by cArg */
997 case FTS3_MATCHINFO_NDOC
:
998 case FTS3_MATCHINFO_NPHRASE
:
999 case FTS3_MATCHINFO_NCOL
:
1003 case FTS3_MATCHINFO_AVGLENGTH
:
1004 case FTS3_MATCHINFO_LENGTH
:
1005 case FTS3_MATCHINFO_LCS
:
1009 case FTS3_MATCHINFO_LHITS
:
1010 nVal
= pInfo
->nCol
* pInfo
->nPhrase
;
1013 case FTS3_MATCHINFO_LHITS_BM
:
1014 nVal
= pInfo
->nPhrase
* ((pInfo
->nCol
+ 31) / 32);
1018 assert( cArg
==FTS3_MATCHINFO_HITS
);
1019 nVal
= pInfo
->nCol
* pInfo
->nPhrase
* 3;
1026 static int fts3MatchinfoSelectDoctotal(
1028 sqlite3_stmt
**ppStmt
,
1029 sqlite3_int64
*pnDoc
,
1032 sqlite3_stmt
*pStmt
;
1037 int rc
= sqlite3Fts3SelectDoctotal(pTab
, ppStmt
);
1038 if( rc
!=SQLITE_OK
) return rc
;
1041 assert( sqlite3_data_count(pStmt
)==1 );
1043 a
= sqlite3_column_blob(pStmt
, 0);
1044 a
+= sqlite3Fts3GetVarint(a
, &nDoc
);
1045 if( nDoc
==0 ) return FTS_CORRUPT_VTAB
;
1048 if( paLen
) *paLen
= a
;
1053 ** An instance of the following structure is used to store state while
1054 ** iterating through a multi-column position-list corresponding to the
1055 ** hits for a single phrase on a single row in order to calculate the
1056 ** values for a matchinfo() FTS3_MATCHINFO_LCS request.
1058 typedef struct LcsIterator LcsIterator
;
1059 struct LcsIterator
{
1060 Fts3Expr
*pExpr
; /* Pointer to phrase expression */
1061 int iPosOffset
; /* Tokens count up to end of this phrase */
1062 char *pRead
; /* Cursor used to iterate through aDoclist */
1063 int iPos
; /* Current position */
1067 ** If LcsIterator.iCol is set to the following value, the iterator has
1068 ** finished iterating through all offsets for all columns.
/*
** Sentinel value marking an LCS iterator that has finished iterating
** through all offsets for all columns.
**
** The definition deliberately has no trailing semicolon, so the macro
** expands to a plain integer constant and can be used inside expressions
** (comparisons, initializers, function arguments) as well as in simple
** assignment statements.
*/
#define LCS_ITERATOR_FINISHED 0x7FFFFFFF
1072 static int fts3MatchinfoLcsCb(
1073 Fts3Expr
*pExpr
, /* Phrase expression node */
1074 int iPhrase
, /* Phrase number (numbered from zero) */
1075 void *pCtx
/* Pointer to MatchInfo structure */
1077 LcsIterator
*aIter
= (LcsIterator
*)pCtx
;
1078 aIter
[iPhrase
].pExpr
= pExpr
;
1083 ** Advance the iterator passed as an argument to the next position. Return
1084 ** 1 if the iterator is at EOF or if it now points to the start of the
1085 ** position list for the next column.
1087 static int fts3LcsIteratorAdvance(LcsIterator
*pIter
){
1088 char *pRead
= pIter
->pRead
;
1089 sqlite3_int64 iRead
;
1092 pRead
+= sqlite3Fts3GetVarint(pRead
, &iRead
);
1093 if( iRead
==0 || iRead
==1 ){
1097 pIter
->iPos
+= (int)(iRead
-2);
1100 pIter
->pRead
= pRead
;
1105 ** This function implements the FTS3_MATCHINFO_LCS matchinfo() flag.
1107 ** If the call is successful, the longest-common-substring lengths for each
1108 ** column are written into the first nCol elements of the pInfo->aMatchinfo[]
1109 ** array before returning. SQLITE_OK is returned in this case.
1111 ** Otherwise, if an error occurs, an SQLite error code is returned and the
1112 ** data written to the first nCol elements of pInfo->aMatchinfo[] is undefined.
1115 static int fts3MatchinfoLcs(Fts3Cursor
*pCsr
, MatchInfo
*pInfo
){
1121 /* Allocate and populate the array of LcsIterator objects. The array
1122 ** contains one element for each matchable phrase in the query.
1124 aIter
= sqlite3_malloc(sizeof(LcsIterator
) * pCsr
->nPhrase
);
1125 if( !aIter
) return SQLITE_NOMEM
;
1126 memset(aIter
, 0, sizeof(LcsIterator
) * pCsr
->nPhrase
);
1127 (void)fts3ExprIterate(pCsr
->pExpr
, fts3MatchinfoLcsCb
, (void*)aIter
);
1129 for(i
=0; i
<pInfo
->nPhrase
; i
++){
1130 LcsIterator
*pIter
= &aIter
[i
];
1131 nToken
-= pIter
->pExpr
->pPhrase
->nToken
;
1132 pIter
->iPosOffset
= nToken
;
1135 for(iCol
=0; iCol
<pInfo
->nCol
; iCol
++){
1136 int nLcs
= 0; /* LCS value for this column */
1137 int nLive
= 0; /* Number of iterators in aIter not at EOF */
1139 for(i
=0; i
<pInfo
->nPhrase
; i
++){
1141 LcsIterator
*pIt
= &aIter
[i
];
1142 rc
= sqlite3Fts3EvalPhrasePoslist(pCsr
, pIt
->pExpr
, iCol
, &pIt
->pRead
);
1143 if( rc
!=SQLITE_OK
) return rc
;
1145 pIt
->iPos
= pIt
->iPosOffset
;
1146 fts3LcsIteratorAdvance(&aIter
[i
]);
1152 LcsIterator
*pAdv
= 0; /* The iterator to advance by one position */
1153 int nThisLcs
= 0; /* LCS for the current iterator positions */
1155 for(i
=0; i
<pInfo
->nPhrase
; i
++){
1156 LcsIterator
*pIter
= &aIter
[i
];
1157 if( pIter
->pRead
==0 ){
1158 /* This iterator is already at EOF for this column. */
1161 if( pAdv
==0 || pIter
->iPos
<pAdv
->iPos
){
1164 if( nThisLcs
==0 || pIter
->iPos
==pIter
[-1].iPos
){
1169 if( nThisLcs
>nLcs
) nLcs
= nThisLcs
;
1172 if( fts3LcsIteratorAdvance(pAdv
) ) nLive
--;
1175 pInfo
->aMatchinfo
[iCol
] = nLcs
;
1178 sqlite3_free(aIter
);
1183 ** Populate the buffer pInfo->aMatchinfo[] with an array of integers to
1184 ** be returned by the matchinfo() function. Argument zArg contains the
1185 ** format string passed as the second argument to matchinfo (or the
1186 ** default value "pcx" if no second argument was specified). The format
1187 ** string has already been validated and the pInfo->aMatchinfo[] array
1188 ** is guaranteed to be large enough for the output.
1190 ** If bGlobal is true, then populate all fields of the matchinfo() output.
1191 ** If it is false, then assume that those fields that do not change between
1192 ** rows (i.e. FTS3_MATCHINFO_NPHRASE, NCOL, NDOC, AVGLENGTH and part of HITS)
1193 ** have already been populated.
1195 ** Return SQLITE_OK if successful, or an SQLite error code if an error
1196 ** occurs. If a value other than SQLITE_OK is returned, the state the
1197 ** pInfo->aMatchinfo[] buffer is left in is undefined.
1199 static int fts3MatchinfoValues(
1200 Fts3Cursor
*pCsr
, /* FTS3 cursor object */
1201 int bGlobal
, /* True to grab the global stats */
1202 MatchInfo
*pInfo
, /* Matchinfo context object */
1203 const char *zArg
/* Matchinfo format string */
1207 Fts3Table
*pTab
= (Fts3Table
*)pCsr
->base
.pVtab
;
1208 sqlite3_stmt
*pSelect
= 0;
1210 for(i
=0; rc
==SQLITE_OK
&& zArg
[i
]; i
++){
1211 pInfo
->flag
= zArg
[i
];
1213 case FTS3_MATCHINFO_NPHRASE
:
1214 if( bGlobal
) pInfo
->aMatchinfo
[0] = pInfo
->nPhrase
;
1217 case FTS3_MATCHINFO_NCOL
:
1218 if( bGlobal
) pInfo
->aMatchinfo
[0] = pInfo
->nCol
;
1221 case FTS3_MATCHINFO_NDOC
:
1223 sqlite3_int64 nDoc
= 0;
1224 rc
= fts3MatchinfoSelectDoctotal(pTab
, &pSelect
, &nDoc
, 0);
1225 pInfo
->aMatchinfo
[0] = (u32
)nDoc
;
1229 case FTS3_MATCHINFO_AVGLENGTH
:
1231 sqlite3_int64 nDoc
; /* Number of rows in table */
1232 const char *a
; /* Aggregate column length array */
1234 rc
= fts3MatchinfoSelectDoctotal(pTab
, &pSelect
, &nDoc
, &a
);
1235 if( rc
==SQLITE_OK
){
1237 for(iCol
=0; iCol
<pInfo
->nCol
; iCol
++){
1239 sqlite3_int64 nToken
;
1240 a
+= sqlite3Fts3GetVarint(a
, &nToken
);
1241 iVal
= (u32
)(((u32
)(nToken
&0xffffffff)+nDoc
/2)/nDoc
);
1242 pInfo
->aMatchinfo
[iCol
] = iVal
;
1248 case FTS3_MATCHINFO_LENGTH
: {
1249 sqlite3_stmt
*pSelectDocsize
= 0;
1250 rc
= sqlite3Fts3SelectDocsize(pTab
, pCsr
->iPrevId
, &pSelectDocsize
);
1251 if( rc
==SQLITE_OK
){
1253 const char *a
= sqlite3_column_blob(pSelectDocsize
, 0);
1254 for(iCol
=0; iCol
<pInfo
->nCol
; iCol
++){
1255 sqlite3_int64 nToken
;
1256 a
+= sqlite3Fts3GetVarint(a
, &nToken
);
1257 pInfo
->aMatchinfo
[iCol
] = (u32
)nToken
;
1260 sqlite3_reset(pSelectDocsize
);
1264 case FTS3_MATCHINFO_LCS
:
1265 rc
= fts3ExprLoadDoclists(pCsr
, 0, 0);
1266 if( rc
==SQLITE_OK
){
1267 rc
= fts3MatchinfoLcs(pCsr
, pInfo
);
1271 case FTS3_MATCHINFO_LHITS_BM
:
1272 case FTS3_MATCHINFO_LHITS
: {
1273 int nZero
= fts3MatchinfoSize(pInfo
, zArg
[i
]) * sizeof(u32
);
1274 memset(pInfo
->aMatchinfo
, 0, nZero
);
1275 fts3ExprLHitGather(pCsr
->pExpr
, pInfo
);
1281 assert( zArg
[i
]==FTS3_MATCHINFO_HITS
);
1282 pExpr
= pCsr
->pExpr
;
1283 rc
= fts3ExprLoadDoclists(pCsr
, 0, 0);
1284 if( rc
!=SQLITE_OK
) break;
1286 if( pCsr
->pDeferred
){
1287 rc
= fts3MatchinfoSelectDoctotal(pTab
, &pSelect
, &pInfo
->nDoc
, 0);
1288 if( rc
!=SQLITE_OK
) break;
1290 rc
= fts3ExprIterate(pExpr
, fts3ExprGlobalHitsCb
,(void*)pInfo
);
1291 sqlite3Fts3EvalTestDeferred(pCsr
, &rc
);
1292 if( rc
!=SQLITE_OK
) break;
1294 (void)fts3ExprIterate(pExpr
, fts3ExprLocalHitsCb
,(void*)pInfo
);
1299 pInfo
->aMatchinfo
+= fts3MatchinfoSize(pInfo
, zArg
[i
]);
1302 sqlite3_reset(pSelect
);
1308 ** Populate pCsr->aMatchinfo[] with data for the current row. The
1309 ** 'matchinfo' data is an array of 32-bit unsigned integers (C type u32).
1311 static void fts3GetMatchinfo(
1312 sqlite3_context
*pCtx
, /* Return results here */
1313 Fts3Cursor
*pCsr
, /* FTS3 Cursor object */
1314 const char *zArg
/* Second argument to matchinfo() function */
1317 Fts3Table
*pTab
= (Fts3Table
*)pCsr
->base
.pVtab
;
1319 int bGlobal
= 0; /* Collect 'global' stats as well as local */
1322 void (*xDestroyOut
)(void*) = 0;
1324 memset(&sInfo
, 0, sizeof(MatchInfo
));
1325 sInfo
.pCursor
= pCsr
;
1326 sInfo
.nCol
= pTab
->nColumn
;
1328 /* If there is cached matchinfo() data, but the format string for the
1329 ** cache does not match the format string for this request, discard
1330 ** the cached data. */
1331 if( pCsr
->pMIBuffer
&& strcmp(pCsr
->pMIBuffer
->zMatchinfo
, zArg
) ){
1332 sqlite3Fts3MIBufferFree(pCsr
->pMIBuffer
);
1333 pCsr
->pMIBuffer
= 0;
1336 /* If Fts3Cursor.pMIBuffer is NULL, then this is the first time the
1337 ** matchinfo function has been called for this query. In this case
1338 ** allocate the array used to accumulate the matchinfo data and
1339 ** initialize those elements that are constant for every row.
1341 if( pCsr
->pMIBuffer
==0 ){
1342 int nMatchinfo
= 0; /* Number of u32 elements in match-info */
1343 int i
; /* Used to iterate through zArg */
1345 /* Determine the number of phrases in the query */
1346 pCsr
->nPhrase
= fts3ExprPhraseCount(pCsr
->pExpr
);
1347 sInfo
.nPhrase
= pCsr
->nPhrase
;
1349 /* Determine the number of integers in the buffer returned by this call. */
1350 for(i
=0; zArg
[i
]; i
++){
1352 if( fts3MatchinfoCheck(pTab
, zArg
[i
], &zErr
) ){
1353 sqlite3_result_error(pCtx
, zErr
, -1);
1357 nMatchinfo
+= fts3MatchinfoSize(&sInfo
, zArg
[i
]);
1360 /* Allocate space for Fts3Cursor.aMatchinfo[] and Fts3Cursor.zMatchinfo. */
1361 pCsr
->pMIBuffer
= fts3MIBufferNew(nMatchinfo
, zArg
);
1362 if( !pCsr
->pMIBuffer
) rc
= SQLITE_NOMEM
;
1364 pCsr
->isMatchinfoNeeded
= 1;
1368 if( rc
==SQLITE_OK
){
1369 xDestroyOut
= fts3MIBufferAlloc(pCsr
->pMIBuffer
, &aOut
);
1370 if( xDestroyOut
==0 ){
1375 if( rc
==SQLITE_OK
){
1376 sInfo
.aMatchinfo
= aOut
;
1377 sInfo
.nPhrase
= pCsr
->nPhrase
;
1378 rc
= fts3MatchinfoValues(pCsr
, bGlobal
, &sInfo
, zArg
);
1380 fts3MIBufferSetGlobal(pCsr
->pMIBuffer
);
1384 if( rc
!=SQLITE_OK
){
1385 sqlite3_result_error_code(pCtx
, rc
);
1386 if( xDestroyOut
) xDestroyOut(aOut
);
1388 int n
= pCsr
->pMIBuffer
->nElem
* sizeof(u32
);
1389 sqlite3_result_blob(pCtx
, aOut
, n
, xDestroyOut
);
1394 ** Implementation of snippet() function.
1396 void sqlite3Fts3Snippet(
1397 sqlite3_context
*pCtx
, /* SQLite function call context */
1398 Fts3Cursor
*pCsr
, /* Cursor object */
1399 const char *zStart
, /* Snippet start text - "<b>" */
1400 const char *zEnd
, /* Snippet end text - "</b>" */
1401 const char *zEllipsis
, /* Snippet ellipsis text - "<b>...</b>" */
1402 int iCol
, /* Extract snippet from this column */
1403 int nToken
/* Approximate number of tokens in snippet */
1405 Fts3Table
*pTab
= (Fts3Table
*)pCsr
->base
.pVtab
;
1408 StrBuffer res
= {0, 0, 0};
1410 /* The returned text includes up to four fragments of text extracted from
1411 ** the data in the current row. The first iteration of the for(...) loop
1412 ** below attempts to locate a single fragment of text nToken tokens in
1413 ** size that contains at least one instance of all phrases in the query
1414 ** expression that appear in the current row. If such a fragment of text
1415 ** cannot be found, the second iteration of the loop attempts to locate
1416 ** a pair of fragments, and so on.
1418 int nSnippet
= 0; /* Number of fragments in this snippet */
1419 SnippetFragment aSnippet
[4]; /* Maximum of 4 fragments per snippet */
1420 int nFToken
= -1; /* Number of tokens in each fragment */
1423 sqlite3_result_text(pCtx
, "", 0, SQLITE_STATIC
);
1427 for(nSnippet
=1; 1; nSnippet
++){
1429 int iSnip
; /* Loop counter 0..nSnippet-1 */
1430 u64 mCovered
= 0; /* Bitmask of phrases covered by snippet */
1431 u64 mSeen
= 0; /* Bitmask of phrases seen by BestSnippet() */
1434 nFToken
= (nToken
+nSnippet
-1) / nSnippet
;
1436 nFToken
= -1 * nToken
;
1439 for(iSnip
=0; iSnip
<nSnippet
; iSnip
++){
1440 int iBestScore
= -1; /* Best score of columns checked so far */
1441 int iRead
; /* Used to iterate through columns */
1442 SnippetFragment
*pFragment
= &aSnippet
[iSnip
];
1444 memset(pFragment
, 0, sizeof(*pFragment
));
1446 /* Loop through all columns of the table being considered for snippets.
1447 ** If the iCol argument to this function was negative, this means all
1448 ** columns of the FTS3 table. Otherwise, only column iCol is considered.
1450 for(iRead
=0; iRead
<pTab
->nColumn
; iRead
++){
1451 SnippetFragment sF
= {0, 0, 0, 0};
1453 if( iCol
>=0 && iRead
!=iCol
) continue;
1455 /* Find the best snippet of nFToken tokens in column iRead. */
1456 rc
= fts3BestSnippet(nFToken
, pCsr
, iRead
, mCovered
, &mSeen
, &sF
, &iS
);
1457 if( rc
!=SQLITE_OK
){
1460 if( iS
>iBestScore
){
1466 mCovered
|= pFragment
->covered
;
1469 /* If all query phrases seen by fts3BestSnippet() are present in at least
1470 ** one of the nSnippet snippet fragments, break out of the loop.
1472 assert( (mCovered
&mSeen
)==mCovered
);
1473 if( mSeen
==mCovered
|| nSnippet
==SizeofArray(aSnippet
) ) break;
1476 assert( nFToken
>0 );
1478 for(i
=0; i
<nSnippet
&& rc
==SQLITE_OK
; i
++){
1479 rc
= fts3SnippetText(pCsr
, &aSnippet
[i
],
1480 i
, (i
==nSnippet
-1), nFToken
, zStart
, zEnd
, zEllipsis
, &res
1485 sqlite3Fts3SegmentsClose(pTab
);
1486 if( rc
!=SQLITE_OK
){
1487 sqlite3_result_error_code(pCtx
, rc
);
1488 sqlite3_free(res
.z
);
1490 sqlite3_result_text(pCtx
, res
.z
, -1, sqlite3_free
);
1495 typedef struct TermOffset TermOffset
;
1496 typedef struct TermOffsetCtx TermOffsetCtx
;
1499 char *pList
; /* Position-list */
1500 int iPos
; /* Position just read from pList */
1501 int iOff
; /* Offset of this term from read positions */
1504 struct TermOffsetCtx
{
1506 int iCol
; /* Column of table to populate aTerm for */
1508 sqlite3_int64 iDocid
;
1513 ** This function is an fts3ExprIterate() callback used by sqlite3Fts3Offsets().
1515 static int fts3ExprTermOffsetInit(Fts3Expr
*pExpr
, int iPhrase
, void *ctx
){
1516 TermOffsetCtx
*p
= (TermOffsetCtx
*)ctx
;
1517 int nTerm
; /* Number of tokens in phrase */
1518 int iTerm
; /* For looping through nTerm phrase terms */
1519 char *pList
; /* Pointer to position list for phrase */
1520 int iPos
= 0; /* First position in position-list */
1523 UNUSED_PARAMETER(iPhrase
);
1524 rc
= sqlite3Fts3EvalPhrasePoslist(p
->pCsr
, pExpr
, p
->iCol
, &pList
);
1525 nTerm
= pExpr
->pPhrase
->nToken
;
1527 fts3GetDeltaPosition(&pList
, &iPos
);
1531 for(iTerm
=0; iTerm
<nTerm
; iTerm
++){
1532 TermOffset
*pT
= &p
->aTerm
[p
->iTerm
++];
1533 pT
->iOff
= nTerm
-iTerm
-1;
1542 ** Implementation of offsets() function.
1544 void sqlite3Fts3Offsets(
1545 sqlite3_context
*pCtx
, /* SQLite function call context */
1546 Fts3Cursor
*pCsr
/* Cursor object */
1548 Fts3Table
*pTab
= (Fts3Table
*)pCsr
->base
.pVtab
;
1549 sqlite3_tokenizer_module
const *pMod
= pTab
->pTokenizer
->pModule
;
1550 int rc
; /* Return Code */
1551 int nToken
; /* Number of tokens in query */
1552 int iCol
; /* Column currently being processed */
1553 StrBuffer res
= {0, 0, 0}; /* Result string */
1554 TermOffsetCtx sCtx
; /* Context for fts3ExprTermOffsetInit() */
1557 sqlite3_result_text(pCtx
, "", 0, SQLITE_STATIC
);
1561 memset(&sCtx
, 0, sizeof(sCtx
));
1562 assert( pCsr
->isRequireSeek
==0 );
1564 /* Count the number of terms in the query */
1565 rc
= fts3ExprLoadDoclists(pCsr
, 0, &nToken
);
1566 if( rc
!=SQLITE_OK
) goto offsets_out
;
1568 /* Allocate the array of TermOffset iterators. */
1569 sCtx
.aTerm
= (TermOffset
*)sqlite3_malloc(sizeof(TermOffset
)*nToken
);
1570 if( 0==sCtx
.aTerm
){
1574 sCtx
.iDocid
= pCsr
->iPrevId
;
1577 /* Loop through the table columns, appending offset information to
1578 ** string-buffer res for each column.
1580 for(iCol
=0; iCol
<pTab
->nColumn
; iCol
++){
1581 sqlite3_tokenizer_cursor
*pC
; /* Tokenizer cursor */
1582 const char *ZDUMMY
; /* Dummy argument used with xNext() */
1583 int NDUMMY
= 0; /* Dummy argument used with xNext() */
1590 /* Initialize the contents of sCtx.aTerm[] for column iCol. There is
1591 ** no way that this operation can fail, so the return code from
1592 ** fts3ExprIterate() can be discarded.
1596 (void)fts3ExprIterate(pCsr
->pExpr
, fts3ExprTermOffsetInit
, (void*)&sCtx
);
1598 /* Retrieve the text stored in column iCol. If an SQL NULL is stored
1599 ** in column iCol, jump immediately to the next iteration of the loop.
1600 ** If an OOM occurs while retrieving the data (this can happen if SQLite
1601 ** needs to transform the data from utf-16 to utf-8), return SQLITE_NOMEM
1604 zDoc
= (const char *)sqlite3_column_text(pCsr
->pStmt
, iCol
+1);
1605 nDoc
= sqlite3_column_bytes(pCsr
->pStmt
, iCol
+1);
1607 if( sqlite3_column_type(pCsr
->pStmt
, iCol
+1)==SQLITE_NULL
){
1614 /* Initialize a tokenizer iterator to iterate through column iCol. */
1615 rc
= sqlite3Fts3OpenTokenizer(pTab
->pTokenizer
, pCsr
->iLangid
,
1618 if( rc
!=SQLITE_OK
) goto offsets_out
;
1620 rc
= pMod
->xNext(pC
, &ZDUMMY
, &NDUMMY
, &iStart
, &iEnd
, &iCurrent
);
1621 while( rc
==SQLITE_OK
){
1622 int i
; /* Used to loop through terms */
1623 int iMinPos
= 0x7FFFFFFF; /* Position of next token */
1624 TermOffset
*pTerm
= 0; /* TermOffset associated with next token */
1626 for(i
=0; i
<nToken
; i
++){
1627 TermOffset
*pT
= &sCtx
.aTerm
[i
];
1628 if( pT
->pList
&& (pT
->iPos
-pT
->iOff
)<iMinPos
){
1629 iMinPos
= pT
->iPos
-pT
->iOff
;
1635 /* All offsets for this column have been gathered. */
1638 assert( iCurrent
<=iMinPos
);
1639 if( 0==(0xFE&*pTerm
->pList
) ){
1642 fts3GetDeltaPosition(&pTerm
->pList
, &pTerm
->iPos
);
1644 while( rc
==SQLITE_OK
&& iCurrent
<iMinPos
){
1645 rc
= pMod
->xNext(pC
, &ZDUMMY
, &NDUMMY
, &iStart
, &iEnd
, &iCurrent
);
1647 if( rc
==SQLITE_OK
){
1649 sqlite3_snprintf(sizeof(aBuffer
), aBuffer
,
1650 "%d %d %d %d ", iCol
, pTerm
-sCtx
.aTerm
, iStart
, iEnd
-iStart
1652 rc
= fts3StringAppend(&res
, aBuffer
, -1);
1653 }else if( rc
==SQLITE_DONE
&& pTab
->zContentTbl
==0 ){
1654 rc
= FTS_CORRUPT_VTAB
;
1658 if( rc
==SQLITE_DONE
){
1663 if( rc
!=SQLITE_OK
) goto offsets_out
;
1667 sqlite3_free(sCtx
.aTerm
);
1668 assert( rc
!=SQLITE_DONE
);
1669 sqlite3Fts3SegmentsClose(pTab
);
1670 if( rc
!=SQLITE_OK
){
1671 sqlite3_result_error_code(pCtx
, rc
);
1672 sqlite3_free(res
.z
);
1674 sqlite3_result_text(pCtx
, res
.z
, res
.n
-1, sqlite3_free
);
1680 ** Implementation of matchinfo() function.
1682 void sqlite3Fts3Matchinfo(
1683 sqlite3_context
*pContext
, /* Function call context */
1684 Fts3Cursor
*pCsr
, /* FTS3 table cursor */
1685 const char *zArg
/* Second arg to matchinfo() function */
1687 Fts3Table
*pTab
= (Fts3Table
*)pCsr
->base
.pVtab
;
1688 const char *zFormat
;
1693 zFormat
= FTS3_MATCHINFO_DEFAULT
;
1697 sqlite3_result_blob(pContext
, "", 0, SQLITE_STATIC
);
1700 /* Retrieve matchinfo() data. */
1701 fts3GetMatchinfo(pContext
, pCsr
, zFormat
);
1702 sqlite3Fts3SegmentsClose(pTab
);