Enhance the command-line completion extension to return the names of
[sqlite.git] / ext / fts3 / fts3_snippet.c
bloba0771c0b305b49ea0166799f650d2d4bbbdf738f
1 /*
2 ** 2009 Oct 23
3 **
4 ** The author disclaims copyright to this source code. In place of
5 ** a legal notice, here is a blessing:
6 **
7 ** May you do good and not evil.
8 ** May you find forgiveness for yourself and forgive others.
9 ** May you share freely, never taking more than you give.
11 ******************************************************************************
14 #include "fts3Int.h"
15 #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
17 #include <string.h>
18 #include <assert.h>
/*
** Characters that may appear in the second argument to matchinfo().
** The comment to the right of each directive gives the number of values
** that directive contributes to the matchinfo array (see
** fts3MatchinfoSize()).
*/
#define FTS3_MATCHINFO_NPHRASE   'p'        /* 1 value */
#define FTS3_MATCHINFO_NCOL      'c'        /* 1 value */
#define FTS3_MATCHINFO_NDOC      'n'        /* 1 value */
#define FTS3_MATCHINFO_AVGLENGTH 'a'        /* nCol values */
#define FTS3_MATCHINFO_LENGTH    'l'        /* nCol values */
#define FTS3_MATCHINFO_LCS       's'        /* nCol values */
#define FTS3_MATCHINFO_HITS      'x'        /* 3*nCol*nPhrase values */
#define FTS3_MATCHINFO_LHITS     'y'        /* nCol*nPhrase values */
#define FTS3_MATCHINFO_LHITS_BM  'b'        /* nCol*nPhrase values */

/*
** The default value for the second argument to matchinfo().
*/
#define FTS3_MATCHINFO_DEFAULT   "pcx"
/*
** Used as an fts3ExprIterate() context when loading phrase doclists to
** Fts3Expr.aDoclist[]/nDoclist.
**
** fts3ExprLoadDoclistsCb() increments nPhrase once per visited phrase and
** adds the phrase's token count to nToken.
*/
typedef struct LoadDoclistCtx LoadDoclistCtx;
struct LoadDoclistCtx {
  Fts3Cursor *pCsr;               /* FTS3 Cursor */
  int nPhrase;                    /* Number of phrases seen so far */
  int nToken;                     /* Number of tokens seen so far */
};
/*
** The following types are used as part of the implementation of the
** fts3BestSnippet() routine.
*/
typedef struct SnippetIter SnippetIter;
typedef struct SnippetPhrase SnippetPhrase;
typedef struct SnippetFragment SnippetFragment;

/*
** State used to walk through the candidate snippets of one column of the
** current row. fts3BestSnippet() sets iCurrent to -1 before the first call
** to fts3SnippetNextCandidate(), which treats a negative value as "no
** candidate visited yet".
*/
struct SnippetIter {
  Fts3Cursor *pCsr;               /* Cursor snippet is being generated from */
  int iCol;                       /* Extract snippet from this column */
  int nSnippet;                   /* Requested snippet length (in tokens) */
  int nPhrase;                    /* Number of phrases in query */
  SnippetPhrase *aPhrase;         /* Array of size nPhrase */
  int iCurrent;                   /* First token of current snippet */
};
/*
** Per-phrase state for a SnippetIter. Two cursors run over the same
** position list: fts3SnippetNextCandidate() advances the "head" cursor past
** the end of the current candidate snippet and the "tail" cursor to the
** first position at or after the start of the candidate. A NULL pHead or
** pTail means that cursor has reached the end of the position list (see
** fts3SnippetAdvance()).
*/
struct SnippetPhrase {
  int nToken;                     /* Number of tokens in phrase */
  char *pList;                    /* Pointer to start of phrase position list */
  int iHead;                      /* Next value in position list */
  char *pHead;                    /* Position list data following iHead */
  int iTail;                      /* Next value in trailing position list */
  char *pTail;                    /* Position list data following iTail */
};
/*
** Describes one selected snippet. Populated by fts3BestSnippet() and
** consumed by fts3SnippetText(). Bit N of hlmask corresponds to the token
** at offset N from iPos.
*/
struct SnippetFragment {
  int iCol;                       /* Column snippet is extracted from */
  int iPos;                       /* Index of first token in snippet */
  u64 covered;                    /* Mask of query phrases covered */
  u64 hlmask;                     /* Mask of snippet terms to highlight */
};
/*
** This type is used as an fts3ExprIterate() context object while
** accumulating the data returned by the matchinfo() function.
*/
typedef struct MatchInfo MatchInfo;
struct MatchInfo {
  Fts3Cursor *pCursor;            /* FTS3 Cursor */
  int nCol;                       /* Number of columns in table */
  int nPhrase;                    /* Number of matchable phrases in query */
  sqlite3_int64 nDoc;             /* Number of docs in database */
  char flag;                      /* Directive being processed; read by
                                  ** fts3ExprLHits() to choose between the
                                  ** 'y' and 'b' output formats */
  u32 *aMatchinfo;                /* Pre-allocated buffer */
};
/*
** An instance of this structure is used to manage a pair of buffers, each
** (nElem * sizeof(u32)) bytes in size. See the MatchinfoBuffer code below
** for details.
**
** Layout of aMatchinfo[] as set up by fts3MIBufferNew():
**
**   aMatchinfo[0]                      byte offset of aMatchinfo[1] from
**                                      the start of this object
**   aMatchinfo[1..nElem]               first buffer
**   aMatchinfo[nElem+1]                byte offset of aMatchinfo[nElem+2]
**   aMatchinfo[nElem+2..2*nElem+1]     second buffer
**
** The offset stored immediately before each buffer lets fts3MIBufferFree()
** recover the MatchinfoBuffer pointer from a buffer pointer.
*/
struct MatchinfoBuffer {
  u8 aRef[3];                     /* aRef[0]: object itself is referenced;
                                  ** aRef[1], aRef[2]: first/second buffer
                                  ** is currently handed out */
  int nElem;                      /* Number of u32 elements in each buffer */
  int bGlobal;                    /* Set if global data is loaded */
  char *zMatchinfo;               /* Copy of the matchinfo format string,
                                  ** stored after the two buffers */
  u32 aMatchinfo[1];              /* Start of the variable-length area */
};
/*
** The snippet() and offsets() functions both return text values. An instance
** of the following structure is used to accumulate those values while the
** functions are running. See fts3StringAppend() for details.
*/
typedef struct StrBuffer StrBuffer;
struct StrBuffer {
  char *z;                        /* Pointer to buffer containing string */
  int n;                          /* Length of z in bytes (excl. nul-term) */
  int nAlloc;                     /* Allocated size of buffer z in bytes */
};
124 /*************************************************************************
125 ** Start of MatchinfoBuffer code.
129 ** Allocate a two-slot MatchinfoBuffer object.
131 static MatchinfoBuffer *fts3MIBufferNew(int nElem, const char *zMatchinfo){
132 MatchinfoBuffer *pRet;
133 int nByte = sizeof(u32) * (2*nElem + 1) + sizeof(MatchinfoBuffer);
134 int nStr = (int)strlen(zMatchinfo);
136 pRet = sqlite3_malloc(nByte + nStr+1);
137 if( pRet ){
138 memset(pRet, 0, nByte);
139 pRet->aMatchinfo[0] = (u8*)(&pRet->aMatchinfo[1]) - (u8*)pRet;
140 pRet->aMatchinfo[1+nElem] = pRet->aMatchinfo[0] + sizeof(u32)*(nElem+1);
141 pRet->nElem = nElem;
142 pRet->zMatchinfo = ((char*)pRet) + nByte;
143 memcpy(pRet->zMatchinfo, zMatchinfo, nStr+1);
144 pRet->aRef[0] = 1;
147 return pRet;
150 static void fts3MIBufferFree(void *p){
151 MatchinfoBuffer *pBuf = (MatchinfoBuffer*)((u8*)p - ((u32*)p)[-1]);
153 assert( (u32*)p==&pBuf->aMatchinfo[1]
154 || (u32*)p==&pBuf->aMatchinfo[pBuf->nElem+2]
156 if( (u32*)p==&pBuf->aMatchinfo[1] ){
157 pBuf->aRef[1] = 0;
158 }else{
159 pBuf->aRef[2] = 0;
162 if( pBuf->aRef[0]==0 && pBuf->aRef[1]==0 && pBuf->aRef[2]==0 ){
163 sqlite3_free(pBuf);
167 static void (*fts3MIBufferAlloc(MatchinfoBuffer *p, u32 **paOut))(void*){
168 void (*xRet)(void*) = 0;
169 u32 *aOut = 0;
171 if( p->aRef[1]==0 ){
172 p->aRef[1] = 1;
173 aOut = &p->aMatchinfo[1];
174 xRet = fts3MIBufferFree;
176 else if( p->aRef[2]==0 ){
177 p->aRef[2] = 1;
178 aOut = &p->aMatchinfo[p->nElem+2];
179 xRet = fts3MIBufferFree;
180 }else{
181 aOut = (u32*)sqlite3_malloc(p->nElem * sizeof(u32));
182 if( aOut ){
183 xRet = sqlite3_free;
184 if( p->bGlobal ) memcpy(aOut, &p->aMatchinfo[1], p->nElem*sizeof(u32));
188 *paOut = aOut;
189 return xRet;
192 static void fts3MIBufferSetGlobal(MatchinfoBuffer *p){
193 p->bGlobal = 1;
194 memcpy(&p->aMatchinfo[2+p->nElem], &p->aMatchinfo[1], p->nElem*sizeof(u32));
198 ** Free a MatchinfoBuffer object allocated using fts3MIBufferNew()
200 void sqlite3Fts3MIBufferFree(MatchinfoBuffer *p){
201 if( p ){
202 assert( p->aRef[0]==1 );
203 p->aRef[0] = 0;
204 if( p->aRef[0]==0 && p->aRef[1]==0 && p->aRef[2]==0 ){
205 sqlite3_free(p);
211 ** End of MatchinfoBuffer code.
212 *************************************************************************/
/*
** Step a position-list cursor forward by one element.
**
** A position list is a sorted list of unique integers. Each element is
** stored as an FTS3 varint whose value is the difference between the
** element and its predecessor, plus two. So the position-list:
**
**     4 9 113
**
** is stored as the three varints:
**
**     6 7 106
**
** On entry *pp points to the varint of an element and *piPos holds the
** value of the previous element. On return *piPos holds the value of the
** element just read and *pp points to the following varint.
*/
static void fts3GetDeltaPosition(char **pp, int *piPos){
  char *pVarint = *pp;
  int iDelta;

  pVarint += fts3GetVarint32(pVarint, &iDelta);
  *pp = pVarint;
  *piPos += (iDelta-2);
}
242 ** Helper function for fts3ExprIterate() (see below).
244 static int fts3ExprIterate2(
245 Fts3Expr *pExpr, /* Expression to iterate phrases of */
246 int *piPhrase, /* Pointer to phrase counter */
247 int (*x)(Fts3Expr*,int,void*), /* Callback function to invoke for phrases */
248 void *pCtx /* Second argument to pass to callback */
250 int rc; /* Return code */
251 int eType = pExpr->eType; /* Type of expression node pExpr */
253 if( eType!=FTSQUERY_PHRASE ){
254 assert( pExpr->pLeft && pExpr->pRight );
255 rc = fts3ExprIterate2(pExpr->pLeft, piPhrase, x, pCtx);
256 if( rc==SQLITE_OK && eType!=FTSQUERY_NOT ){
257 rc = fts3ExprIterate2(pExpr->pRight, piPhrase, x, pCtx);
259 }else{
260 rc = x(pExpr, *piPhrase, pCtx);
261 (*piPhrase)++;
263 return rc;
267 ** Iterate through all phrase nodes in an FTS3 query, except those that
268 ** are part of a sub-tree that is the right-hand-side of a NOT operator.
269 ** For each phrase node found, the supplied callback function is invoked.
271 ** If the callback function returns anything other than SQLITE_OK,
272 ** the iteration is abandoned and the error code returned immediately.
273 ** Otherwise, SQLITE_OK is returned after a callback has been made for
274 ** all eligible phrase nodes.
276 static int fts3ExprIterate(
277 Fts3Expr *pExpr, /* Expression to iterate phrases of */
278 int (*x)(Fts3Expr*,int,void*), /* Callback function to invoke for phrases */
279 void *pCtx /* Second argument to pass to callback */
281 int iPhrase = 0; /* Variable used as the phrase counter */
282 return fts3ExprIterate2(pExpr, &iPhrase, x, pCtx);
287 ** This is an fts3ExprIterate() callback used while loading the doclists
288 ** for each phrase into Fts3Expr.aDoclist[]/nDoclist. See also
289 ** fts3ExprLoadDoclists().
291 static int fts3ExprLoadDoclistsCb(Fts3Expr *pExpr, int iPhrase, void *ctx){
292 int rc = SQLITE_OK;
293 Fts3Phrase *pPhrase = pExpr->pPhrase;
294 LoadDoclistCtx *p = (LoadDoclistCtx *)ctx;
296 UNUSED_PARAMETER(iPhrase);
298 p->nPhrase++;
299 p->nToken += pPhrase->nToken;
301 return rc;
305 ** Load the doclists for each phrase in the query associated with FTS3 cursor
306 ** pCsr.
308 ** If pnPhrase is not NULL, then *pnPhrase is set to the number of matchable
309 ** phrases in the expression (all phrases except those directly or
310 ** indirectly descended from the right-hand-side of a NOT operator). If
311 ** pnToken is not NULL, then it is set to the number of tokens in all
312 ** matchable phrases of the expression.
314 static int fts3ExprLoadDoclists(
315 Fts3Cursor *pCsr, /* Fts3 cursor for current query */
316 int *pnPhrase, /* OUT: Number of phrases in query */
317 int *pnToken /* OUT: Number of tokens in query */
319 int rc; /* Return Code */
320 LoadDoclistCtx sCtx = {0,0,0}; /* Context for fts3ExprIterate() */
321 sCtx.pCsr = pCsr;
322 rc = fts3ExprIterate(pCsr->pExpr, fts3ExprLoadDoclistsCb, (void *)&sCtx);
323 if( pnPhrase ) *pnPhrase = sCtx.nPhrase;
324 if( pnToken ) *pnToken = sCtx.nToken;
325 return rc;
328 static int fts3ExprPhraseCountCb(Fts3Expr *pExpr, int iPhrase, void *ctx){
329 (*(int *)ctx)++;
330 pExpr->iPhrase = iPhrase;
331 return SQLITE_OK;
333 static int fts3ExprPhraseCount(Fts3Expr *pExpr){
334 int nPhrase = 0;
335 (void)fts3ExprIterate(pExpr, fts3ExprPhraseCountCb, (void *)&nPhrase);
336 return nPhrase;
/*
** Move the position list iterator described by (*ppIter, *piIter) forward
** until it points to the first element with a value greater than or equal
** to iNext.
**
** If the end of the position list (a 0x00 or 0x01 byte) is reached first,
** *ppIter is set to NULL and *piIter to -1. A NULL *ppIter on entry means
** the iterator is already exhausted, and nothing is changed.
*/
static void fts3SnippetAdvance(char **ppIter, int *piIter, int iNext){
  char *pPos = *ppIter;
  int iPos;

  if( pPos==0 ) return;

  iPos = *piIter;
  while( iPos<iNext ){
    if( (*pPos & 0xFE)==0 ){
      /* Terminator byte: no further positions available. */
      *piIter = -1;
      *ppIter = 0;
      return;
    }
    fts3GetDeltaPosition(&pPos, &iPos);
  }

  *piIter = iPos;
  *ppIter = pPos;
}
364 ** Advance the snippet iterator to the next candidate snippet.
366 static int fts3SnippetNextCandidate(SnippetIter *pIter){
367 int i; /* Loop counter */
369 if( pIter->iCurrent<0 ){
370 /* The SnippetIter object has just been initialized. The first snippet
371 ** candidate always starts at offset 0 (even if this candidate has a
372 ** score of 0.0).
374 pIter->iCurrent = 0;
376 /* Advance the 'head' iterator of each phrase to the first offset that
377 ** is greater than or equal to (iNext+nSnippet).
379 for(i=0; i<pIter->nPhrase; i++){
380 SnippetPhrase *pPhrase = &pIter->aPhrase[i];
381 fts3SnippetAdvance(&pPhrase->pHead, &pPhrase->iHead, pIter->nSnippet);
383 }else{
384 int iStart;
385 int iEnd = 0x7FFFFFFF;
387 for(i=0; i<pIter->nPhrase; i++){
388 SnippetPhrase *pPhrase = &pIter->aPhrase[i];
389 if( pPhrase->pHead && pPhrase->iHead<iEnd ){
390 iEnd = pPhrase->iHead;
393 if( iEnd==0x7FFFFFFF ){
394 return 1;
397 pIter->iCurrent = iStart = iEnd - pIter->nSnippet + 1;
398 for(i=0; i<pIter->nPhrase; i++){
399 SnippetPhrase *pPhrase = &pIter->aPhrase[i];
400 fts3SnippetAdvance(&pPhrase->pHead, &pPhrase->iHead, iEnd+1);
401 fts3SnippetAdvance(&pPhrase->pTail, &pPhrase->iTail, iStart);
405 return 0;
409 ** Retrieve information about the current candidate snippet of snippet
410 ** iterator pIter.
412 static void fts3SnippetDetails(
413 SnippetIter *pIter, /* Snippet iterator */
414 u64 mCovered, /* Bitmask of phrases already covered */
415 int *piToken, /* OUT: First token of proposed snippet */
416 int *piScore, /* OUT: "Score" for this snippet */
417 u64 *pmCover, /* OUT: Bitmask of phrases covered */
418 u64 *pmHighlight /* OUT: Bitmask of terms to highlight */
420 int iStart = pIter->iCurrent; /* First token of snippet */
421 int iScore = 0; /* Score of this snippet */
422 int i; /* Loop counter */
423 u64 mCover = 0; /* Mask of phrases covered by this snippet */
424 u64 mHighlight = 0; /* Mask of tokens to highlight in snippet */
426 for(i=0; i<pIter->nPhrase; i++){
427 SnippetPhrase *pPhrase = &pIter->aPhrase[i];
428 if( pPhrase->pTail ){
429 char *pCsr = pPhrase->pTail;
430 int iCsr = pPhrase->iTail;
432 while( iCsr<(iStart+pIter->nSnippet) ){
433 int j;
434 u64 mPhrase = (u64)1 << i;
435 u64 mPos = (u64)1 << (iCsr - iStart);
436 assert( iCsr>=iStart );
437 if( (mCover|mCovered)&mPhrase ){
438 iScore++;
439 }else{
440 iScore += 1000;
442 mCover |= mPhrase;
444 for(j=0; j<pPhrase->nToken; j++){
445 mHighlight |= (mPos>>j);
448 if( 0==(*pCsr & 0x0FE) ) break;
449 fts3GetDeltaPosition(&pCsr, &iCsr);
454 /* Set the output variables before returning. */
455 *piToken = iStart;
456 *piScore = iScore;
457 *pmCover = mCover;
458 *pmHighlight = mHighlight;
462 ** This function is an fts3ExprIterate() callback used by fts3BestSnippet().
463 ** Each invocation populates an element of the SnippetIter.aPhrase[] array.
465 static int fts3SnippetFindPositions(Fts3Expr *pExpr, int iPhrase, void *ctx){
466 SnippetIter *p = (SnippetIter *)ctx;
467 SnippetPhrase *pPhrase = &p->aPhrase[iPhrase];
468 char *pCsr;
469 int rc;
471 pPhrase->nToken = pExpr->pPhrase->nToken;
472 rc = sqlite3Fts3EvalPhrasePoslist(p->pCsr, pExpr, p->iCol, &pCsr);
473 assert( rc==SQLITE_OK || pCsr==0 );
474 if( pCsr ){
475 int iFirst = 0;
476 pPhrase->pList = pCsr;
477 fts3GetDeltaPosition(&pCsr, &iFirst);
478 assert( iFirst>=0 );
479 pPhrase->pHead = pCsr;
480 pPhrase->pTail = pCsr;
481 pPhrase->iHead = iFirst;
482 pPhrase->iTail = iFirst;
483 }else{
484 assert( rc!=SQLITE_OK || (
485 pPhrase->pList==0 && pPhrase->pHead==0 && pPhrase->pTail==0
489 return rc;
493 ** Select the fragment of text consisting of nFragment contiguous tokens
494 ** from column iCol that represent the "best" snippet. The best snippet
495 ** is the snippet with the highest score, where scores are calculated
496 ** by adding:
498 ** (a) +1 point for each occurrence of a matchable phrase in the snippet.
500 ** (b) +1000 points for the first occurrence of each matchable phrase in
501 ** the snippet for which the corresponding mCovered bit is not set.
503 ** The selected snippet parameters are stored in structure *pFragment before
504 ** returning. The score of the selected snippet is stored in *piScore
505 ** before returning.
507 static int fts3BestSnippet(
508 int nSnippet, /* Desired snippet length */
509 Fts3Cursor *pCsr, /* Cursor to create snippet for */
510 int iCol, /* Index of column to create snippet from */
511 u64 mCovered, /* Mask of phrases already covered */
512 u64 *pmSeen, /* IN/OUT: Mask of phrases seen */
513 SnippetFragment *pFragment, /* OUT: Best snippet found */
514 int *piScore /* OUT: Score of snippet pFragment */
516 int rc; /* Return Code */
517 int nList; /* Number of phrases in expression */
518 SnippetIter sIter; /* Iterates through snippet candidates */
519 int nByte; /* Number of bytes of space to allocate */
520 int iBestScore = -1; /* Best snippet score found so far */
521 int i; /* Loop counter */
523 memset(&sIter, 0, sizeof(sIter));
525 /* Iterate through the phrases in the expression to count them. The same
526 ** callback makes sure the doclists are loaded for each phrase.
528 rc = fts3ExprLoadDoclists(pCsr, &nList, 0);
529 if( rc!=SQLITE_OK ){
530 return rc;
533 /* Now that it is known how many phrases there are, allocate and zero
534 ** the required space using malloc().
536 nByte = sizeof(SnippetPhrase) * nList;
537 sIter.aPhrase = (SnippetPhrase *)sqlite3_malloc(nByte);
538 if( !sIter.aPhrase ){
539 return SQLITE_NOMEM;
541 memset(sIter.aPhrase, 0, nByte);
543 /* Initialize the contents of the SnippetIter object. Then iterate through
544 ** the set of phrases in the expression to populate the aPhrase[] array.
546 sIter.pCsr = pCsr;
547 sIter.iCol = iCol;
548 sIter.nSnippet = nSnippet;
549 sIter.nPhrase = nList;
550 sIter.iCurrent = -1;
551 rc = fts3ExprIterate(pCsr->pExpr, fts3SnippetFindPositions, (void*)&sIter);
552 if( rc==SQLITE_OK ){
554 /* Set the *pmSeen output variable. */
555 for(i=0; i<nList; i++){
556 if( sIter.aPhrase[i].pHead ){
557 *pmSeen |= (u64)1 << i;
561 /* Loop through all candidate snippets. Store the best snippet in
562 ** *pFragment. Store its associated 'score' in iBestScore.
564 pFragment->iCol = iCol;
565 while( !fts3SnippetNextCandidate(&sIter) ){
566 int iPos;
567 int iScore;
568 u64 mCover;
569 u64 mHighlite;
570 fts3SnippetDetails(&sIter, mCovered, &iPos, &iScore, &mCover,&mHighlite);
571 assert( iScore>=0 );
572 if( iScore>iBestScore ){
573 pFragment->iPos = iPos;
574 pFragment->hlmask = mHighlite;
575 pFragment->covered = mCover;
576 iBestScore = iScore;
580 *piScore = iBestScore;
582 sqlite3_free(sIter.aPhrase);
583 return rc;
588 ** Append a string to the string-buffer passed as the first argument.
590 ** If nAppend is negative, then the length of the string zAppend is
591 ** determined using strlen().
593 static int fts3StringAppend(
594 StrBuffer *pStr, /* Buffer to append to */
595 const char *zAppend, /* Pointer to data to append to buffer */
596 int nAppend /* Size of zAppend in bytes (or -1) */
598 if( nAppend<0 ){
599 nAppend = (int)strlen(zAppend);
602 /* If there is insufficient space allocated at StrBuffer.z, use realloc()
603 ** to grow the buffer until so that it is big enough to accomadate the
604 ** appended data.
606 if( pStr->n+nAppend+1>=pStr->nAlloc ){
607 int nAlloc = pStr->nAlloc+nAppend+100;
608 char *zNew = sqlite3_realloc(pStr->z, nAlloc);
609 if( !zNew ){
610 return SQLITE_NOMEM;
612 pStr->z = zNew;
613 pStr->nAlloc = nAlloc;
615 assert( pStr->z!=0 && (pStr->nAlloc >= pStr->n+nAppend+1) );
617 /* Append the data to the string buffer. */
618 memcpy(&pStr->z[pStr->n], zAppend, nAppend);
619 pStr->n += nAppend;
620 pStr->z[pStr->n] = '\0';
622 return SQLITE_OK;
626 ** The fts3BestSnippet() function often selects snippets that end with a
627 ** query term. That is, the final term of the snippet is always a term
628 ** that requires highlighting. For example, if 'X' is a highlighted term
629 ** and '.' is a non-highlighted term, BestSnippet() may select:
631 ** ........X.....X
633 ** This function "shifts" the beginning of the snippet forward in the
634 ** document so that there are approximately the same number of
635 ** non-highlighted terms to the right of the final highlighted term as there
636 ** are to the left of the first highlighted term. For example, to this:
638 ** ....X.....X....
640 ** This is done as part of extracting the snippet text, not when selecting
641 ** the snippet. Snippet selection is done based on doclists only, so there
642 ** is no way for fts3BestSnippet() to know whether or not the document
643 ** actually contains terms that follow the final highlighted term.
645 static int fts3SnippetShift(
646 Fts3Table *pTab, /* FTS3 table snippet comes from */
647 int iLangid, /* Language id to use in tokenizing */
648 int nSnippet, /* Number of tokens desired for snippet */
649 const char *zDoc, /* Document text to extract snippet from */
650 int nDoc, /* Size of buffer zDoc in bytes */
651 int *piPos, /* IN/OUT: First token of snippet */
652 u64 *pHlmask /* IN/OUT: Mask of tokens to highlight */
654 u64 hlmask = *pHlmask; /* Local copy of initial highlight-mask */
656 if( hlmask ){
657 int nLeft; /* Tokens to the left of first highlight */
658 int nRight; /* Tokens to the right of last highlight */
659 int nDesired; /* Ideal number of tokens to shift forward */
661 for(nLeft=0; !(hlmask & ((u64)1 << nLeft)); nLeft++);
662 for(nRight=0; !(hlmask & ((u64)1 << (nSnippet-1-nRight))); nRight++);
663 nDesired = (nLeft-nRight)/2;
665 /* Ideally, the start of the snippet should be pushed forward in the
666 ** document nDesired tokens. This block checks if there are actually
667 ** nDesired tokens to the right of the snippet. If so, *piPos and
668 ** *pHlMask are updated to shift the snippet nDesired tokens to the
669 ** right. Otherwise, the snippet is shifted by the number of tokens
670 ** available.
672 if( nDesired>0 ){
673 int nShift; /* Number of tokens to shift snippet by */
674 int iCurrent = 0; /* Token counter */
675 int rc; /* Return Code */
676 sqlite3_tokenizer_module *pMod;
677 sqlite3_tokenizer_cursor *pC;
678 pMod = (sqlite3_tokenizer_module *)pTab->pTokenizer->pModule;
680 /* Open a cursor on zDoc/nDoc. Check if there are (nSnippet+nDesired)
681 ** or more tokens in zDoc/nDoc.
683 rc = sqlite3Fts3OpenTokenizer(pTab->pTokenizer, iLangid, zDoc, nDoc, &pC);
684 if( rc!=SQLITE_OK ){
685 return rc;
687 while( rc==SQLITE_OK && iCurrent<(nSnippet+nDesired) ){
688 const char *ZDUMMY; int DUMMY1 = 0, DUMMY2 = 0, DUMMY3 = 0;
689 rc = pMod->xNext(pC, &ZDUMMY, &DUMMY1, &DUMMY2, &DUMMY3, &iCurrent);
691 pMod->xClose(pC);
692 if( rc!=SQLITE_OK && rc!=SQLITE_DONE ){ return rc; }
694 nShift = (rc==SQLITE_DONE)+iCurrent-nSnippet;
695 assert( nShift<=nDesired );
696 if( nShift>0 ){
697 *piPos += nShift;
698 *pHlmask = hlmask >> nShift;
702 return SQLITE_OK;
/*
** Extract the snippet text for fragment pFragment from cursor pCsr and
** append it to string buffer pOut.
**
** The text of column (pFragment->iCol+1) of the cursor's statement is
** tokenized from the start. Tokens before the start of the snippet are
** skipped. When the first token at or after the snippet start is reached,
** fts3SnippetShift() is given one chance to move the snippet forward, and
** then either zEllipsis or any text that precedes the first token is
** written. Each token within the snippet is then appended, preceded by the
** text separating it from the previous token and wrapped in zOpen/zClose
** if its bit is set in the highlight mask.
**
** Returns SQLITE_OK if the column value is NULL (nothing is appended),
** SQLITE_NOMEM if the column text cannot be obtained for a non-NULL value,
** or any error code returned by the tokenizer or by fts3StringAppend().
*/
static int fts3SnippetText(
  Fts3Cursor *pCsr,               /* FTS3 Cursor */
  SnippetFragment *pFragment,     /* Snippet to extract */
  int iFragment,                  /* Fragment number */
  int isLast,                     /* True for final fragment in snippet */
  int nSnippet,                   /* Number of tokens in extracted snippet */
  const char *zOpen,              /* String inserted before highlighted term */
  const char *zClose,             /* String inserted after highlighted term */
  const char *zEllipsis,          /* String inserted between snippets */
  StrBuffer *pOut                 /* Write output here */
){
  Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
  int rc;                         /* Return code */
  const char *zDoc;               /* Document text to extract snippet from */
  int nDoc;                       /* Size of zDoc in bytes */
  int iCurrent = 0;               /* Current token number of document */
  int iEnd = 0;                   /* Byte offset of end of current token */
  int isShiftDone = 0;            /* True after snippet is shifted */
  int iPos = pFragment->iPos;     /* First token of snippet */
  u64 hlmask = pFragment->hlmask; /* Highlight-mask for snippet */
  int iCol = pFragment->iCol+1;   /* Query column to extract text from */
  sqlite3_tokenizer_module *pMod; /* Tokenizer module methods object */
  sqlite3_tokenizer_cursor *pC;   /* Tokenizer cursor open on zDoc/nDoc */

  zDoc = (const char *)sqlite3_column_text(pCsr->pStmt, iCol);
  if( zDoc==0 ){
    /* A NULL pointer for a non-NULL column value is reported as an
    ** out-of-memory error; a genuine NULL value yields an empty snippet. */
    if( sqlite3_column_type(pCsr->pStmt, iCol)!=SQLITE_NULL ){
      return SQLITE_NOMEM;
    }
    return SQLITE_OK;
  }
  nDoc = sqlite3_column_bytes(pCsr->pStmt, iCol);

  /* Open a token cursor on the document. */
  pMod = (sqlite3_tokenizer_module *)pTab->pTokenizer->pModule;
  rc = sqlite3Fts3OpenTokenizer(pTab->pTokenizer, pCsr->iLangid, zDoc,nDoc,&pC);
  if( rc!=SQLITE_OK ){
    return rc;
  }

  while( rc==SQLITE_OK ){
    const char *ZDUMMY;           /* Dummy argument used with tokenizer */
    int DUMMY1 = -1;              /* Dummy argument used with tokenizer */
    int iBegin = 0;               /* Offset in zDoc of start of token */
    int iFin = 0;                 /* Offset in zDoc of end of token */
    int isHighlight = 0;          /* True for highlighted terms */

    /* Variable DUMMY1 is initialized to a negative value above. Elsewhere
    ** in the FTS code the variable that the third argument to xNext points to
    ** is initialized to zero before the first (*but not necessarily
    ** subsequent*) call to xNext(). This is done for a particular application
    ** that needs to know whether or not the tokenizer is being used for
    ** snippet generation or for some other purpose.
    **
    ** Extreme care is required when writing code to depend on this
    ** initialization. It is not a documented part of the tokenizer interface.
    ** If a tokenizer is used directly by any code outside of FTS, this
    ** convention might not be respected.  */
    rc = pMod->xNext(pC, &ZDUMMY, &DUMMY1, &iBegin, &iFin, &iCurrent);
    if( rc!=SQLITE_OK ){
      if( rc==SQLITE_DONE ){
        /* Special case - the last token of the snippet is also the last token
        ** of the column. Append any punctuation that occurred between the end
        ** of the previous token and the end of the document to the output.
        ** Then break out of the loop. */
        rc = fts3StringAppend(pOut, &zDoc[iEnd], -1);
      }
      break;
    }
    /* Skip tokens that precede the snippet. */
    if( iCurrent<iPos ){ continue; }

    if( !isShiftDone ){
      int n = nDoc - iBegin;
      rc = fts3SnippetShift(
          pTab, pCsr->iLangid, nSnippet, &zDoc[iBegin], n, &iPos, &hlmask
      );
      isShiftDone = 1;

      /* Now that the shift has been done, check if the initial "..." are
      ** required. They are required if (a) this is not the first fragment,
      ** or (b) this fragment does not begin at position 0 of its column.
      */
      if( rc==SQLITE_OK ){
        if( iPos>0 || iFragment>0 ){
          rc = fts3StringAppend(pOut, zEllipsis, -1);
        }else if( iBegin ){
          rc = fts3StringAppend(pOut, zDoc, iBegin);
        }
      }
      /* The shift may have moved iPos beyond the current token. */
      if( rc!=SQLITE_OK || iCurrent<iPos ) continue;
    }

    /* Past the end of the snippet: add the trailing ellipsis if this is
    ** the final fragment, then stop. */
    if( iCurrent>=(iPos+nSnippet) ){
      if( isLast ){
        rc = fts3StringAppend(pOut, zEllipsis, -1);
      }
      break;
    }

    /* Set isHighlight to true if this term should be highlighted. */
    isHighlight = (hlmask & ((u64)1 << (iCurrent-iPos)))!=0;

    /* Separator text is only copied between tokens of the snippet, not
    ** before its first token. */
    if( iCurrent>iPos ) rc = fts3StringAppend(pOut, &zDoc[iEnd], iBegin-iEnd);
    if( rc==SQLITE_OK && isHighlight ) rc = fts3StringAppend(pOut, zOpen, -1);
    if( rc==SQLITE_OK ) rc = fts3StringAppend(pOut, &zDoc[iBegin], iFin-iBegin);
    if( rc==SQLITE_OK && isHighlight ) rc = fts3StringAppend(pOut, zClose, -1);

    iEnd = iFin;
  }

  pMod->xClose(pC);
  return rc;
}
/*
** Count the entries in a column-list (a delta-encoded list of term offsets
** within a single column of a single row).
**
** On entry *ppCollist must point to the first byte of the first varint in
** the column-list. On return *ppCollist points to the first byte after the
** last varint in the column-list: either the 0x00 that ends the
** position-list, or the 0x01 that precedes the column number of the next
** column in the position-list.
**
** The number of varints in the column-list is returned.
*/
static int fts3ColumnlistCount(char **ppCollist){
  char *pCsr = *ppCollist;
  char bMore = 0;                 /* Non-zero if previous byte had 0x80 set */
  int nVarint = 0;

  /* A column-list is terminated by either a 0x01 or 0x00 byte that is not
  ** itself the final byte of a multi-byte varint. */
  for(;;){
    char ch = *pCsr;
    if( ((ch | bMore) & 0xFE)==0 ) break;
    pCsr++;
    bMore = ch & 0x80;
    if( !bMore ) nVarint++;       /* Byte without 0x80 ends a varint */
  }

  *ppCollist = pCsr;
  return nVarint;
}
853 ** This function gathers 'y' or 'b' data for a single phrase.
855 static void fts3ExprLHits(
856 Fts3Expr *pExpr, /* Phrase expression node */
857 MatchInfo *p /* Matchinfo context */
859 Fts3Table *pTab = (Fts3Table *)p->pCursor->base.pVtab;
860 int iStart;
861 Fts3Phrase *pPhrase = pExpr->pPhrase;
862 char *pIter = pPhrase->doclist.pList;
863 int iCol = 0;
865 assert( p->flag==FTS3_MATCHINFO_LHITS_BM || p->flag==FTS3_MATCHINFO_LHITS );
866 if( p->flag==FTS3_MATCHINFO_LHITS ){
867 iStart = pExpr->iPhrase * p->nCol;
868 }else{
869 iStart = pExpr->iPhrase * ((p->nCol + 31) / 32);
872 while( 1 ){
873 int nHit = fts3ColumnlistCount(&pIter);
874 if( (pPhrase->iColumn>=pTab->nColumn || pPhrase->iColumn==iCol) ){
875 if( p->flag==FTS3_MATCHINFO_LHITS ){
876 p->aMatchinfo[iStart + iCol] = (u32)nHit;
877 }else if( nHit ){
878 p->aMatchinfo[iStart + (iCol+1)/32] |= (1 << (iCol&0x1F));
881 assert( *pIter==0x00 || *pIter==0x01 );
882 if( *pIter!=0x01 ) break;
883 pIter++;
884 pIter += fts3GetVarint32(pIter, &iCol);
889 ** Gather the results for matchinfo directives 'y' and 'b'.
891 static void fts3ExprLHitGather(
892 Fts3Expr *pExpr,
893 MatchInfo *p
895 assert( (pExpr->pLeft==0)==(pExpr->pRight==0) );
896 if( pExpr->bEof==0 && pExpr->iDocid==p->pCursor->iPrevId ){
897 if( pExpr->pLeft ){
898 fts3ExprLHitGather(pExpr->pLeft, p);
899 fts3ExprLHitGather(pExpr->pRight, p);
900 }else{
901 fts3ExprLHits(pExpr, p);
907 ** fts3ExprIterate() callback used to collect the "global" matchinfo stats
908 ** for a single query.
910 ** fts3ExprIterate() callback to load the 'global' elements of a
911 ** FTS3_MATCHINFO_HITS matchinfo array. The global stats are those elements
912 ** of the matchinfo array that are constant for all rows returned by the
913 ** current query.
915 ** Argument pCtx is actually a pointer to a struct of type MatchInfo. This
916 ** function populates Matchinfo.aMatchinfo[] as follows:
918 ** for(iCol=0; iCol<nCol; iCol++){
919 ** aMatchinfo[3*iPhrase*nCol + 3*iCol + 1] = X;
920 ** aMatchinfo[3*iPhrase*nCol + 3*iCol + 2] = Y;
921 ** }
923 ** where X is the number of matches for phrase iPhrase is column iCol of all
924 ** rows of the table. Y is the number of rows for which column iCol contains
925 ** at least one instance of phrase iPhrase.
927 ** If the phrase pExpr consists entirely of deferred tokens, then all X and
928 ** Y values are set to nDoc, where nDoc is the number of documents in the
929 ** file system. This is done because the full-text index doclist is required
930 ** to calculate these values properly, and the full-text index doclist is
931 ** not available for deferred tokens.
933 static int fts3ExprGlobalHitsCb(
934 Fts3Expr *pExpr, /* Phrase expression node */
935 int iPhrase, /* Phrase number (numbered from zero) */
936 void *pCtx /* Pointer to MatchInfo structure */
938 MatchInfo *p = (MatchInfo *)pCtx;
939 return sqlite3Fts3EvalPhraseStats(
940 p->pCursor, pExpr, &p->aMatchinfo[3*iPhrase*p->nCol]
945 ** fts3ExprIterate() callback used to collect the "local" part of the
946 ** FTS3_MATCHINFO_HITS array. The local stats are those elements of the
947 ** array that are different for each row returned by the query.
949 static int fts3ExprLocalHitsCb(
950 Fts3Expr *pExpr, /* Phrase expression node */
951 int iPhrase, /* Phrase number */
952 void *pCtx /* Pointer to MatchInfo structure */
954 int rc = SQLITE_OK;
955 MatchInfo *p = (MatchInfo *)pCtx;
956 int iStart = iPhrase * p->nCol * 3;
957 int i;
959 for(i=0; i<p->nCol && rc==SQLITE_OK; i++){
960 char *pCsr;
961 rc = sqlite3Fts3EvalPhrasePoslist(p->pCursor, pExpr, i, &pCsr);
962 if( pCsr ){
963 p->aMatchinfo[iStart+i*3] = fts3ColumnlistCount(&pCsr);
964 }else{
965 p->aMatchinfo[iStart+i*3] = 0;
969 return rc;
972 static int fts3MatchinfoCheck(
973 Fts3Table *pTab,
974 char cArg,
975 char **pzErr
977 if( (cArg==FTS3_MATCHINFO_NPHRASE)
978 || (cArg==FTS3_MATCHINFO_NCOL)
979 || (cArg==FTS3_MATCHINFO_NDOC && pTab->bFts4)
980 || (cArg==FTS3_MATCHINFO_AVGLENGTH && pTab->bFts4)
981 || (cArg==FTS3_MATCHINFO_LENGTH && pTab->bHasDocsize)
982 || (cArg==FTS3_MATCHINFO_LCS)
983 || (cArg==FTS3_MATCHINFO_HITS)
984 || (cArg==FTS3_MATCHINFO_LHITS)
985 || (cArg==FTS3_MATCHINFO_LHITS_BM)
987 return SQLITE_OK;
989 sqlite3Fts3ErrMsg(pzErr, "unrecognized matchinfo request: %c", cArg);
990 return SQLITE_ERROR;
993 static int fts3MatchinfoSize(MatchInfo *pInfo, char cArg){
994 int nVal; /* Number of integers output by cArg */
996 switch( cArg ){
997 case FTS3_MATCHINFO_NDOC:
998 case FTS3_MATCHINFO_NPHRASE:
999 case FTS3_MATCHINFO_NCOL:
1000 nVal = 1;
1001 break;
1003 case FTS3_MATCHINFO_AVGLENGTH:
1004 case FTS3_MATCHINFO_LENGTH:
1005 case FTS3_MATCHINFO_LCS:
1006 nVal = pInfo->nCol;
1007 break;
1009 case FTS3_MATCHINFO_LHITS:
1010 nVal = pInfo->nCol * pInfo->nPhrase;
1011 break;
1013 case FTS3_MATCHINFO_LHITS_BM:
1014 nVal = pInfo->nPhrase * ((pInfo->nCol + 31) / 32);
1015 break;
1017 default:
1018 assert( cArg==FTS3_MATCHINFO_HITS );
1019 nVal = pInfo->nCol * pInfo->nPhrase * 3;
1020 break;
1023 return nVal;
1026 static int fts3MatchinfoSelectDoctotal(
1027 Fts3Table *pTab,
1028 sqlite3_stmt **ppStmt,
1029 sqlite3_int64 *pnDoc,
1030 const char **paLen
1032 sqlite3_stmt *pStmt;
1033 const char *a;
1034 sqlite3_int64 nDoc;
1036 if( !*ppStmt ){
1037 int rc = sqlite3Fts3SelectDoctotal(pTab, ppStmt);
1038 if( rc!=SQLITE_OK ) return rc;
1040 pStmt = *ppStmt;
1041 assert( sqlite3_data_count(pStmt)==1 );
1043 a = sqlite3_column_blob(pStmt, 0);
1044 a += sqlite3Fts3GetVarint(a, &nDoc);
1045 if( nDoc==0 ) return FTS_CORRUPT_VTAB;
1046 *pnDoc = (u32)nDoc;
1048 if( paLen ) *paLen = a;
1049 return SQLITE_OK;
1053 ** An instance of the following structure is used to store state while
1054 ** iterating through a multi-column position-list corresponding to the
1055 ** hits for a single phrase on a single row in order to calculate the
1056 ** values for a matchinfo() FTS3_MATCHINFO_LCS request.
1058 typedef struct LcsIterator LcsIterator;
1059 struct LcsIterator {
1060 Fts3Expr *pExpr; /* Pointer to phrase expression */
1061 int iPosOffset; /* Tokens count up to end of this phrase */
1062 char *pRead; /* Cursor used to iterate through aDoclist */
1063 int iPos; /* Current position */
/*
** Sentinel value meaning that an LcsIterator has finished iterating
** through all offsets for all columns.
**
** NOTE(review): the original comment says this value is stored in
** LcsIterator.iCol, but the LcsIterator structure visible in this file
** has no iCol member - confirm whether the macro is still in use.
**
** The definition must not end in a semicolon, otherwise the macro cannot
** be used inside an expression.
*/
#define LCS_ITERATOR_FINISHED 0x7FFFFFFF
1072 static int fts3MatchinfoLcsCb(
1073 Fts3Expr *pExpr, /* Phrase expression node */
1074 int iPhrase, /* Phrase number (numbered from zero) */
1075 void *pCtx /* Pointer to MatchInfo structure */
1077 LcsIterator *aIter = (LcsIterator *)pCtx;
1078 aIter[iPhrase].pExpr = pExpr;
1079 return SQLITE_OK;
1083 ** Advance the iterator passed as an argument to the next position. Return
1084 ** 1 if the iterator is at EOF or if it now points to the start of the
1085 ** position list for the next column.
1087 static int fts3LcsIteratorAdvance(LcsIterator *pIter){
1088 char *pRead = pIter->pRead;
1089 sqlite3_int64 iRead;
1090 int rc = 0;
1092 pRead += sqlite3Fts3GetVarint(pRead, &iRead);
1093 if( iRead==0 || iRead==1 ){
1094 pRead = 0;
1095 rc = 1;
1096 }else{
1097 pIter->iPos += (int)(iRead-2);
1100 pIter->pRead = pRead;
1101 return rc;
1105 ** This function implements the FTS3_MATCHINFO_LCS matchinfo() flag.
1107 ** If the call is successful, the longest-common-substring lengths for each
1108 ** column are written into the first nCol elements of the pInfo->aMatchinfo[]
1109 ** array before returning. SQLITE_OK is returned in this case.
1111 ** Otherwise, if an error occurs, an SQLite error code is returned and the
1112 ** data written to the first nCol elements of pInfo->aMatchinfo[] is
1113 ** undefined.
1115 static int fts3MatchinfoLcs(Fts3Cursor *pCsr, MatchInfo *pInfo){
1116 LcsIterator *aIter;
1117 int i;
1118 int iCol;
1119 int nToken = 0;
1121 /* Allocate and populate the array of LcsIterator objects. The array
1122 ** contains one element for each matchable phrase in the query.
1124 aIter = sqlite3_malloc(sizeof(LcsIterator) * pCsr->nPhrase);
1125 if( !aIter ) return SQLITE_NOMEM;
1126 memset(aIter, 0, sizeof(LcsIterator) * pCsr->nPhrase);
1127 (void)fts3ExprIterate(pCsr->pExpr, fts3MatchinfoLcsCb, (void*)aIter);
1129 for(i=0; i<pInfo->nPhrase; i++){
1130 LcsIterator *pIter = &aIter[i];
1131 nToken -= pIter->pExpr->pPhrase->nToken;
1132 pIter->iPosOffset = nToken;
1135 for(iCol=0; iCol<pInfo->nCol; iCol++){
1136 int nLcs = 0; /* LCS value for this column */
1137 int nLive = 0; /* Number of iterators in aIter not at EOF */
1139 for(i=0; i<pInfo->nPhrase; i++){
1140 int rc;
1141 LcsIterator *pIt = &aIter[i];
1142 rc = sqlite3Fts3EvalPhrasePoslist(pCsr, pIt->pExpr, iCol, &pIt->pRead);
1143 if( rc!=SQLITE_OK ) return rc;
1144 if( pIt->pRead ){
1145 pIt->iPos = pIt->iPosOffset;
1146 fts3LcsIteratorAdvance(&aIter[i]);
1147 nLive++;
1151 while( nLive>0 ){
1152 LcsIterator *pAdv = 0; /* The iterator to advance by one position */
1153 int nThisLcs = 0; /* LCS for the current iterator positions */
1155 for(i=0; i<pInfo->nPhrase; i++){
1156 LcsIterator *pIter = &aIter[i];
1157 if( pIter->pRead==0 ){
1158 /* This iterator is already at EOF for this column. */
1159 nThisLcs = 0;
1160 }else{
1161 if( pAdv==0 || pIter->iPos<pAdv->iPos ){
1162 pAdv = pIter;
1164 if( nThisLcs==0 || pIter->iPos==pIter[-1].iPos ){
1165 nThisLcs++;
1166 }else{
1167 nThisLcs = 1;
1169 if( nThisLcs>nLcs ) nLcs = nThisLcs;
1172 if( fts3LcsIteratorAdvance(pAdv) ) nLive--;
1175 pInfo->aMatchinfo[iCol] = nLcs;
1178 sqlite3_free(aIter);
1179 return SQLITE_OK;
1183 ** Populate the buffer pInfo->aMatchinfo[] with an array of integers to
1184 ** be returned by the matchinfo() function. Argument zArg contains the
1185 ** format string passed as the second argument to matchinfo (or the
1186 ** default value "pcx" if no second argument was specified). The format
1187 ** string has already been validated and the pInfo->aMatchinfo[] array
1188 ** is guaranteed to be large enough for the output.
1190 ** If bGlobal is true, then populate all fields of the matchinfo() output.
1191 ** If it is false, then assume that those fields that do not change between
1192 ** rows (i.e. FTS3_MATCHINFO_NPHRASE, NCOL, NDOC, AVGLENGTH and part of HITS)
1193 ** have already been populated.
1195 ** Return SQLITE_OK if successful, or an SQLite error code if an error
1196 ** occurs. If a value other than SQLITE_OK is returned, the state the
1197 ** pInfo->aMatchinfo[] buffer is left in is undefined.
1199 static int fts3MatchinfoValues(
1200 Fts3Cursor *pCsr, /* FTS3 cursor object */
1201 int bGlobal, /* True to grab the global stats */
1202 MatchInfo *pInfo, /* Matchinfo context object */
1203 const char *zArg /* Matchinfo format string */
1205 int rc = SQLITE_OK;
1206 int i;
1207 Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
1208 sqlite3_stmt *pSelect = 0;
1210 for(i=0; rc==SQLITE_OK && zArg[i]; i++){
1211 pInfo->flag = zArg[i];
1212 switch( zArg[i] ){
1213 case FTS3_MATCHINFO_NPHRASE:
1214 if( bGlobal ) pInfo->aMatchinfo[0] = pInfo->nPhrase;
1215 break;
1217 case FTS3_MATCHINFO_NCOL:
1218 if( bGlobal ) pInfo->aMatchinfo[0] = pInfo->nCol;
1219 break;
1221 case FTS3_MATCHINFO_NDOC:
1222 if( bGlobal ){
1223 sqlite3_int64 nDoc = 0;
1224 rc = fts3MatchinfoSelectDoctotal(pTab, &pSelect, &nDoc, 0);
1225 pInfo->aMatchinfo[0] = (u32)nDoc;
1227 break;
1229 case FTS3_MATCHINFO_AVGLENGTH:
1230 if( bGlobal ){
1231 sqlite3_int64 nDoc; /* Number of rows in table */
1232 const char *a; /* Aggregate column length array */
1234 rc = fts3MatchinfoSelectDoctotal(pTab, &pSelect, &nDoc, &a);
1235 if( rc==SQLITE_OK ){
1236 int iCol;
1237 for(iCol=0; iCol<pInfo->nCol; iCol++){
1238 u32 iVal;
1239 sqlite3_int64 nToken;
1240 a += sqlite3Fts3GetVarint(a, &nToken);
1241 iVal = (u32)(((u32)(nToken&0xffffffff)+nDoc/2)/nDoc);
1242 pInfo->aMatchinfo[iCol] = iVal;
1246 break;
1248 case FTS3_MATCHINFO_LENGTH: {
1249 sqlite3_stmt *pSelectDocsize = 0;
1250 rc = sqlite3Fts3SelectDocsize(pTab, pCsr->iPrevId, &pSelectDocsize);
1251 if( rc==SQLITE_OK ){
1252 int iCol;
1253 const char *a = sqlite3_column_blob(pSelectDocsize, 0);
1254 for(iCol=0; iCol<pInfo->nCol; iCol++){
1255 sqlite3_int64 nToken;
1256 a += sqlite3Fts3GetVarint(a, &nToken);
1257 pInfo->aMatchinfo[iCol] = (u32)nToken;
1260 sqlite3_reset(pSelectDocsize);
1261 break;
1264 case FTS3_MATCHINFO_LCS:
1265 rc = fts3ExprLoadDoclists(pCsr, 0, 0);
1266 if( rc==SQLITE_OK ){
1267 rc = fts3MatchinfoLcs(pCsr, pInfo);
1269 break;
1271 case FTS3_MATCHINFO_LHITS_BM:
1272 case FTS3_MATCHINFO_LHITS: {
1273 int nZero = fts3MatchinfoSize(pInfo, zArg[i]) * sizeof(u32);
1274 memset(pInfo->aMatchinfo, 0, nZero);
1275 fts3ExprLHitGather(pCsr->pExpr, pInfo);
1276 break;
1279 default: {
1280 Fts3Expr *pExpr;
1281 assert( zArg[i]==FTS3_MATCHINFO_HITS );
1282 pExpr = pCsr->pExpr;
1283 rc = fts3ExprLoadDoclists(pCsr, 0, 0);
1284 if( rc!=SQLITE_OK ) break;
1285 if( bGlobal ){
1286 if( pCsr->pDeferred ){
1287 rc = fts3MatchinfoSelectDoctotal(pTab, &pSelect, &pInfo->nDoc, 0);
1288 if( rc!=SQLITE_OK ) break;
1290 rc = fts3ExprIterate(pExpr, fts3ExprGlobalHitsCb,(void*)pInfo);
1291 sqlite3Fts3EvalTestDeferred(pCsr, &rc);
1292 if( rc!=SQLITE_OK ) break;
1294 (void)fts3ExprIterate(pExpr, fts3ExprLocalHitsCb,(void*)pInfo);
1295 break;
1299 pInfo->aMatchinfo += fts3MatchinfoSize(pInfo, zArg[i]);
1302 sqlite3_reset(pSelect);
1303 return rc;
1308 ** Populate pCsr->aMatchinfo[] with data for the current row. The
1309 ** 'matchinfo' data is an array of 32-bit unsigned integers (C type u32).
1311 static void fts3GetMatchinfo(
1312 sqlite3_context *pCtx, /* Return results here */
1313 Fts3Cursor *pCsr, /* FTS3 Cursor object */
1314 const char *zArg /* Second argument to matchinfo() function */
1316 MatchInfo sInfo;
1317 Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
1318 int rc = SQLITE_OK;
1319 int bGlobal = 0; /* Collect 'global' stats as well as local */
1321 u32 *aOut = 0;
1322 void (*xDestroyOut)(void*) = 0;
1324 memset(&sInfo, 0, sizeof(MatchInfo));
1325 sInfo.pCursor = pCsr;
1326 sInfo.nCol = pTab->nColumn;
1328 /* If there is cached matchinfo() data, but the format string for the
1329 ** cache does not match the format string for this request, discard
1330 ** the cached data. */
1331 if( pCsr->pMIBuffer && strcmp(pCsr->pMIBuffer->zMatchinfo, zArg) ){
1332 sqlite3Fts3MIBufferFree(pCsr->pMIBuffer);
1333 pCsr->pMIBuffer = 0;
1336 /* If Fts3Cursor.pMIBuffer is NULL, then this is the first time the
1337 ** matchinfo function has been called for this query. In this case
1338 ** allocate the array used to accumulate the matchinfo data and
1339 ** initialize those elements that are constant for every row.
1341 if( pCsr->pMIBuffer==0 ){
1342 int nMatchinfo = 0; /* Number of u32 elements in match-info */
1343 int i; /* Used to iterate through zArg */
1345 /* Determine the number of phrases in the query */
1346 pCsr->nPhrase = fts3ExprPhraseCount(pCsr->pExpr);
1347 sInfo.nPhrase = pCsr->nPhrase;
1349 /* Determine the number of integers in the buffer returned by this call. */
1350 for(i=0; zArg[i]; i++){
1351 char *zErr = 0;
1352 if( fts3MatchinfoCheck(pTab, zArg[i], &zErr) ){
1353 sqlite3_result_error(pCtx, zErr, -1);
1354 sqlite3_free(zErr);
1355 return;
1357 nMatchinfo += fts3MatchinfoSize(&sInfo, zArg[i]);
1360 /* Allocate space for Fts3Cursor.aMatchinfo[] and Fts3Cursor.zMatchinfo. */
1361 pCsr->pMIBuffer = fts3MIBufferNew(nMatchinfo, zArg);
1362 if( !pCsr->pMIBuffer ) rc = SQLITE_NOMEM;
1364 pCsr->isMatchinfoNeeded = 1;
1365 bGlobal = 1;
1368 if( rc==SQLITE_OK ){
1369 xDestroyOut = fts3MIBufferAlloc(pCsr->pMIBuffer, &aOut);
1370 if( xDestroyOut==0 ){
1371 rc = SQLITE_NOMEM;
1375 if( rc==SQLITE_OK ){
1376 sInfo.aMatchinfo = aOut;
1377 sInfo.nPhrase = pCsr->nPhrase;
1378 rc = fts3MatchinfoValues(pCsr, bGlobal, &sInfo, zArg);
1379 if( bGlobal ){
1380 fts3MIBufferSetGlobal(pCsr->pMIBuffer);
1384 if( rc!=SQLITE_OK ){
1385 sqlite3_result_error_code(pCtx, rc);
1386 if( xDestroyOut ) xDestroyOut(aOut);
1387 }else{
1388 int n = pCsr->pMIBuffer->nElem * sizeof(u32);
1389 sqlite3_result_blob(pCtx, aOut, n, xDestroyOut);
1394 ** Implementation of snippet() function.
1396 void sqlite3Fts3Snippet(
1397 sqlite3_context *pCtx, /* SQLite function call context */
1398 Fts3Cursor *pCsr, /* Cursor object */
1399 const char *zStart, /* Snippet start text - "<b>" */
1400 const char *zEnd, /* Snippet end text - "</b>" */
1401 const char *zEllipsis, /* Snippet ellipsis text - "<b>...</b>" */
1402 int iCol, /* Extract snippet from this column */
1403 int nToken /* Approximate number of tokens in snippet */
1405 Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
1406 int rc = SQLITE_OK;
1407 int i;
1408 StrBuffer res = {0, 0, 0};
1410 /* The returned text includes up to four fragments of text extracted from
1411 ** the data in the current row. The first iteration of the for(...) loop
1412 ** below attempts to locate a single fragment of text nToken tokens in
1413 ** size that contains at least one instance of all phrases in the query
1414 ** expression that appear in the current row. If such a fragment of text
1415 ** cannot be found, the second iteration of the loop attempts to locate
1416 ** a pair of fragments, and so on.
1418 int nSnippet = 0; /* Number of fragments in this snippet */
1419 SnippetFragment aSnippet[4]; /* Maximum of 4 fragments per snippet */
1420 int nFToken = -1; /* Number of tokens in each fragment */
1422 if( !pCsr->pExpr ){
1423 sqlite3_result_text(pCtx, "", 0, SQLITE_STATIC);
1424 return;
1427 for(nSnippet=1; 1; nSnippet++){
1429 int iSnip; /* Loop counter 0..nSnippet-1 */
1430 u64 mCovered = 0; /* Bitmask of phrases covered by snippet */
1431 u64 mSeen = 0; /* Bitmask of phrases seen by BestSnippet() */
1433 if( nToken>=0 ){
1434 nFToken = (nToken+nSnippet-1) / nSnippet;
1435 }else{
1436 nFToken = -1 * nToken;
1439 for(iSnip=0; iSnip<nSnippet; iSnip++){
1440 int iBestScore = -1; /* Best score of columns checked so far */
1441 int iRead; /* Used to iterate through columns */
1442 SnippetFragment *pFragment = &aSnippet[iSnip];
1444 memset(pFragment, 0, sizeof(*pFragment));
1446 /* Loop through all columns of the table being considered for snippets.
1447 ** If the iCol argument to this function was negative, this means all
1448 ** columns of the FTS3 table. Otherwise, only column iCol is considered.
1450 for(iRead=0; iRead<pTab->nColumn; iRead++){
1451 SnippetFragment sF = {0, 0, 0, 0};
1452 int iS = 0;
1453 if( iCol>=0 && iRead!=iCol ) continue;
1455 /* Find the best snippet of nFToken tokens in column iRead. */
1456 rc = fts3BestSnippet(nFToken, pCsr, iRead, mCovered, &mSeen, &sF, &iS);
1457 if( rc!=SQLITE_OK ){
1458 goto snippet_out;
1460 if( iS>iBestScore ){
1461 *pFragment = sF;
1462 iBestScore = iS;
1466 mCovered |= pFragment->covered;
1469 /* If all query phrases seen by fts3BestSnippet() are present in at least
1470 ** one of the nSnippet snippet fragments, break out of the loop.
1472 assert( (mCovered&mSeen)==mCovered );
1473 if( mSeen==mCovered || nSnippet==SizeofArray(aSnippet) ) break;
1476 assert( nFToken>0 );
1478 for(i=0; i<nSnippet && rc==SQLITE_OK; i++){
1479 rc = fts3SnippetText(pCsr, &aSnippet[i],
1480 i, (i==nSnippet-1), nFToken, zStart, zEnd, zEllipsis, &res
1484 snippet_out:
1485 sqlite3Fts3SegmentsClose(pTab);
1486 if( rc!=SQLITE_OK ){
1487 sqlite3_result_error_code(pCtx, rc);
1488 sqlite3_free(res.z);
1489 }else{
1490 sqlite3_result_text(pCtx, res.z, -1, sqlite3_free);
1495 typedef struct TermOffset TermOffset;
1496 typedef struct TermOffsetCtx TermOffsetCtx;
1498 struct TermOffset {
1499 char *pList; /* Position-list */
1500 int iPos; /* Position just read from pList */
1501 int iOff; /* Offset of this term from read positions */
1504 struct TermOffsetCtx {
1505 Fts3Cursor *pCsr;
1506 int iCol; /* Column of table to populate aTerm for */
1507 int iTerm;
1508 sqlite3_int64 iDocid;
1509 TermOffset *aTerm;
1513 ** This function is an fts3ExprIterate() callback used by sqlite3Fts3Offsets().
1515 static int fts3ExprTermOffsetInit(Fts3Expr *pExpr, int iPhrase, void *ctx){
1516 TermOffsetCtx *p = (TermOffsetCtx *)ctx;
1517 int nTerm; /* Number of tokens in phrase */
1518 int iTerm; /* For looping through nTerm phrase terms */
1519 char *pList; /* Pointer to position list for phrase */
1520 int iPos = 0; /* First position in position-list */
1521 int rc;
1523 UNUSED_PARAMETER(iPhrase);
1524 rc = sqlite3Fts3EvalPhrasePoslist(p->pCsr, pExpr, p->iCol, &pList);
1525 nTerm = pExpr->pPhrase->nToken;
1526 if( pList ){
1527 fts3GetDeltaPosition(&pList, &iPos);
1528 assert( iPos>=0 );
1531 for(iTerm=0; iTerm<nTerm; iTerm++){
1532 TermOffset *pT = &p->aTerm[p->iTerm++];
1533 pT->iOff = nTerm-iTerm-1;
1534 pT->pList = pList;
1535 pT->iPos = iPos;
1538 return rc;
1542 ** Implementation of offsets() function.
1544 void sqlite3Fts3Offsets(
1545 sqlite3_context *pCtx, /* SQLite function call context */
1546 Fts3Cursor *pCsr /* Cursor object */
1548 Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
1549 sqlite3_tokenizer_module const *pMod = pTab->pTokenizer->pModule;
1550 int rc; /* Return Code */
1551 int nToken; /* Number of tokens in query */
1552 int iCol; /* Column currently being processed */
1553 StrBuffer res = {0, 0, 0}; /* Result string */
1554 TermOffsetCtx sCtx; /* Context for fts3ExprTermOffsetInit() */
1556 if( !pCsr->pExpr ){
1557 sqlite3_result_text(pCtx, "", 0, SQLITE_STATIC);
1558 return;
1561 memset(&sCtx, 0, sizeof(sCtx));
1562 assert( pCsr->isRequireSeek==0 );
1564 /* Count the number of terms in the query */
1565 rc = fts3ExprLoadDoclists(pCsr, 0, &nToken);
1566 if( rc!=SQLITE_OK ) goto offsets_out;
1568 /* Allocate the array of TermOffset iterators. */
1569 sCtx.aTerm = (TermOffset *)sqlite3_malloc(sizeof(TermOffset)*nToken);
1570 if( 0==sCtx.aTerm ){
1571 rc = SQLITE_NOMEM;
1572 goto offsets_out;
1574 sCtx.iDocid = pCsr->iPrevId;
1575 sCtx.pCsr = pCsr;
1577 /* Loop through the table columns, appending offset information to
1578 ** string-buffer res for each column.
1580 for(iCol=0; iCol<pTab->nColumn; iCol++){
1581 sqlite3_tokenizer_cursor *pC; /* Tokenizer cursor */
1582 const char *ZDUMMY; /* Dummy argument used with xNext() */
1583 int NDUMMY = 0; /* Dummy argument used with xNext() */
1584 int iStart = 0;
1585 int iEnd = 0;
1586 int iCurrent = 0;
1587 const char *zDoc;
1588 int nDoc;
1590 /* Initialize the contents of sCtx.aTerm[] for column iCol. There is
1591 ** no way that this operation can fail, so the return code from
1592 ** fts3ExprIterate() can be discarded.
1594 sCtx.iCol = iCol;
1595 sCtx.iTerm = 0;
1596 (void)fts3ExprIterate(pCsr->pExpr, fts3ExprTermOffsetInit, (void*)&sCtx);
1598 /* Retreive the text stored in column iCol. If an SQL NULL is stored
1599 ** in column iCol, jump immediately to the next iteration of the loop.
1600 ** If an OOM occurs while retrieving the data (this can happen if SQLite
1601 ** needs to transform the data from utf-16 to utf-8), return SQLITE_NOMEM
1602 ** to the caller.
1604 zDoc = (const char *)sqlite3_column_text(pCsr->pStmt, iCol+1);
1605 nDoc = sqlite3_column_bytes(pCsr->pStmt, iCol+1);
1606 if( zDoc==0 ){
1607 if( sqlite3_column_type(pCsr->pStmt, iCol+1)==SQLITE_NULL ){
1608 continue;
1610 rc = SQLITE_NOMEM;
1611 goto offsets_out;
1614 /* Initialize a tokenizer iterator to iterate through column iCol. */
1615 rc = sqlite3Fts3OpenTokenizer(pTab->pTokenizer, pCsr->iLangid,
1616 zDoc, nDoc, &pC
1618 if( rc!=SQLITE_OK ) goto offsets_out;
1620 rc = pMod->xNext(pC, &ZDUMMY, &NDUMMY, &iStart, &iEnd, &iCurrent);
1621 while( rc==SQLITE_OK ){
1622 int i; /* Used to loop through terms */
1623 int iMinPos = 0x7FFFFFFF; /* Position of next token */
1624 TermOffset *pTerm = 0; /* TermOffset associated with next token */
1626 for(i=0; i<nToken; i++){
1627 TermOffset *pT = &sCtx.aTerm[i];
1628 if( pT->pList && (pT->iPos-pT->iOff)<iMinPos ){
1629 iMinPos = pT->iPos-pT->iOff;
1630 pTerm = pT;
1634 if( !pTerm ){
1635 /* All offsets for this column have been gathered. */
1636 rc = SQLITE_DONE;
1637 }else{
1638 assert( iCurrent<=iMinPos );
1639 if( 0==(0xFE&*pTerm->pList) ){
1640 pTerm->pList = 0;
1641 }else{
1642 fts3GetDeltaPosition(&pTerm->pList, &pTerm->iPos);
1644 while( rc==SQLITE_OK && iCurrent<iMinPos ){
1645 rc = pMod->xNext(pC, &ZDUMMY, &NDUMMY, &iStart, &iEnd, &iCurrent);
1647 if( rc==SQLITE_OK ){
1648 char aBuffer[64];
1649 sqlite3_snprintf(sizeof(aBuffer), aBuffer,
1650 "%d %d %d %d ", iCol, pTerm-sCtx.aTerm, iStart, iEnd-iStart
1652 rc = fts3StringAppend(&res, aBuffer, -1);
1653 }else if( rc==SQLITE_DONE && pTab->zContentTbl==0 ){
1654 rc = FTS_CORRUPT_VTAB;
1658 if( rc==SQLITE_DONE ){
1659 rc = SQLITE_OK;
1662 pMod->xClose(pC);
1663 if( rc!=SQLITE_OK ) goto offsets_out;
1666 offsets_out:
1667 sqlite3_free(sCtx.aTerm);
1668 assert( rc!=SQLITE_DONE );
1669 sqlite3Fts3SegmentsClose(pTab);
1670 if( rc!=SQLITE_OK ){
1671 sqlite3_result_error_code(pCtx, rc);
1672 sqlite3_free(res.z);
1673 }else{
1674 sqlite3_result_text(pCtx, res.z, res.n-1, sqlite3_free);
1676 return;
1680 ** Implementation of matchinfo() function.
1682 void sqlite3Fts3Matchinfo(
1683 sqlite3_context *pContext, /* Function call context */
1684 Fts3Cursor *pCsr, /* FTS3 table cursor */
1685 const char *zArg /* Second arg to matchinfo() function */
1687 Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
1688 const char *zFormat;
1690 if( zArg ){
1691 zFormat = zArg;
1692 }else{
1693 zFormat = FTS3_MATCHINFO_DEFAULT;
1696 if( !pCsr->pExpr ){
1697 sqlite3_result_blob(pContext, "", 0, SQLITE_STATIC);
1698 return;
1699 }else{
1700 /* Retrieve matchinfo() data. */
1701 fts3GetMatchinfo(pContext, pCsr, zFormat);
1702 sqlite3Fts3SegmentsClose(pTab);
1706 #endif