4 ** The author disclaims copyright to this source code. In place of
5 ** a legal notice, here is a blessing:
7 ** May you do good and not evil.
8 ** May you find forgiveness for yourself and forgive others.
9 ** May you share freely, never taking more than you give.
11 ******************************************************************************
13 ** This file contains code for the "fts5tokenize" virtual table module.
** An fts5tokenize virtual table is created as follows:
**
**   CREATE VIRTUAL TABLE <tbl> USING fts5tokenize(
**       <tokenizer-name>, <arg-1>, ...
**   );
20 ** The table created has the following schema:
22 ** CREATE TABLE <tbl>(input HIDDEN, token, start, end, position)
** When queried, the query must include a WHERE clause of type:
**
**   input = <string>
**
** The virtual table module tokenizes this <string>, using the FTS5
** tokenizer specified by the arguments to the CREATE VIRTUAL TABLE
** statement, and returns one row for each token in the result, with
** fields set as follows:
**   input:    Always set to a copy of <string>.
**   token:    A token from the input.
**   start:    Byte offset of the token within the input <string>.
**   end:      Byte offset of the byte immediately following the end of the
**             token within the input string.
**   position: Token offset of the token within the input.
41 #if defined(SQLITE_TEST) && defined(SQLITE_ENABLE_FTS5)
/*
** Forward declarations of the three structures used by this module:
** the virtual table object, its cursor, and one tokenized row held by
** a cursor.
*/
typedef struct Fts5tokTable Fts5tokTable;
typedef struct Fts5tokCursor Fts5tokCursor;
typedef struct Fts5tokRow Fts5tokRow;
52 ** Virtual table structure.
55 sqlite3_vtab base
; /* Base class used by SQLite core */
56 fts5_tokenizer tok
; /* Tokenizer functions */
57 Fts5Tokenizer
*pTok
; /* Tokenizer instance */
** A container for a row's values.
71 ** Virtual table cursor structure.
73 struct Fts5tokCursor
{
74 sqlite3_vtab_cursor base
; /* Base class used by SQLite core */
75 int iRowid
; /* Current 'rowid' value */
76 char *zInput
; /* Input string */
77 int nRow
; /* Number of entries in aRow[] */
78 Fts5tokRow
*aRow
; /* Array of rows to return */
81 static void fts5tokDequote(char *z
){
84 if( q
=='[' || q
=='\'' || q
=='"' || q
=='`' ){
92 /* Character iIn was the close quote. */
96 /* Character iIn and iIn+1 form an escaped quote character. Skip
97 ** the input cursor past both and copy a single quote character
98 ** to the output buffer. */
103 z
[iOut
++] = z
[iIn
++];
** The second argument, argv[], is an array of pointers to nul-terminated
** strings. This function makes a copy of the array and strings into a
** single block of memory. It then dequotes any of the strings that appear
** to be quoted.
**
117 ** If successful, output parameter *pazDequote is set to point at the
118 ** array of dequoted strings and SQLITE_OK is returned. The caller is
119 ** responsible for eventually calling sqlite3_free() to free the array
120 ** in this case. Or, if an error occurs, an SQLite error code is returned.
121 ** The final value of *pazDequote is undefined in this case.
123 static int fts5tokDequoteArray(
124 int argc
, /* Number of elements in argv[] */
125 const char * const *argv
, /* Input array */
126 char ***pazDequote
/* Output array */
128 int rc
= SQLITE_OK
; /* Return code */
136 for(i
=0; i
<argc
; i
++){
137 nByte
+= (int)(strlen(argv
[i
]) + 1);
140 *pazDequote
= azDequote
= sqlite3_malloc(sizeof(char *)*argc
+ nByte
);
144 char *pSpace
= (char *)&azDequote
[argc
];
145 for(i
=0; i
<argc
; i
++){
146 int n
= (int)strlen(argv
[i
]);
147 azDequote
[i
] = pSpace
;
148 memcpy(pSpace
, argv
[i
], n
+1);
149 fts5tokDequote(pSpace
);
/*
** Schema of the tokenizer table.
**
** NOTE(review): the connect method passes an identical string literal to
** sqlite3_declare_vtab() rather than this macro; keep the two in sync.
*/
#define FTS3_TOK_SCHEMA "CREATE TABLE x(input HIDDEN, token, start, end, position)"
164 ** This function does all the work for both the xConnect and xCreate methods.
165 ** These tables have no persistent representation of their own, so xConnect
166 ** and xCreate are identical operations.
168 ** argv[0]: module name
169 ** argv[1]: database name
170 ** argv[2]: table name
171 ** argv[3]: first argument (tokenizer name)
173 static int fts5tokConnectMethod(
174 sqlite3
*db
, /* Database connection */
175 void *pCtx
, /* Pointer to fts5_api object */
176 int argc
, /* Number of elements in argv array */
177 const char * const *argv
, /* xCreate/xConnect argument array */
178 sqlite3_vtab
**ppVtab
, /* OUT: New sqlite3_vtab object */
179 char **pzErr
/* OUT: sqlite3_malloc'd error message */
181 fts5_api
*pApi
= (fts5_api
*)pCtx
;
182 Fts5tokTable
*pTab
= 0;
184 char **azDequote
= 0;
187 rc
= sqlite3_declare_vtab(db
,
188 "CREATE TABLE x(input HIDDEN, token, start, end, position)"
193 rc
= fts5tokDequoteArray(nDequote
, &argv
[3], &azDequote
);
197 pTab
= (Fts5tokTable
*)sqlite3_malloc(sizeof(Fts5tokTable
));
201 memset(pTab
, 0, sizeof(Fts5tokTable
));
207 const char *zModule
= 0;
209 zModule
= azDequote
[0];
212 rc
= pApi
->xFindTokenizer(pApi
, zModule
, &pTokCtx
, &pTab
->tok
);
214 const char **azArg
= (const char **)&azDequote
[1];
215 int nArg
= nDequote
>0 ? nDequote
-1 : 0;
216 rc
= pTab
->tok
.xCreate(pTokCtx
, azArg
, nArg
, &pTab
->pTok
);
225 *ppVtab
= (sqlite3_vtab
*)pTab
;
226 sqlite3_free(azDequote
);
231 ** This function does the work for both the xDisconnect and xDestroy methods.
232 ** These tables have no persistent representation of their own, so xDisconnect
233 ** and xDestroy are identical operations.
235 static int fts5tokDisconnectMethod(sqlite3_vtab
*pVtab
){
236 Fts5tokTable
*pTab
= (Fts5tokTable
*)pVtab
;
238 pTab
->tok
.xDelete(pTab
->pTok
);
245 ** xBestIndex - Analyze a WHERE and ORDER BY clause.
247 static int fts5tokBestIndexMethod(
249 sqlite3_index_info
*pInfo
253 for(i
=0; i
<pInfo
->nConstraint
; i
++){
254 if( pInfo
->aConstraint
[i
].usable
255 && pInfo
->aConstraint
[i
].iColumn
==0
256 && pInfo
->aConstraint
[i
].op
==SQLITE_INDEX_CONSTRAINT_EQ
259 pInfo
->aConstraintUsage
[i
].argvIndex
= 1;
260 pInfo
->aConstraintUsage
[i
].omit
= 1;
261 pInfo
->estimatedCost
= 1;
267 assert( pInfo
->estimatedCost
>1000000.0 );
273 ** xOpen - Open a cursor.
275 static int fts5tokOpenMethod(sqlite3_vtab
*pVTab
, sqlite3_vtab_cursor
**ppCsr
){
278 pCsr
= (Fts5tokCursor
*)sqlite3_malloc(sizeof(Fts5tokCursor
));
282 memset(pCsr
, 0, sizeof(Fts5tokCursor
));
284 *ppCsr
= (sqlite3_vtab_cursor
*)pCsr
;
** Reset the tokenizer cursor passed as the only argument, as if it had
** just been returned by fts5tokOpenMethod().
292 static void fts5tokResetCursor(Fts5tokCursor
*pCsr
){
294 for(i
=0; i
<pCsr
->nRow
; i
++){
295 sqlite3_free(pCsr
->aRow
[i
].zToken
);
297 sqlite3_free(pCsr
->zInput
);
298 sqlite3_free(pCsr
->aRow
);
306 ** xClose - Close a cursor.
308 static int fts5tokCloseMethod(sqlite3_vtab_cursor
*pCursor
){
309 Fts5tokCursor
*pCsr
= (Fts5tokCursor
*)pCursor
;
310 fts5tokResetCursor(pCsr
);
316 ** xNext - Advance the cursor to the next row, if any.
318 static int fts5tokNextMethod(sqlite3_vtab_cursor
*pCursor
){
319 Fts5tokCursor
*pCsr
= (Fts5tokCursor
*)pCursor
;
324 static int fts5tokCb(
325 void *pCtx
, /* Pointer to Fts5tokCursor */
326 int tflags
, /* Mask of FTS5_TOKEN_* flags */
327 const char *pToken
, /* Pointer to buffer containing token */
328 int nToken
, /* Size of token in bytes */
329 int iStart
, /* Byte offset of token within input text */
330 int iEnd
/* Byte offset of end of token within input text */
332 Fts5tokCursor
*pCsr
= (Fts5tokCursor
*)pCtx
;
335 if( (pCsr
->nRow
& (pCsr
->nRow
-1))==0 ){
336 int nNew
= pCsr
->nRow
? pCsr
->nRow
*2 : 32;
338 aNew
= (Fts5tokRow
*)sqlite3_realloc(pCsr
->aRow
, nNew
*sizeof(Fts5tokRow
));
339 if( aNew
==0 ) return SQLITE_NOMEM
;
340 memset(&aNew
[pCsr
->nRow
], 0, sizeof(Fts5tokRow
)*(nNew
-pCsr
->nRow
));
344 pRow
= &pCsr
->aRow
[pCsr
->nRow
];
345 pRow
->iStart
= iStart
;
348 pRow
->iPos
= pRow
[-1].iPos
+ ((tflags
& FTS5_TOKEN_COLOCATED
) ? 0 : 1);
350 pRow
->zToken
= sqlite3_malloc(nToken
+1);
351 if( pRow
->zToken
==0 ) return SQLITE_NOMEM
;
352 memcpy(pRow
->zToken
, pToken
, nToken
);
353 pRow
->zToken
[nToken
] = 0;
360 ** xFilter - Initialize a cursor to point at the start of its data.
362 static int fts5tokFilterMethod(
363 sqlite3_vtab_cursor
*pCursor
, /* The cursor used for this query */
364 int idxNum
, /* Strategy index */
365 const char *idxStr
, /* Unused */
366 int nVal
, /* Number of elements in apVal */
367 sqlite3_value
**apVal
/* Arguments for the indexing scheme */
369 int rc
= SQLITE_ERROR
;
370 Fts5tokCursor
*pCsr
= (Fts5tokCursor
*)pCursor
;
371 Fts5tokTable
*pTab
= (Fts5tokTable
*)(pCursor
->pVtab
);
373 fts5tokResetCursor(pCsr
);
375 const char *zByte
= (const char *)sqlite3_value_text(apVal
[0]);
376 int nByte
= sqlite3_value_bytes(apVal
[0]);
377 pCsr
->zInput
= sqlite3_malloc(nByte
+1);
378 if( pCsr
->zInput
==0 ){
381 memcpy(pCsr
->zInput
, zByte
, nByte
);
382 pCsr
->zInput
[nByte
] = 0;
383 rc
= pTab
->tok
.xTokenize(
384 pTab
->pTok
, (void*)pCsr
, 0, zByte
, nByte
, fts5tokCb
389 if( rc
!=SQLITE_OK
) return rc
;
390 return fts5tokNextMethod(pCursor
);
394 ** xEof - Return true if the cursor is at EOF, or false otherwise.
396 static int fts5tokEofMethod(sqlite3_vtab_cursor
*pCursor
){
397 Fts5tokCursor
*pCsr
= (Fts5tokCursor
*)pCursor
;
398 return (pCsr
->iRowid
>pCsr
->nRow
);
402 ** xColumn - Return a column value.
404 static int fts5tokColumnMethod(
405 sqlite3_vtab_cursor
*pCursor
, /* Cursor to retrieve value from */
406 sqlite3_context
*pCtx
, /* Context for sqlite3_result_xxx() calls */
407 int iCol
/* Index of column to read value from */
409 Fts5tokCursor
*pCsr
= (Fts5tokCursor
*)pCursor
;
410 Fts5tokRow
*pRow
= &pCsr
->aRow
[pCsr
->iRowid
-1];
412 /* CREATE TABLE x(input, token, start, end, position) */
415 sqlite3_result_text(pCtx
, pCsr
->zInput
, -1, SQLITE_TRANSIENT
);
418 sqlite3_result_text(pCtx
, pRow
->zToken
, -1, SQLITE_TRANSIENT
);
421 sqlite3_result_int(pCtx
, pRow
->iStart
);
424 sqlite3_result_int(pCtx
, pRow
->iEnd
);
428 sqlite3_result_int(pCtx
, pRow
->iPos
);
435 ** xRowid - Return the current rowid for the cursor.
437 static int fts5tokRowidMethod(
438 sqlite3_vtab_cursor
*pCursor
, /* Cursor to retrieve value from */
439 sqlite_int64
*pRowid
/* OUT: Rowid value */
441 Fts5tokCursor
*pCsr
= (Fts5tokCursor
*)pCursor
;
442 *pRowid
= (sqlite3_int64
)pCsr
->iRowid
;
447 ** Register the fts5tok module with database connection db. Return SQLITE_OK
448 ** if successful or an error code if sqlite3_create_module() fails.
450 int sqlite3Fts5TestRegisterTok(sqlite3
*db
, fts5_api
*pApi
){
451 static const sqlite3_module fts5tok_module
= {
453 fts5tokConnectMethod
, /* xCreate */
454 fts5tokConnectMethod
, /* xConnect */
455 fts5tokBestIndexMethod
, /* xBestIndex */
456 fts5tokDisconnectMethod
, /* xDisconnect */
457 fts5tokDisconnectMethod
, /* xDestroy */
458 fts5tokOpenMethod
, /* xOpen */
459 fts5tokCloseMethod
, /* xClose */
460 fts5tokFilterMethod
, /* xFilter */
461 fts5tokNextMethod
, /* xNext */
462 fts5tokEofMethod
, /* xEof */
463 fts5tokColumnMethod
, /* xColumn */
464 fts5tokRowidMethod
, /* xRowid */
470 0, /* xFindFunction */
476 int rc
; /* Return code */
478 rc
= sqlite3_create_module(db
, "fts5tokenize", &fts5tok_module
, (void*)pApi
);
482 #endif /* defined(SQLITE_TEST) && defined(SQLITE_ENABLE_FTS5) */