4 ** The author disclaims copyright to this source code. In place of
5 ** a legal notice, here is a blessing:
7 ** May you do good and not evil.
8 ** May you find forgiveness for yourself and forgive others.
9 ** May you share freely, never taking more than you give.
11 ******************************************************************************
13 ** This file contains code for the "fts3tokenize" virtual table module.
14 ** An fts3tokenize virtual table is created as follows:
16 ** CREATE VIRTUAL TABLE <tbl> USING fts3tokenize(
17 ** <tokenizer-name>, <arg-1>, ...
20 ** The table created has the following schema:
22 ** CREATE TABLE <tbl>(input, token, start, end, position)
24 ** When queried, the query must include a WHERE clause of the form "input = <string>" (an equality constraint on the input column):
28 ** The virtual table module tokenizes this <string>, using the FTS3
29 ** tokenizer specified by the arguments to the CREATE VIRTUAL TABLE
30 ** statement and returns one row for each token in the result. With
31 ** fields set as follows:
33 ** input: Always set to a copy of <string>
34 ** token: A token from the input.
35 ** start: Byte offset of the token within the input <string>.
36 ** end: Byte offset of the byte immediately following the end of the
37 ** token within the input string.
38 ** position: Token offset of token within input.
42 #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
/*
** Short type names for the virtual-table object and its cursor, so the rest
** of the file can write Fts3tokTable / Fts3tokCursor without the "struct"
** keyword.
*/
47 typedef struct Fts3tokTable Fts3tokTable
;
48 typedef struct Fts3tokCursor Fts3tokCursor
;
51 ** Virtual table structure.
/*
** Members of the virtual-table object: the SQLite base class followed by the
** tokenizer module and the tokenizer instance used by every cursor opened on
** this table.
** NOTE(review): the "struct Fts3tokTable {" line and the closing brace are
** not present in this extract -- confirm against the complete file.
*/
54 sqlite3_vtab base
; /* Base class used by SQLite core */
55 const sqlite3_tokenizer_module
*pMod
; /* Tokenizer module; xOpen/xNext/xClose/xDestroy are called through it */
56 sqlite3_tokenizer
*pTok
; /* Tokenizer instance passed to pMod->xOpen() and pMod->xDestroy() */
60 ** Virtual table cursor structure.
/*
** State of one scan over an fts3tokenize table. zInput is the cursor's own
** nul-terminated copy of the string being tokenized (allocated in
** fts3tokFilterMethod and released in fts3tokResetCursor). The zToken,
** nToken, iStart, iEnd and iPos members are filled in by the tokenizer's
** xNext method and describe the current row.
** NOTE(review): the closing brace of this struct is not present in this
** extract.
*/
62 struct Fts3tokCursor
{
63 sqlite3_vtab_cursor base
; /* Base class used by SQLite core */
64 char *zInput
; /* Input string */
65 sqlite3_tokenizer_cursor
*pCsr
; /* Cursor to iterate through zInput */
66 int iRowid
; /* Current 'rowid' value */
/* zToken==0 is what fts3tokEofMethod() reports as end-of-scan. */
67 const char *zToken
; /* Current 'token' value */
68 int nToken
; /* Size of zToken in bytes */
69 int iStart
; /* Current 'start' value */
70 int iEnd
; /* Current 'end' value */
71 int iPos
; /* Current 'pos' value */
75 ** Query FTS for the tokenizer implementation named zName.
/*
** Look up the tokenizer module registered under zName in the hash table
** pHash. The visible code formats the message "unknown tokenizer: <name>"
** into *pzErr via sqlite3Fts3ErrMsg().
**
** NOTE(review): several lines of this function are not present in this
** extract (the pHash, zName and pzErr parameter declarations, the test on
** the lookup result, the store through pp and the return statements).
** Presumably the error message is produced only when the lookup returns
** NULL, and *pp receives the module otherwise -- confirm against the
** complete file.
*/
77 static int fts3tokQueryTokenizer(
80 const sqlite3_tokenizer_module
**pp
,
83 sqlite3_tokenizer_module
*p
;
84 int nName
= (int)strlen(zName
);
/* The key length passed to the hash lookup is nName+1, i.e. it includes the
** terminating nul byte of zName. */
86 p
= (sqlite3_tokenizer_module
*)sqlite3Fts3HashFind(pHash
, zName
, nName
+1);
88 sqlite3Fts3ErrMsg(pzErr
, "unknown tokenizer: %s", zName
);
97 ** The second argument, argv[], is an array of pointers to nul-terminated
98 ** strings. This function makes a copy of the array and strings into a
99 ** single block of memory. It then dequotes any of the strings that appear
102 ** If successful, output parameter *pazDequote is set to point at the
103 ** array of dequoted strings and SQLITE_OK is returned. The caller is
104 ** responsible for eventually calling sqlite3_free() to free the array
105 ** in this case. Or, if an error occurs, an SQLite error code is returned.
106 ** The final value of *pazDequote is undefined in this case.
/*
** Copy the argc strings in argv[] into one sqlite3_malloc() allocation and
** dequote each copy in place with sqlite3Fts3Dequote(). The allocation is
** laid out as an array of argc string pointers followed immediately by the
** string bytes, so a single sqlite3_free() of *pazDequote releases
** everything.
**
** NOTE(review): some lines of this function are not present in this extract
** (declarations of i, nByte and azDequote, the allocation-failure check, the
** step that advances pSpace past each copied string, and the return).
** NOTE(review): the allocation size is computed from an int byte count
** (nByte) added to sizeof(char *)*argc; confirm that overflow is not a
** concern for the callers, or that a 64-bit allocator is used.
*/
108 static int fts3tokDequoteArray(
109 int argc
, /* Number of elements in argv[] */
110 const char * const *argv
, /* Input array */
111 char ***pazDequote
/* Output array */
113 int rc
= SQLITE_OK
; /* Return code */
/* First pass: total bytes needed for all strings, each with its nul. */
121 for(i
=0; i
<argc
; i
++){
122 nByte
+= (int)(strlen(argv
[i
]) + 1);
/* One block: argc pointers, then nByte bytes of string storage. */
125 *pazDequote
= azDequote
= sqlite3_malloc(sizeof(char *)*argc
+ nByte
);
/* String storage begins directly after the last pointer slot. */
129 char *pSpace
= (char *)&azDequote
[argc
];
/* Second pass: copy each string (including its nul) and dequote the copy. */
130 for(i
=0; i
<argc
; i
++){
131 int n
= (int)strlen(argv
[i
]);
132 azDequote
[i
] = pSpace
;
133 memcpy(pSpace
, argv
[i
], n
+1);
134 sqlite3Fts3Dequote(pSpace
);
144 ** Schema of the tokenizer table.
/* Schema string passed to sqlite3_declare_vtab() by fts3tokConnectMethod().
** Column order here (input=0, token=1, start=2, end=3, position=4) is the
** column numbering seen by the xBestIndex and xColumn methods. */
146 #define FTS3_TOK_SCHEMA "CREATE TABLE x(input, token, start, end, position)"
149 ** This function does all the work for both the xConnect and xCreate methods.
150 ** These tables have no persistent representation of their own, so xConnect
151 ** and xCreate are identical operations.
153 ** argv[0]: module name
154 ** argv[1]: database name
155 ** argv[2]: table name
156 ** argv[3]: first argument (tokenizer name)
/*
** Shared implementation of xCreate and xConnect. The visible steps are:
**
**   1. Declare the table schema (FTS3_TOK_SCHEMA); return at once on error.
**   2. Dequote the module arguments starting at argv[3].
**   3. Look up the tokenizer module named by the first dequoted argument.
**   4. Create a tokenizer instance with xCreate, passing the remaining
**      arguments.
**   5. Allocate and zero the Fts3tokTable and hand it back through *ppVtab.
**   6. Free the dequoted-argument block.
**
** NOTE(review): many lines of this function are not present in this extract,
** including the declarations of rc, nDequote and zModule, the assignment of
** nDequote, the "if( rc==SQLITE_OK )" style guards between the steps, the
** stores into pTab->pMod / pTab->pTok and the final return. The step
** boundaries above are inferred from the order of the visible statements;
** confirm against the complete file.
*/
158 static int fts3tokConnectMethod(
159 sqlite3
*db
, /* Database connection */
160 void *pHash
, /* Hash table of tokenizers */
161 int argc
, /* Number of elements in argv array */
162 const char * const *argv
, /* xCreate/xConnect argument array */
163 sqlite3_vtab
**ppVtab
, /* OUT: New sqlite3_vtab object */
164 char **pzErr
/* OUT: sqlite3_malloc'd error message */
166 Fts3tokTable
*pTab
= 0;
167 const sqlite3_tokenizer_module
*pMod
= 0;
168 sqlite3_tokenizer
*pTok
= 0;
170 char **azDequote
= 0;
173 rc
= sqlite3_declare_vtab(db
, FTS3_TOK_SCHEMA
);
174 if( rc
!=SQLITE_OK
) return rc
;
/* argv[0..2] are module, database and table name; user arguments start at
** argv[3]. */
177 rc
= fts3tokDequoteArray(nDequote
, &argv
[3], &azDequote
);
/* First user argument is the tokenizer name. */
184 zModule
= azDequote
[0];
186 rc
= fts3tokQueryTokenizer((Fts3Hash
*)pHash
, zModule
, &pMod
, pzErr
);
/* A module pointer is expected exactly when the lookup chain succeeded. */
189 assert( (rc
==SQLITE_OK
)==(pMod
!=0) );
/* Arguments after the tokenizer name are forwarded to the tokenizer. */
191 const char * const *azArg
= (const char * const *)&azDequote
[1];
192 rc
= pMod
->xCreate((nDequote
>1 ? nDequote
-1 : 0), azArg
, &pTok
);
196 pTab
= (Fts3tokTable
*)sqlite3_malloc(sizeof(Fts3tokTable
));
203 memset(pTab
, 0, sizeof(Fts3tokTable
));
206 *ppVtab
= &pTab
->base
;
/* NOTE(review): presumably on the failure path only, to release a tokenizer
** that was created before a later step failed -- the guard is not visible. */
209 pMod
->xDestroy(pTok
);
/* The dequoted copies are released here in this function. */
213 sqlite3_free(azDequote
);
218 ** This function does the work for both the xDisconnect and xDestroy methods.
219 ** These tables have no persistent representation of their own, so xDisconnect
220 ** and xDestroy are identical operations.
/*
** Shared implementation of xDisconnect and xDestroy: destroys the tokenizer
** instance owned by the table through the module's xDestroy method.
** NOTE(review): the remaining lines of this function (presumably freeing
** pTab and returning a status) are not present in this extract.
*/
222 static int fts3tokDisconnectMethod(sqlite3_vtab
*pVtab
){
223 Fts3tokTable
*pTab
= (Fts3tokTable
*)pVtab
;
225 pTab
->pMod
->xDestroy(pTab
->pTok
);
231 ** xBestIndex - Analyze a WHERE and ORDER BY clause.
/*
** xBestIndex: scan the constraint array for a usable equality constraint on
** column 0 (the "input" column of FTS3_TOK_SCHEMA). For such a constraint
** the visible code asks SQLite to pass the right-hand side as the first
** xFilter argument (argvIndex=1), tells it not to re-check the constraint
** (omit=1) and reports an estimated cost of 1.
**
** NOTE(review): the pVTab parameter declaration, the declaration of i, any
** idxNum assignment, the closing of the if/for and the return statements
** are not present in this extract. The assert near the end presumably
** belongs to the path where no matching constraint was found -- confirm.
*/
233 static int fts3tokBestIndexMethod(
235 sqlite3_index_info
*pInfo
238 UNUSED_PARAMETER(pVTab
);
240 for(i
=0; i
<pInfo
->nConstraint
; i
++){
241 if( pInfo
->aConstraint
[i
].usable
242 && pInfo
->aConstraint
[i
].iColumn
==0
243 && pInfo
->aConstraint
[i
].op
==SQLITE_INDEX_CONSTRAINT_EQ
246 pInfo
->aConstraintUsage
[i
].argvIndex
= 1;
247 pInfo
->aConstraintUsage
[i
].omit
= 1;
248 pInfo
->estimatedCost
= 1;
/* Checks that the cost still exceeds 1000000.0 at this point, i.e. it was
** not overwritten with the cheap value above. */
254 assert( pInfo
->estimatedCost
>1000000.0 );
260 ** xOpen - Open a cursor.
/*
** xOpen: allocate a new Fts3tokCursor, zero every field and return it
** through *ppCsr. The table handle itself is not needed here.
** NOTE(review): the declaration of pCsr, the allocation-failure check and
** the return statement are not present in this extract.
*/
262 static int fts3tokOpenMethod(sqlite3_vtab
*pVTab
, sqlite3_vtab_cursor
**ppCsr
){
264 UNUSED_PARAMETER(pVTab
);
266 pCsr
= (Fts3tokCursor
*)sqlite3_malloc(sizeof(Fts3tokCursor
));
/* All-zero state: no input string, no tokenizer cursor, no current token. */
270 memset(pCsr
, 0, sizeof(Fts3tokCursor
));
272 *ppCsr
= (sqlite3_vtab_cursor
*)pCsr
;
277 ** Reset the tokenizer cursor passed as the only argument, as if it had
278 ** just been returned by fts3tokOpenMethod().
/*
** Release the per-scan resources held by pCsr: the tokenizer cursor is
** closed through the module's xClose method and the private copy of the
** input string is freed.
** NOTE(review): other lines of this function are not present in this
** extract (presumably a guard around xClose for the case where no tokenizer
** cursor is open, and the clearing of the remaining cursor fields) --
** confirm against the complete file.
*/
280 static void fts3tokResetCursor(Fts3tokCursor
*pCsr
){
282 Fts3tokTable
*pTab
= (Fts3tokTable
*)(pCsr
->base
.pVtab
);
283 pTab
->pMod
->xClose(pCsr
->pCsr
);
286 sqlite3_free(pCsr
->zInput
);
297 ** xClose - Close a cursor.
/*
** xClose: release everything the cursor holds by resetting it.
** NOTE(review): the rest of this function (presumably freeing the cursor
** object itself and returning a status) is not present in this extract.
*/
299 static int fts3tokCloseMethod(sqlite3_vtab_cursor
*pCursor
){
300 Fts3tokCursor
*pCsr
= (Fts3tokCursor
*)pCursor
;
302 fts3tokResetCursor(pCsr
);
308 ** xNext - Advance the cursor to the next row, if any.
/*
** xNext: ask the tokenizer for the next token. The tokenizer's xNext writes
** the token text and length, its start and end byte offsets and its
** position directly into the cursor fields.
**
** NOTE(review): the condition guarding the reset below, any rowid update
** and the return statement are not present in this extract. Presumably the
** cursor is reset only when xNext returns something other than SQLITE_OK,
** and SQLITE_DONE (end of input) is then reported to SQLite as SQLITE_OK --
** confirm against the complete file.
*/
310 static int fts3tokNextMethod(sqlite3_vtab_cursor
*pCursor
){
311 Fts3tokCursor
*pCsr
= (Fts3tokCursor
*)pCursor
;
312 Fts3tokTable
*pTab
= (Fts3tokTable
*)(pCursor
->pVtab
);
313 int rc
; /* Return code */
316 rc
= pTab
->pMod
->xNext(pCsr
->pCsr
,
317 &pCsr
->zToken
, &pCsr
->nToken
,
318 &pCsr
->iStart
, &pCsr
->iEnd
, &pCsr
->iPos
322 fts3tokResetCursor(pCsr
);
/* Running out of tokens is not an error from SQLite's point of view. */
323 if( rc
==SQLITE_DONE
) rc
= SQLITE_OK
;
330 ** xFilter - Initialize a cursor to point at the start of its data.
/*
** xFilter: start a new scan. Any previous scan state is discarded, the text
** of apVal[0] is copied into a private nul-terminated buffer (pCsr->zInput),
** a tokenizer cursor is opened over that buffer, and the cursor is advanced
** to the first token by calling fts3tokNextMethod().
**
** rc starts as SQLITE_ERROR, so a path that skips the block below reports an
** error.
**
** NOTE(review): the test on idxNum, the allocation-failure branch and the
** guard around the pTokenizer assignment are not present in this extract.
** NOTE(review): sqlite3_value_text() returns NULL for an SQL NULL value; no
** guard before the memcpy() below is visible here. Passing a NULL source to
** memcpy() is undefined even when nByte is 0 -- confirm that a guard exists
** in the complete file or add one.
*/
332 static int fts3tokFilterMethod(
333 sqlite3_vtab_cursor
*pCursor
, /* The cursor used for this query */
334 int idxNum
, /* Strategy index */
335 const char *idxStr
, /* Unused */
336 int nVal
, /* Number of elements in apVal */
337 sqlite3_value
**apVal
/* Arguments for the indexing scheme */
339 int rc
= SQLITE_ERROR
;
340 Fts3tokCursor
*pCsr
= (Fts3tokCursor
*)pCursor
;
341 Fts3tokTable
*pTab
= (Fts3tokTable
*)(pCursor
->pVtab
);
342 UNUSED_PARAMETER(idxStr
);
343 UNUSED_PARAMETER(nVal
);
/* Drop whatever the previous scan on this cursor left behind. */
345 fts3tokResetCursor(pCsr
);
/* Fetch the text first and its byte length second. */
347 const char *zByte
= (const char *)sqlite3_value_text(apVal
[0]);
348 int nByte
= sqlite3_value_bytes(apVal
[0]);
/* +1 leaves room for the nul terminator written below. */
349 pCsr
->zInput
= sqlite3_malloc(nByte
+1);
350 if( pCsr
->zInput
==0 ){
353 memcpy(pCsr
->zInput
, zByte
, nByte
);
354 pCsr
->zInput
[nByte
] = 0;
355 rc
= pTab
->pMod
->xOpen(pTab
->pTok
, pCsr
->zInput
, nByte
, &pCsr
->pCsr
);
/* Link the new tokenizer cursor back to the tokenizer that produced it. */
357 pCsr
->pCsr
->pTokenizer
= pTab
->pTok
;
362 if( rc
!=SQLITE_OK
) return rc
;
/* Position the cursor on the first token (or at EOF if there is none). */
363 return fts3tokNextMethod(pCursor
);
367 ** xEof - Return true if the cursor is at EOF, or false otherwise.
/*
** xEof: the cursor is at end-of-scan exactly when it has no current token,
** i.e. when pCsr->zToken is NULL. Returns 1 in that case and 0 otherwise.
** NOTE(review): the closing brace of this function is not present in this
** extract.
*/
369 static int fts3tokEofMethod(sqlite3_vtab_cursor
*pCursor
){
370 Fts3tokCursor
*pCsr
= (Fts3tokCursor
*)pCursor
;
371 return (pCsr
->zToken
==0);
375 ** xColumn - Return a column value.
/*
** xColumn: report the value of column iCol for the current row through
** pCtx. The five result calls below appear in schema order (input, token,
** start, end, position).
**
** NOTE(review): the switch/case lines selecting among the result calls and
** the return statement are not present in this extract; the mapping of each
** call to a column number is inferred from their order and from the field
** names -- confirm against the complete file.
*/
377 static int fts3tokColumnMethod(
378 sqlite3_vtab_cursor
*pCursor
, /* Cursor to retrieve value from */
379 sqlite3_context
*pCtx
, /* Context for sqlite3_result_xxx() calls */
380 int iCol
/* Index of column to read value from */
382 Fts3tokCursor
*pCsr
= (Fts3tokCursor
*)pCursor
;
384 /* CREATE TABLE x(input, token, start, end, position) */
/* Whole input string; length -1 means "up to the nul terminator".
** SQLITE_TRANSIENT makes SQLite take its own copy of the text. */
387 sqlite3_result_text(pCtx
, pCsr
->zInput
, -1, SQLITE_TRANSIENT
);
/* Token text with an explicit byte length (nToken). */
390 sqlite3_result_text(pCtx
, pCsr
->zToken
, pCsr
->nToken
, SQLITE_TRANSIENT
);
393 sqlite3_result_int(pCtx
, pCsr
->iStart
);
396 sqlite3_result_int(pCtx
, pCsr
->iEnd
);
400 sqlite3_result_int(pCtx
, pCsr
->iPos
);
407 ** xRowid - Return the current rowid for the cursor.
/*
** xRowid: store the cursor's current rowid, widened from int to a 64-bit
** integer, in *pRowid.
** NOTE(review): the return statement and closing brace are not present in
** this extract.
*/
409 static int fts3tokRowidMethod(
410 sqlite3_vtab_cursor
*pCursor
, /* Cursor to retrieve value from */
411 sqlite_int64
*pRowid
/* OUT: Rowid value */
413 Fts3tokCursor
*pCsr
= (Fts3tokCursor
*)pCursor
;
414 *pRowid
= (sqlite3_int64
)pCsr
->iRowid
;
419 ** Register the fts3tokenize module with database connection db. Return SQLITE_OK
420 ** if successful or an error code if sqlite3_create_module() fails.
/*
** Register this file's virtual-table implementation with connection db
** under the module name "fts3tokenize". pHash (the tokenizer hash table) is
** passed as the module's client data, which is how fts3tokConnectMethod()
** receives it as its pHash argument.
**
** The same function serves as both xCreate and xConnect, and likewise for
** xDisconnect and xDestroy.
**
** NOTE(review): some entries of the method table (those before xCreate and
** between xRowid and xFindFunction, and any after it) and the return
** statement are not present in this extract.
*/
422 int sqlite3Fts3InitTok(sqlite3
*db
, Fts3Hash
*pHash
){
423 static const sqlite3_module fts3tok_module
= {
425 fts3tokConnectMethod
, /* xCreate */
426 fts3tokConnectMethod
, /* xConnect */
427 fts3tokBestIndexMethod
, /* xBestIndex */
428 fts3tokDisconnectMethod
, /* xDisconnect */
429 fts3tokDisconnectMethod
, /* xDestroy */
430 fts3tokOpenMethod
, /* xOpen */
431 fts3tokCloseMethod
, /* xClose */
432 fts3tokFilterMethod
, /* xFilter */
433 fts3tokNextMethod
, /* xNext */
434 fts3tokEofMethod
, /* xEof */
435 fts3tokColumnMethod
, /* xColumn */
436 fts3tokRowidMethod
, /* xRowid */
442 0, /* xFindFunction */
448 int rc
; /* Return code */
450 rc
= sqlite3_create_module(db
, "fts3tokenize", &fts3tok_module
, (void*)pHash
);
454 #endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */