/*
** The author disclaims copyright to this source code.  In place of
** a legal notice, here is a blessing:
**
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
**
** This file implements a simple virtual table wrapper around the LSM
** storage engine from SQLite4.
*/
16 #include "sqlite3ext.h"
17 SQLITE_EXTENSION_INIT1
/* Forward declaration of subclasses of virtual table objects */
typedef struct lsm1_vtab lsm1_vtab;
typedef struct lsm1_cursor lsm1_cursor;

/* Shorthand for unsigned char; used for the small flag fields of
** lsm1_cursor (isDesc, atEof, bUnique). */
typedef unsigned char u8;
29 /* An open connection to an LSM table */
31 sqlite3_vtab base
; /* Base class - must be first */
32 lsm_db
*pDb
; /* Open connection to the LSM table */
36 /* lsm1_cursor is a subclass of sqlite3_vtab_cursor which will
37 ** serve as the underlying representation of a cursor that scans
38 ** over rows of the result
41 sqlite3_vtab_cursor base
; /* Base class - must be first */
42 lsm_cursor
*pLsmCur
; /* The LSM cursor */
43 u8 isDesc
; /* 0: scan forward. 1: scan reverse */
44 u8 atEof
; /* True if the scan is complete */
45 u8 bUnique
; /* True if no more than one row of output */
49 ** The lsm1Connect() method is invoked to create a new
50 ** lsm1_vtab that describes the virtual table.
/*
** lsm1Connect: build the lsm1_vtab for a table.  The module table in this
** file installs it in both the xCreate and the xConnect slot.
**
** Behavior visible in this listing:
**   - When argc is not 4, or argv[3] is NULL or an empty string, *pzErr is
**     set to "filename argument missing".
**   - The lsm1_vtab comes from sqlite3_malloc() and is zeroed with memset().
**   - An LSM handle is created with lsm_new() and then opened on the file
**     named by argv[3] with lsm_open(); an error message carrying the
**     numeric code is formatted into *pzErr for each call (presumably only
**     when the call fails - the test itself is not shown here).
**   - The table schema is declared with sqlite3_declare_vtab().  The
**     LSM1_COLUMN_* macros give the index of each declared column: key,
**     blobkey, value, blobvalue and the hidden command column.
**   - lsm_close() is called on pNew->pDb when that handle is non-NULL.
**
** NOTE(review): this listing omits several original lines (the remaining
** parameters, local declarations, return statements and braces), so the
** error-path control flow cannot be confirmed from here.
*/
52 static int lsm1Connect(
55 int argc
, const char *const*argv
,
56 sqlite3_vtab
**ppVtab
,
62 if( argc
!=4 || argv
[3]==0 || argv
[3][0]==0 ){
63 *pzErr
= sqlite3_mprintf("filename argument missing");
66 *ppVtab
= sqlite3_malloc( sizeof(*pNew
) );
67 pNew
= (lsm1_vtab
*)*ppVtab
;
71 memset(pNew
, 0, sizeof(*pNew
));
72 rc
= lsm_new(0, &pNew
->pDb
);
74 *pzErr
= sqlite3_mprintf("lsm_new failed with error code %d", rc
);
78 rc
= lsm_open(pNew
->pDb
, argv
[3]);
80 *pzErr
= sqlite3_mprintf("lsm_open failed with %d", rc
);
86 #define LSM1_COLUMN_KEY 0
87 #define LSM1_COLUMN_BLOBKEY 1
88 #define LSM1_COLUMN_VALUE 2
89 #define LSM1_COLUMN_BLOBVALUE 3
90 #define LSM1_COLUMN_COMMAND 4
92 rc
= sqlite3_declare_vtab(db
,
94 " key," /* The primary key. Any non-NULL */
95 " blobkey," /* Pure BLOB primary key */
96 " value," /* The value associated with key. Any non-NULL */
97 " blobvalue," /* Pure BLOB value */
98 " command hidden" /* Insert here for control operations */
104 if( pNew
->pDb
) lsm_close(pNew
->pDb
);
/* This method is the destructor for lsm1_vtab objects. */
/*
** lsm1Disconnect: tear-down entry point for the virtual table.  The module
** table in this file installs it in both the xDisconnect and the xDestroy
** slot.  It receives the sqlite3_vtab pointer and views it as the
** enclosing lsm1_vtab.
**
** NOTE(review): only the signature and the cast appear in this listing;
** the statements that release the object are not shown.
*/
115 static int lsm1Disconnect(sqlite3_vtab
*pVtab
){
116 lsm1_vtab
*p
= (lsm1_vtab
*)pVtab
;
123 ** Constructor for a new lsm1_cursor object.
/*
** lsm1Open: xOpen implementation - create a cursor on the table.
**
** Allocates an lsm1_cursor with sqlite3_malloc(), returning SQLITE_NOMEM
** when the allocation fails, zeroes it, publishes its embedded base
** object through *ppCursor, and opens an LSM cursor on the table's LSM
** connection with lsm_csr_open(), storing it in pCur->pLsmCur.
**
** NOTE(review): the local declarations and everything after the
** lsm_csr_open() call (result mapping, cleanup on failure) are missing
** from this listing.
*/
125 static int lsm1Open(sqlite3_vtab
*pVtab
, sqlite3_vtab_cursor
**ppCursor
){
126 lsm1_vtab
*p
= (lsm1_vtab
*)pVtab
;
129 pCur
= sqlite3_malloc( sizeof(*pCur
) );
130 if( pCur
==0 ) return SQLITE_NOMEM
;
131 memset(pCur
, 0, sizeof(*pCur
));
132 *ppCursor
= &pCur
->base
;
133 rc
= lsm_csr_open(p
->pDb
, &pCur
->pLsmCur
);
145 ** Destructor for a lsm1_cursor.
/*
** lsm1Close: xClose implementation - dispose of a cursor.
**
** Views the sqlite3_vtab_cursor as the enclosing lsm1_cursor and closes
** its LSM cursor with lsm_csr_close().
**
** NOTE(review): the remainder of the body (presumably freeing the cursor
** object and returning a status) is missing from this listing.
*/
147 static int lsm1Close(sqlite3_vtab_cursor
*cur
){
148 lsm1_cursor
*pCur
= (lsm1_cursor
*)cur
;
149 lsm_csr_close(pCur
->pLsmCur
);
156 ** Advance a lsm1_cursor to its next row of output.
/*
** lsm1Next: xNext implementation - step the cursor to its next row.
**
** The LSM cursor is moved with lsm_csr_prev() or lsm_csr_next()
** (presumably selected by pCur->isDesc; the test is not shown here).
** After a successful move the cursor is checked with lsm_csr_valid(); the
** branch taken when it is no longer valid is not shown (presumably it
** marks end-of-scan).
**
** Returns SQLITE_OK when the LSM result is LSM_OK and SQLITE_ERROR
** otherwise.
**
** NOTE(review): the declaration of rc and all branch structure between
** the visible statements are missing from this listing.
*/
158 static int lsm1Next(sqlite3_vtab_cursor
*cur
){
159 lsm1_cursor
*pCur
= (lsm1_cursor
*)cur
;
165 rc
= lsm_csr_prev(pCur
->pLsmCur
);
167 rc
= lsm_csr_next(pCur
->pLsmCur
);
169 if( rc
==LSM_OK
&& lsm_csr_valid(pCur
->pLsmCur
)==0 ){
173 return rc
==LSM_OK
? SQLITE_OK
: SQLITE_ERROR
;
177 ** Return TRUE if the cursor has been moved off of the last
/*
** lsm1Eof: xEof implementation - report whether the scan has finished.
**
** Views the sqlite3_vtab_cursor as the enclosing lsm1_cursor.
**
** NOTE(review): the return statement is missing from this listing;
** presumably it reports the cursor's atEof flag - confirm against the
** original file.
*/
180 static int lsm1Eof(sqlite3_vtab_cursor
*cur
){
181 lsm1_cursor
*pCur
= (lsm1_cursor
*)cur
;
186 ** Rowids are not supported by the underlying virtual table. So always
187 ** return 0 for the rowid.
/*
** lsm1Rowid: xRowid implementation.  Takes the cursor and an output
** pointer for the rowid.
**
** NOTE(review): the body is missing from this listing.  The comment that
** precedes this function in the file says rowids are not supported and
** that 0 is always reported.
*/
189 static int lsm1Rowid(sqlite3_vtab_cursor
*cur
, sqlite_int64
*pRowid
){
/*
** Type prefixes on LSM keys.
**
** The first byte of an encoded key is one of these values; the key
** encoder stores it in byte 0 and the key column reader dispatches on it.
*/
#define LSM1_TYPE_NEGATIVE   0
#define LSM1_TYPE_POSITIVE   1
#define LSM1_TYPE_TEXT       2
#define LSM1_TYPE_BLOB       3
/*
** Write a 32-bit unsigned integer as 4 big-endian bytes.
**
** z[] must have room for at least 4 bytes.  The most significant byte of
** y is stored in z[0] and the least significant byte in z[3].
*/
static void varintWrite32(unsigned char *z, unsigned int y){
  z[0] = (unsigned char)(y>>24);
  z[1] = (unsigned char)(y>>16);
  z[2] = (unsigned char)(y>>8);
  z[3] = (unsigned char)(y);
}
/*
** Write a varint into z[].  The buffer z[] must be at least 9 characters
** long to accommodate the largest possible varint.  Return the number of
** bytes of z[] used.
*/
/*
** lsm1PutVarint64: encode the 64-bit unsigned value x as a varint in z[].
**
** Encodings visible in this listing:
**   - one byte:    z[0] is x itself.
**   - two bytes:   with y = x-240, z[0] = y/256 + 241 and z[1] = y%256.
**   - three bytes: with y = x-2288, z[1] = y/256 and z[2] = y%256.
**   - longer forms: w holds the upper 32 bits of x (y presumably holds the
**     lower 32 bits; that assignment is not shown).  The bytes after z[0]
**     are either the low three bytes of y, or zero to four bytes of w
**     (most significant first) followed by y written big-endian through
**     varintWrite32().
**
** NOTE(review): the range tests that choose between the forms, the z[0]
** prefix bytes for the longer forms and every return statement are
** missing from this listing; consult the original file for them.
*/
217 static int lsm1PutVarint64(unsigned char *z
, sqlite3_uint64 x
){
220 z
[0] = (unsigned char)x
;
224 y
= (unsigned int)(x
- 240);
225 z
[0] = (unsigned char)(y
/256 + 241);
226 z
[1] = (unsigned char)(y
%256);
230 y
= (unsigned int)(x
- 2288);
232 z
[1] = (unsigned char)(y
/256);
233 z
[2] = (unsigned char)(y
%256);
237 w
= (unsigned int)(x
>>32);
241 z
[1] = (unsigned char)(y
>>16);
242 z
[2] = (unsigned char)(y
>>8);
243 z
[3] = (unsigned char)(y
);
247 varintWrite32(z
+1, y
);
252 z
[1] = (unsigned char)w
;
253 varintWrite32(z
+2, y
);
258 z
[1] = (unsigned char)(w
>>8);
259 z
[2] = (unsigned char)w
;
260 varintWrite32(z
+3, y
);
265 z
[1] = (unsigned char)(w
>>16);
266 z
[2] = (unsigned char)(w
>>8);
267 z
[3] = (unsigned char)w
;
268 varintWrite32(z
+4, y
);
272 varintWrite32(z
+1, w
);
273 varintWrite32(z
+5, y
);
278 ** Decode the varint in the first n bytes z[]. Write the integer value
279 ** into *pResult and return the number of bytes in the varint.
281 ** If the decode fails because there are not enough bytes in z[] then
/*
** lsm1GetVarint64: decode a varint from z[] into *pResult.
**
** Decodings visible in this listing:
**   - two bytes:   (z[0]-241)*256 + z[1] + 240.
**   - a length check: returns 0 when n is smaller than z[0]-246.
**   - three bytes: 2288 + 256*z[1] + z[2].
**   - four bytes:  z[1..3] read big-endian.
**   - longer forms: x collects z[1..4] big-endian and is then widened to
**     64 bits and shifted left by 8, 16, 24 or 32 bits to make room for
**     the remaining bytes z[5..8].
**
** NOTE(review): z[] elements are promoted to int before shifting, so
** "z[k]<<24" with a byte of 0x80 or above shifts into the sign bit of a
** 32-bit int; the last form masks its sum with 0xffffffff but the
** assignment to x does not - confirm the declared type of x and whether a
** cast is wanted.
**
** NOTE(review): the n parameter, the declaration of x, the tests on z[0]
** that choose each form and the return statements are missing from this
** listing.
*/
284 static int lsm1GetVarint64(
285 const unsigned char *z
,
287 sqlite3_uint64
*pResult
297 *pResult
= (z
[0]-241)*256 + z
[1] + 240;
300 if( n
<z
[0]-246 ) return 0;
302 *pResult
= 2288 + 256*z
[1] + z
[2];
306 *pResult
= (z
[1]<<16) + (z
[2]<<8) + z
[3];
309 x
= (z
[1]<<24) + (z
[2]<<16) + (z
[3]<<8) + z
[4];
315 *pResult
= (((sqlite3_uint64
)x
)<<8) + z
[5];
319 *pResult
= (((sqlite3_uint64
)x
)<<16) + (z
[5]<<8) + z
[6];
323 *pResult
= (((sqlite3_uint64
)x
)<<24) + (z
[5]<<16) + (z
[6]<<8) + z
[7];
326 *pResult
= (((sqlite3_uint64
)x
)<<32) +
327 (0xffffffff & ((z
[5]<<24) + (z
[6]<<16) + (z
[7]<<8) + z
[8]));
332 ** Generate a key encoding for pValue such that all keys compare in
333 ** lexicographical order. Return an SQLite error code or SQLITE_OK.
335 ** The key encoding is *pnKey bytes in length written into *ppKey.
336 ** Space to hold the key is taken from pSpace if sufficient, or else
337 ** from sqlite3_malloc(). The caller is responsible for freeing malloced
/*
** lsm1EncodeKey: turn an SQL value into the byte string used as LSM key.
**
** Behavior visible in this listing:
**   - The caller-supplied scratch buffer must hold at least 32 bytes
**     (asserted).
**   - One path returns SQLITE_ERROR with the remark that NULL keys cannot
**     be handled.
**   - BLOB and (presumably) TEXT values are encoded as one type byte,
**     LSM1_TYPE_BLOB or LSM1_TYPE_TEXT, followed by the nVal raw bytes of
**     the value.  A buffer of nVal+1 bytes may be taken from
**     sqlite3_malloc() (presumably when nSpace is too small; the test is
**     not shown); SQLITE_NOMEM is returned if that allocation or the text
**     fetch fails.
**   - INTEGER values are encoded as one type byte, LSM1_TYPE_NEGATIVE or
**     LSM1_TYPE_POSITIVE, followed by the varint produced by
**     lsm1PutVarint64(); *pnKey is 1 plus the varint length.
**
** NOTE(review): the integer path compares iVal with 0xffffffffffffffffLL.
** After the usual arithmetic conversions that matches iVal==-1, which may
** not be the sentinel that was intended - confirm.
** NOTE(review): the integer path reinterprets iVal through a pointer cast
** to sqlite3_uint64; memcpy is the aliasing-safe equivalent.
** NOTE(review): sqlite3_value_bytes() is called before
** sqlite3_value_text(); the SQLite documentation recommends fetching the
** text or blob first and the byte count second - confirm the length is
** still valid when a type conversion occurs.
** NOTE(review): the closing parenthesis of the signature, several local
** declarations, the switch header, the other case labels and all
** *ppKey assignments are missing from this listing.
*/
340 static int lsm1EncodeKey(
341 sqlite3_value
*pValue
, /* Value to be encoded */
342 unsigned char **ppKey
, /* Write the encoding here */
343 int *pnKey
, /* Write the size of the encoding here */
344 unsigned char *pSpace
, /* Use this space if it is large enough */
345 int nSpace
/* Size of pSpace[] */
347 int eType
= sqlite3_value_type(pValue
);
350 assert( nSpace
>=32 );
353 return SQLITE_ERROR
; /* We cannot handle NULL keys */
357 int nVal
= sqlite3_value_bytes(pValue
);
359 if( eType
==SQLITE_BLOB
){
360 eType
= LSM1_TYPE_BLOB
;
361 pVal
= sqlite3_value_blob(pValue
);
363 eType
= LSM1_TYPE_TEXT
;
364 pVal
= (const void*)sqlite3_value_text(pValue
);
365 if( pVal
==0 ) return SQLITE_NOMEM
;
368 pSpace
= sqlite3_malloc( nVal
+1 );
369 if( pSpace
==0 ) return SQLITE_NOMEM
;
371 pSpace
[0] = (unsigned char)eType
;
372 memcpy(&pSpace
[1], pVal
, nVal
);
377 case SQLITE_INTEGER
: {
378 sqlite3_int64 iVal
= sqlite3_value_int64(pValue
);
381 if( iVal
==0xffffffffffffffffLL
) return SQLITE_ERROR
;
382 uVal
= *(sqlite3_uint64
*)&iVal
;
383 eType
= LSM1_TYPE_NEGATIVE
;
386 eType
= LSM1_TYPE_POSITIVE
;
388 pSpace
[0] = (unsigned char)eType
;
390 *pnKey
= 1 + lsm1PutVarint64(&pSpace
[1], uVal
);
397 ** Return values of columns for the row at which the lsm1_cursor
398 ** is currently pointing.
/*
** lsm1Column: xColumn implementation - deliver column i of the current
** row through the sqlite3_result_*() interfaces.
**
** Cases visible in this listing:
**   - LSM1_COLUMN_BLOBKEY:   the raw key from lsm_csr_key() as a BLOB.
**   - LSM1_COLUMN_KEY:       the key is decoded by its first byte:
**                            LSM1_TYPE_BLOB and LSM1_TYPE_TEXT yield the
**                            remaining nVal-1 bytes as BLOB or TEXT; a key
**                            of 2..10 bytes tagged LSM1_TYPE_POSITIVE or
**                            LSM1_TYPE_NEGATIVE is decoded with
**                            lsm1GetVarint64() and returned as an integer.
**   - LSM1_COLUMN_BLOBVALUE: the raw value from lsm_csr_value() as a BLOB.
**   - LSM1_COLUMN_VALUE:     the first byte of the value is an SQLite type
**                            code.  Numeric values are rebuilt by folding
**                            bytes 1..nVal-1 big-endian into a 64-bit
**                            word, returned as an integer when aVal[0] is
**                            SQLITE_INTEGER or otherwise copied bit for
**                            bit into a double.  TEXT and BLOB values
**                            return the nVal-1 bytes after the type byte.
**
** NOTE(review): the KEY branch reads pVal[0] and the text/blob branches
** use nVal-1; no check that nVal is at least 1 is visible before the
** first two comparisons - confirm it exists in the omitted lines.
** NOTE(review): the switch headers, several case labels, local
** declarations, the destructor arguments of two result calls and the
** return statement are missing from this listing.
*/
400 static int lsm1Column(
401 sqlite3_vtab_cursor
*cur
, /* The cursor */
402 sqlite3_context
*ctx
, /* First argument to sqlite3_result_...() */
403 int i
/* Which column to return */
405 lsm1_cursor
*pCur
= (lsm1_cursor
*)cur
;
407 case LSM1_COLUMN_BLOBKEY
: {
410 if( lsm_csr_key(pCur
->pLsmCur
, &pVal
, &nVal
)==LSM_OK
){
411 sqlite3_result_blob(ctx
, pVal
, nVal
, SQLITE_TRANSIENT
);
415 case LSM1_COLUMN_KEY
: {
416 const unsigned char *pVal
;
418 if( lsm_csr_key(pCur
->pLsmCur
, (const void**)&pVal
, &nVal
)==LSM_OK
421 if( pVal
[0]==LSM1_TYPE_BLOB
){
422 sqlite3_result_blob(ctx
, (const void*)&pVal
[1],nVal
-1,
424 }else if( pVal
[0]==LSM1_TYPE_TEXT
){
425 sqlite3_result_text(ctx
, (const char*)&pVal
[1],nVal
-1,
427 }else if( nVal
>=2 && nVal
<=10 &&
428 (pVal
[0]==LSM1_TYPE_POSITIVE
|| pVal
[0]==LSM1_TYPE_NEGATIVE
)
431 lsm1GetVarint64(pVal
+1, nVal
-1, (sqlite3_uint64
*)&iVal
);
432 sqlite3_result_int64(ctx
, iVal
);
437 case LSM1_COLUMN_BLOBVALUE
: {
440 if( lsm_csr_value(pCur
->pLsmCur
, (const void**)&pVal
, &nVal
)==LSM_OK
){
441 sqlite3_result_blob(ctx
, pVal
, nVal
, SQLITE_TRANSIENT
);
445 case LSM1_COLUMN_VALUE
: {
446 const unsigned char *aVal
;
448 if( lsm_csr_value(pCur
->pLsmCur
, (const void**)&aVal
, &nVal
)==LSM_OK
453 case SQLITE_INTEGER
: {
454 sqlite3_uint64 x
= 0;
456 for(j
=1; j
<nVal
; j
++){
457 x
= (x
<<8) | aVal
[j
];
459 if( aVal
[0]==SQLITE_INTEGER
){
460 sqlite3_result_int64(ctx
, *(sqlite3_int64
*)&x
);
463 assert( sizeof(r
)==sizeof(x
) );
464 memcpy(&r
, &x
, sizeof(r
));
465 sqlite3_result_double(ctx
, r
);
470 sqlite3_result_text(ctx
, (char*)&aVal
[1], nVal
-1, SQLITE_TRANSIENT
);
474 sqlite3_result_blob(ctx
, &aVal
[1], nVal
-1, SQLITE_TRANSIENT
);
488 /* Move to the first row to return.
/*
** lsm1Filter: xFilter implementation - position the cursor on the first
** row of a scan.
**
** Behavior visible in this listing:
**   - When argv[0] is a BLOB, its bytes are used as the key for
**     lsm_csr_seek() with LSM_SEEK_EQ.
**   - Another path positions the cursor on the first entry with
**     lsm_csr_first() (presumably the full-scan plan; the idxNum test is
**     not shown).
**   - After a successful positioning the cursor is checked with
**     lsm_csr_valid(); the statement guarded by that check is not shown.
**   - Returns SQLITE_OK when the LSM result is LSM_OK and SQLITE_ERROR
**     otherwise.
**
** NOTE(review): no assignment to rc is visible for a non-BLOB argv[0] on
** the seek path - confirm rc is initialized in the omitted lines.
** NOTE(review): the closing parenthesis of the signature, the declaration
** of rc, the branch on idxNum and the cursor flag updates are missing
** from this listing.
*/
490 static int lsm1Filter(
491 sqlite3_vtab_cursor
*pVtabCursor
,
492 int idxNum
, const char *idxStr
,
493 int argc
, sqlite3_value
**argv
495 lsm1_cursor
*pCur
= (lsm1_cursor
*)pVtabCursor
;
502 if( sqlite3_value_type(argv
[0])==SQLITE_BLOB
){
503 const void *pVal
= sqlite3_value_blob(argv
[0]);
504 int nVal
= sqlite3_value_bytes(argv
[0]);
505 rc
= lsm_csr_seek(pCur
->pLsmCur
, pVal
, nVal
, LSM_SEEK_EQ
);
508 rc
= lsm_csr_first(pCur
->pLsmCur
);
512 if( rc
==LSM_OK
&& lsm_csr_valid(pCur
->pLsmCur
)!=0 ){
515 return rc
==LSM_OK
? SQLITE_OK
: SQLITE_ERROR
;
519 ** Only comparisons against the key are allowed. The idxNum defines
520 ** which comparisons are available:
522 ** 0 Full table scan only
523 ** bit 1 key==?1 single argument for ?1
526 ** bit 4 key<?N (N==1 if bits 2,3 clear, or 2 if bits2,3 set)
527 ** bit 5 key<=?N (N==1 if bits 2,3 clear, or 2 if bits2,3 set)
528 ** bit 6 Use blobkey instead of key
530 ** To put it another way:
532 ** 0 Full table scan.
537 ** 10 key>?1 AND key<?2
538 ** 12 key>=?1 AND key<?2
540 ** 18 key>?1 AND key<=?2
541 ** 20 key>=?1 AND key<=?2
542 ** 33..52 Use blobkey in place of key...
/*
** lsm1BestIndex: xBestIndex implementation - choose a query plan.
**
** Behavior visible in this listing:
**   - The constraint array is walked while idxNum stays below 16.  A
**     constraint is skipped unless it is usable, applies to the key column
**     (LSM1_COLUMN_KEY) and uses the == operator.  Because every other
**     operator has already been skipped, the switch that follows can only
**     reach its SQLITE_INDEX_CONSTRAINT_EQ case.
**   - For the chosen equality constraint (index eqIdx) the right-hand
**     value is requested as the next xFilter argument and SQLite is told
**     it may omit its own re-check of the constraint.
**   - That plan is costed at 1 with 1 estimated row and reports the ORDER
**     BY as consumed; the full-table-scan plan is costed at 2147483647
**     with the same row estimate.
**   - The resulting idxNum is stored in pIdxInfo->idxNum.
**
** NOTE(review): the first parameter, the body of the EQ case, the test
** that selects between the two plans and the return statement are missing
** from this listing.
*/
544 static int lsm1BestIndex(
546 sqlite3_index_info
*pIdxInfo
548 int i
; /* Loop over constraints */
549 int idxNum
= 0; /* The query plan bitmask */
550 int nArg
= 0; /* Number of arguments to xFilter */
551 int eqIdx
= -1; /* Index of the key== constraint, or -1 if none */
553 const struct sqlite3_index_constraint
*pConstraint
;
554 pConstraint
= pIdxInfo
->aConstraint
;
555 for(i
=0; i
<pIdxInfo
->nConstraint
&& idxNum
<16; i
++, pConstraint
++){
556 if( pConstraint
->usable
==0 ) continue;
557 if( pConstraint
->iColumn
!=LSM1_COLUMN_KEY
) continue;
558 if( pConstraint
->op
!=SQLITE_INDEX_CONSTRAINT_EQ
) continue;
559 switch( pConstraint
->op
){
560 case SQLITE_INDEX_CONSTRAINT_EQ
: {
568 pIdxInfo
->aConstraintUsage
[eqIdx
].argvIndex
= ++nArg
;
569 pIdxInfo
->aConstraintUsage
[eqIdx
].omit
= 1;
572 pIdxInfo
->estimatedCost
= (double)1;
573 pIdxInfo
->estimatedRows
= 1;
574 pIdxInfo
->orderByConsumed
= 1;
576 /* Full table scan */
577 pIdxInfo
->estimatedCost
= (double)2147483647;
578 pIdxInfo
->estimatedRows
= 2147483647;
580 pIdxInfo
->idxNum
= idxNum
;
/*
** The xUpdate method is normally used for INSERT, REPLACE, UPDATE, and
** DELETE.  But this virtual table only supports INSERT and REPLACE.
** DELETE is accomplished by inserting a record with a value of NULL.
** UPDATE is achieved by using REPLACE.
*/
/*
** lsm1Update: xUpdate implementation (the module table in this file names
** it lsm1Update; the line carrying the function name is missing from this
** listing).
**
** Behavior visible in this listing:
**   - Two error messages are stored in pVTab->zErrMsg: "cannot DELETE",
**     and "cannot UPDATE" when argv[0] is not NULL.
**   - A non-NULL command column is tested for; what happens then is not
**     shown.
**   - Key: a BLOB in the blobkey column is used exactly as supplied;
**     otherwise the key column is encoded with lsm1EncodeKey(), using the
**     100-byte stack buffer pSpace as scratch space.
**   - Value: a BLOB in the blobvalue column is stored as is with
**     lsm_insert().  Otherwise the value column is examined by type:
**       * one path calls lsm_delete() on the key (presumably for a NULL
**         value, as the comment preceding this function describes);
**       * TEXT and BLOB are copied into a fresh buffer of nVal+1 bytes
**         whose first byte is the SQLite type code, then inserted;
**       * INTEGER and (presumably) FLOAT are reduced to a 64-bit word,
**         the double being copied bit for bit, and laid out in aVal[]
**         from the end toward the front while the word is non-zero, with
**         the type code stored in front; the 9-i bytes starting at
**         aVal[i] are inserted.
**   - The key buffer is released with sqlite3_free() unless it is pSpace.
**   - Returns SQLITE_OK when the LSM result is LSM_OK and SQLITE_ERROR
**     otherwise.
**
** NOTE(review): on the blobkey path pKey points at memory obtained from
** sqlite3_value_blob(), which is not pSpace, so the final sqlite3_free()
** would appear to release memory owned by the sqlite3_value - confirm
** whether the omitted lines guard against this.
** NOTE(review): no release of pData is visible after the insert - confirm
** it is freed in the omitted lines.
** NOTE(review): the function header, several local declarations, most
** branch structure and the early returns are missing from this listing.
*/
593 sqlite3_value
**argv
,
596 lsm1_vtab
*p
= (lsm1_vtab
*)pVTab
;
601 sqlite3_value
*pValue
;
602 const unsigned char *pVal
;
603 unsigned char *pData
;
605 unsigned char pSpace
[100];
608 pVTab
->zErrMsg
= sqlite3_mprintf("cannot DELETE");
611 if( sqlite3_value_type(argv
[0])!=SQLITE_NULL
){
612 pVTab
->zErrMsg
= sqlite3_mprintf("cannot UPDATE");
616 /* "INSERT INTO tab(command) VALUES('....')" is used to implement
619 if( sqlite3_value_type(argv
[2+LSM1_COLUMN_COMMAND
])!=SQLITE_NULL
){
622 if( sqlite3_value_type(argv
[2+LSM1_COLUMN_BLOBKEY
])==SQLITE_BLOB
){
623 /* Use the blob key exactly as supplied */
624 pKey
= sqlite3_value_blob(argv
[2+LSM1_COLUMN_BLOBKEY
]);
625 nKey
= sqlite3_value_bytes(argv
[2+LSM1_COLUMN_BLOBKEY
]);
627 /* Use a key encoding that sorts in lexicographical order */
628 rc
= lsm1EncodeKey(argv
[2+LSM1_COLUMN_KEY
],
629 (unsigned char**)&pKey
,&nKey
,
630 pSpace
,sizeof(pSpace
));
633 if( sqlite3_value_type(argv
[2+LSM1_COLUMN_BLOBVALUE
])==SQLITE_BLOB
){
634 pVal
= sqlite3_value_blob(argv
[2+LSM1_COLUMN_BLOBVALUE
]);
635 nVal
= sqlite3_value_bytes(argv
[2+LSM1_COLUMN_BLOBVALUE
]);
636 rc
= lsm_insert(p
->pDb
, pKey
, nKey
, pVal
, nVal
);
638 pValue
= argv
[2+LSM1_COLUMN_VALUE
];
639 eType
= sqlite3_value_type(pValue
);
642 rc
= lsm_delete(p
->pDb
, pKey
, nKey
);
647 if( eType
==SQLITE_TEXT
){
648 pVal
= sqlite3_value_text(pValue
);
650 pVal
= (unsigned char*)sqlite3_value_blob(pValue
);
652 nVal
= sqlite3_value_bytes(pValue
);
653 pData
= sqlite3_malloc( nVal
+1 );
657 pData
[0] = (unsigned char)eType
;
658 memcpy(&pData
[1], pVal
, nVal
);
659 rc
= lsm_insert(p
->pDb
, pKey
, nKey
, pData
, nVal
+1);
667 unsigned char aVal
[9];
669 if( eType
==SQLITE_INTEGER
){
670 *(sqlite3_int64
*)&x
= sqlite3_value_int64(pValue
);
672 double r
= sqlite3_value_double(pValue
);
673 assert( sizeof(r
)==sizeof(x
) );
674 memcpy(&x
, &r
, sizeof(r
));
676 for(i
=8; x
>0 && i
>=1; i
--){
680 aVal
[i
] = (unsigned char)eType
;
681 rc
= lsm_insert(p
->pDb
, pKey
, nKey
, &aVal
[i
], 9-i
);
686 if( pKey
!=(const void*)pSpace
) sqlite3_free((void*)pKey
);
687 return rc
==LSM_OK
? SQLITE_OK
: SQLITE_ERROR
;
690 /* Begin a transaction
692 static int lsm1Begin(sqlite3_vtab
*pVtab
){
693 lsm1_vtab
*p
= (lsm1_vtab
*)pVtab
;
694 int rc
= lsm_begin(p
->pDb
, 1);
695 return rc
==LSM_OK
? SQLITE_OK
: SQLITE_ERROR
;
698 /* Phase 1 of a transaction commit.
/*
** lsm1Sync: xSync implementation, described by the file as phase 1 of a
** transaction commit.
**
** NOTE(review): only the signature appears in this listing; the body is
** not shown.
*/
700 static int lsm1Sync(sqlite3_vtab
*pVtab
){
704 /* Commit a transaction
706 static int lsm1Commit(sqlite3_vtab
*pVtab
){
707 lsm1_vtab
*p
= (lsm1_vtab
*)pVtab
;
708 int rc
= lsm_commit(p
->pDb
, 0);
709 return rc
==LSM_OK
? SQLITE_OK
: SQLITE_ERROR
;
712 /* Rollback a transaction
714 static int lsm1Rollback(sqlite3_vtab
*pVtab
){
715 lsm1_vtab
*p
= (lsm1_vtab
*)pVtab
;
716 int rc
= lsm_rollback(p
->pDb
, 0);
717 return rc
==LSM_OK
? SQLITE_OK
: SQLITE_ERROR
;
/*
** The following structure defines all the methods for the
** lsm1 virtual table.
*/
/*
** lsm1Module: the sqlite3_module method table for this virtual table.
**
** lsm1Connect fills both the xCreate and xConnect slots and
** lsm1Disconnect fills both the xDisconnect and xDestroy slots.  The
** remaining visible slots map one-to-one onto the cursor, update and
** transaction functions defined in this file.
**
** NOTE(review): the first initializer (the iVersion slot in the SQLite
** sqlite3_module layout), any entries after xRollback and the closing
** brace are missing from this listing.
*/
724 static sqlite3_module lsm1Module
= {
726 lsm1Connect
, /* xCreate */
727 lsm1Connect
, /* xConnect */
728 lsm1BestIndex
, /* xBestIndex */
729 lsm1Disconnect
, /* xDisconnect */
730 lsm1Disconnect
, /* xDestroy */
731 lsm1Open
, /* xOpen - open a cursor */
732 lsm1Close
, /* xClose - close a cursor */
733 lsm1Filter
, /* xFilter - configure scan constraints */
734 lsm1Next
, /* xNext - advance a cursor */
735 lsm1Eof
, /* xEof - check for end of scan */
736 lsm1Column
, /* xColumn - read data */
737 lsm1Rowid
, /* xRowid - read data */
738 lsm1Update
, /* xUpdate */
739 lsm1Begin
, /* xBegin */
740 lsm1Sync
, /* xSync */
741 lsm1Commit
, /* xCommit */
742 lsm1Rollback
, /* xRollback */
/*
** sqlite3_lsm_init: extension entry point.
**
** Hands the API routine table to SQLITE_EXTENSION_INIT2() and registers
** lsm1Module under the module name "lsm1" with sqlite3_create_module(),
** passing 0 as the client data pointer.  The __declspec(dllexport)
** annotation exports the symbol (presumably under a Windows-only
** preprocessor guard that is not shown here).
**
** NOTE(review): the first two parameters, the declaration of rc and the
** return statement are missing from this listing.
*/
749 __declspec(dllexport
)
751 int sqlite3_lsm_init(
754 const sqlite3_api_routines
*pApi
757 SQLITE_EXTENSION_INIT2(pApi
);
758 rc
= sqlite3_create_module(db
, "lsm1", &lsm1Module
, 0);