Updates to the MSVC makefiles.
[sqlite.git] / ext / fts5 / fts5_hash.c
blobafa2a30739156c3612c7bcf01516f5a0aee8d95f
1 /*
2 ** 2014 August 11
3 **
4 ** The author disclaims copyright to this source code. In place of
5 ** a legal notice, here is a blessing:
6 **
7 ** May you do good and not evil.
8 ** May you find forgiveness for yourself and forgive others.
9 ** May you share freely, never taking more than you give.
11 ******************************************************************************
17 #include "fts5Int.h"
19 typedef struct Fts5HashEntry Fts5HashEntry;
22 ** This file contains the implementation of an in-memory hash table used
23 ** to accumuluate "term -> doclist" content before it is flused to a level-0
24 ** segment.
28 struct Fts5Hash {
29 int eDetail; /* Copy of Fts5Config.eDetail */
30 int *pnByte; /* Pointer to bytes counter */
31 int nEntry; /* Number of entries currently in hash */
32 int nSlot; /* Size of aSlot[] array */
33 Fts5HashEntry *pScan; /* Current ordered scan item */
34 Fts5HashEntry **aSlot; /* Array of hash slots */
38 ** Each entry in the hash table is represented by an object of the
39 ** following type. Each object, its key (zKey[]) and its current data
40 ** are stored in a single memory allocation. The position list data
41 ** immediately follows the key data in memory.
43 ** The data that follows the key is in a similar, but not identical format
44 ** to the doclist data stored in the database. It is:
46 ** * Rowid, as a varint
47 ** * Position list, without 0x00 terminator.
48 ** * Size of previous position list and rowid, as a 4 byte
49 ** big-endian integer.
51 ** iRowidOff:
52 ** Offset of last rowid written to data area. Relative to first byte of
53 ** structure.
55 ** nData:
56 ** Bytes of data written since iRowidOff.
58 struct Fts5HashEntry {
59 Fts5HashEntry *pHashNext; /* Next hash entry with same hash-key */
60 Fts5HashEntry *pScanNext; /* Next entry in sorted order */
62 int nAlloc; /* Total size of allocation */
63 int iSzPoslist; /* Offset of space for 4-byte poslist size */
64 int nData; /* Total bytes of data (incl. structure) */
65 int nKey; /* Length of zKey[] in bytes */
66 u8 bDel; /* Set delete-flag @ iSzPoslist */
67 u8 bContent; /* Set content-flag (detail=none mode) */
68 i16 iCol; /* Column of last value written */
69 int iPos; /* Position of last value written */
70 i64 iRowid; /* Rowid of last value written */
71 char zKey[8]; /* Nul-terminated entry key */
75 ** Size of Fts5HashEntry without the zKey[] array.
77 #define FTS5_HASHENTRYSIZE (sizeof(Fts5HashEntry)-8)
82 ** Allocate a new hash table.
84 int sqlite3Fts5HashNew(Fts5Config *pConfig, Fts5Hash **ppNew, int *pnByte){
85 int rc = SQLITE_OK;
86 Fts5Hash *pNew;
88 *ppNew = pNew = (Fts5Hash*)sqlite3_malloc(sizeof(Fts5Hash));
89 if( pNew==0 ){
90 rc = SQLITE_NOMEM;
91 }else{
92 int nByte;
93 memset(pNew, 0, sizeof(Fts5Hash));
94 pNew->pnByte = pnByte;
95 pNew->eDetail = pConfig->eDetail;
97 pNew->nSlot = 1024;
98 nByte = sizeof(Fts5HashEntry*) * pNew->nSlot;
99 pNew->aSlot = (Fts5HashEntry**)sqlite3_malloc(nByte);
100 if( pNew->aSlot==0 ){
101 sqlite3_free(pNew);
102 *ppNew = 0;
103 rc = SQLITE_NOMEM;
104 }else{
105 memset(pNew->aSlot, 0, nByte);
108 return rc;
112 ** Free a hash table object.
114 void sqlite3Fts5HashFree(Fts5Hash *pHash){
115 if( pHash ){
116 sqlite3Fts5HashClear(pHash);
117 sqlite3_free(pHash->aSlot);
118 sqlite3_free(pHash);
123 ** Empty (but do not delete) a hash table.
125 void sqlite3Fts5HashClear(Fts5Hash *pHash){
126 int i;
127 for(i=0; i<pHash->nSlot; i++){
128 Fts5HashEntry *pNext;
129 Fts5HashEntry *pSlot;
130 for(pSlot=pHash->aSlot[i]; pSlot; pSlot=pNext){
131 pNext = pSlot->pHashNext;
132 sqlite3_free(pSlot);
135 memset(pHash->aSlot, 0, pHash->nSlot * sizeof(Fts5HashEntry*));
136 pHash->nEntry = 0;
139 static unsigned int fts5HashKey(int nSlot, const u8 *p, int n){
140 int i;
141 unsigned int h = 13;
142 for(i=n-1; i>=0; i--){
143 h = (h << 3) ^ h ^ p[i];
145 return (h % nSlot);
148 static unsigned int fts5HashKey2(int nSlot, u8 b, const u8 *p, int n){
149 int i;
150 unsigned int h = 13;
151 for(i=n-1; i>=0; i--){
152 h = (h << 3) ^ h ^ p[i];
154 h = (h << 3) ^ h ^ b;
155 return (h % nSlot);
159 ** Resize the hash table by doubling the number of slots.
161 static int fts5HashResize(Fts5Hash *pHash){
162 int nNew = pHash->nSlot*2;
163 int i;
164 Fts5HashEntry **apNew;
165 Fts5HashEntry **apOld = pHash->aSlot;
167 apNew = (Fts5HashEntry**)sqlite3_malloc(nNew*sizeof(Fts5HashEntry*));
168 if( !apNew ) return SQLITE_NOMEM;
169 memset(apNew, 0, nNew*sizeof(Fts5HashEntry*));
171 for(i=0; i<pHash->nSlot; i++){
172 while( apOld[i] ){
173 int iHash;
174 Fts5HashEntry *p = apOld[i];
175 apOld[i] = p->pHashNext;
176 iHash = fts5HashKey(nNew, (u8*)p->zKey, (int)strlen(p->zKey));
177 p->pHashNext = apNew[iHash];
178 apNew[iHash] = p;
182 sqlite3_free(apOld);
183 pHash->nSlot = nNew;
184 pHash->aSlot = apNew;
185 return SQLITE_OK;
188 static void fts5HashAddPoslistSize(Fts5Hash *pHash, Fts5HashEntry *p){
189 if( p->iSzPoslist ){
190 u8 *pPtr = (u8*)p;
191 if( pHash->eDetail==FTS5_DETAIL_NONE ){
192 assert( p->nData==p->iSzPoslist );
193 if( p->bDel ){
194 pPtr[p->nData++] = 0x00;
195 if( p->bContent ){
196 pPtr[p->nData++] = 0x00;
199 }else{
200 int nSz = (p->nData - p->iSzPoslist - 1); /* Size in bytes */
201 int nPos = nSz*2 + p->bDel; /* Value of nPos field */
203 assert( p->bDel==0 || p->bDel==1 );
204 if( nPos<=127 ){
205 pPtr[p->iSzPoslist] = (u8)nPos;
206 }else{
207 int nByte = sqlite3Fts5GetVarintLen((u32)nPos);
208 memmove(&pPtr[p->iSzPoslist + nByte], &pPtr[p->iSzPoslist + 1], nSz);
209 sqlite3Fts5PutVarint(&pPtr[p->iSzPoslist], nPos);
210 p->nData += (nByte-1);
214 p->iSzPoslist = 0;
215 p->bDel = 0;
216 p->bContent = 0;
221 ** Add an entry to the in-memory hash table. The key is the concatenation
222 ** of bByte and (pToken/nToken). The value is (iRowid/iCol/iPos).
224 ** (bByte || pToken) -> (iRowid,iCol,iPos)
226 ** Or, if iCol is negative, then the value is a delete marker.
228 int sqlite3Fts5HashWrite(
229 Fts5Hash *pHash,
230 i64 iRowid, /* Rowid for this entry */
231 int iCol, /* Column token appears in (-ve -> delete) */
232 int iPos, /* Position of token within column */
233 char bByte, /* First byte of token */
234 const char *pToken, int nToken /* Token to add or remove to or from index */
236 unsigned int iHash;
237 Fts5HashEntry *p;
238 u8 *pPtr;
239 int nIncr = 0; /* Amount to increment (*pHash->pnByte) by */
240 int bNew; /* If non-delete entry should be written */
242 bNew = (pHash->eDetail==FTS5_DETAIL_FULL);
244 /* Attempt to locate an existing hash entry */
245 iHash = fts5HashKey2(pHash->nSlot, (u8)bByte, (const u8*)pToken, nToken);
246 for(p=pHash->aSlot[iHash]; p; p=p->pHashNext){
247 if( p->zKey[0]==bByte
248 && p->nKey==nToken
249 && memcmp(&p->zKey[1], pToken, nToken)==0
251 break;
255 /* If an existing hash entry cannot be found, create a new one. */
256 if( p==0 ){
257 /* Figure out how much space to allocate */
258 int nByte = FTS5_HASHENTRYSIZE + (nToken+1) + 1 + 64;
259 if( nByte<128 ) nByte = 128;
261 /* Grow the Fts5Hash.aSlot[] array if necessary. */
262 if( (pHash->nEntry*2)>=pHash->nSlot ){
263 int rc = fts5HashResize(pHash);
264 if( rc!=SQLITE_OK ) return rc;
265 iHash = fts5HashKey2(pHash->nSlot, (u8)bByte, (const u8*)pToken, nToken);
268 /* Allocate new Fts5HashEntry and add it to the hash table. */
269 p = (Fts5HashEntry*)sqlite3_malloc(nByte);
270 if( !p ) return SQLITE_NOMEM;
271 memset(p, 0, FTS5_HASHENTRYSIZE);
272 p->nAlloc = nByte;
273 p->zKey[0] = bByte;
274 memcpy(&p->zKey[1], pToken, nToken);
275 assert( iHash==fts5HashKey(pHash->nSlot, (u8*)p->zKey, nToken+1) );
276 p->nKey = nToken;
277 p->zKey[nToken+1] = '\0';
278 p->nData = nToken+1 + 1 + FTS5_HASHENTRYSIZE;
279 p->pHashNext = pHash->aSlot[iHash];
280 pHash->aSlot[iHash] = p;
281 pHash->nEntry++;
283 /* Add the first rowid field to the hash-entry */
284 p->nData += sqlite3Fts5PutVarint(&((u8*)p)[p->nData], iRowid);
285 p->iRowid = iRowid;
287 p->iSzPoslist = p->nData;
288 if( pHash->eDetail!=FTS5_DETAIL_NONE ){
289 p->nData += 1;
290 p->iCol = (pHash->eDetail==FTS5_DETAIL_FULL ? 0 : -1);
293 nIncr += p->nData;
294 }else{
296 /* Appending to an existing hash-entry. Check that there is enough
297 ** space to append the largest possible new entry. Worst case scenario
298 ** is:
300 ** + 9 bytes for a new rowid,
301 ** + 4 byte reserved for the "poslist size" varint.
302 ** + 1 byte for a "new column" byte,
303 ** + 3 bytes for a new column number (16-bit max) as a varint,
304 ** + 5 bytes for the new position offset (32-bit max).
306 if( (p->nAlloc - p->nData) < (9 + 4 + 1 + 3 + 5) ){
307 int nNew = p->nAlloc * 2;
308 Fts5HashEntry *pNew;
309 Fts5HashEntry **pp;
310 pNew = (Fts5HashEntry*)sqlite3_realloc(p, nNew);
311 if( pNew==0 ) return SQLITE_NOMEM;
312 pNew->nAlloc = nNew;
313 for(pp=&pHash->aSlot[iHash]; *pp!=p; pp=&(*pp)->pHashNext);
314 *pp = pNew;
315 p = pNew;
317 nIncr -= p->nData;
319 assert( (p->nAlloc - p->nData) >= (9 + 4 + 1 + 3 + 5) );
321 pPtr = (u8*)p;
323 /* If this is a new rowid, append the 4-byte size field for the previous
324 ** entry, and the new rowid for this entry. */
325 if( iRowid!=p->iRowid ){
326 fts5HashAddPoslistSize(pHash, p);
327 p->nData += sqlite3Fts5PutVarint(&pPtr[p->nData], iRowid - p->iRowid);
328 p->iRowid = iRowid;
329 bNew = 1;
330 p->iSzPoslist = p->nData;
331 if( pHash->eDetail!=FTS5_DETAIL_NONE ){
332 p->nData += 1;
333 p->iCol = (pHash->eDetail==FTS5_DETAIL_FULL ? 0 : -1);
334 p->iPos = 0;
338 if( iCol>=0 ){
339 if( pHash->eDetail==FTS5_DETAIL_NONE ){
340 p->bContent = 1;
341 }else{
342 /* Append a new column value, if necessary */
343 assert( iCol>=p->iCol );
344 if( iCol!=p->iCol ){
345 if( pHash->eDetail==FTS5_DETAIL_FULL ){
346 pPtr[p->nData++] = 0x01;
347 p->nData += sqlite3Fts5PutVarint(&pPtr[p->nData], iCol);
348 p->iCol = (i16)iCol;
349 p->iPos = 0;
350 }else{
351 bNew = 1;
352 p->iCol = (i16)(iPos = iCol);
356 /* Append the new position offset, if necessary */
357 if( bNew ){
358 p->nData += sqlite3Fts5PutVarint(&pPtr[p->nData], iPos - p->iPos + 2);
359 p->iPos = iPos;
362 }else{
363 /* This is a delete. Set the delete flag. */
364 p->bDel = 1;
367 nIncr += p->nData;
368 *pHash->pnByte += nIncr;
369 return SQLITE_OK;
374 ** Arguments pLeft and pRight point to linked-lists of hash-entry objects,
375 ** each sorted in key order. This function merges the two lists into a
376 ** single list and returns a pointer to its first element.
378 static Fts5HashEntry *fts5HashEntryMerge(
379 Fts5HashEntry *pLeft,
380 Fts5HashEntry *pRight
382 Fts5HashEntry *p1 = pLeft;
383 Fts5HashEntry *p2 = pRight;
384 Fts5HashEntry *pRet = 0;
385 Fts5HashEntry **ppOut = &pRet;
387 while( p1 || p2 ){
388 if( p1==0 ){
389 *ppOut = p2;
390 p2 = 0;
391 }else if( p2==0 ){
392 *ppOut = p1;
393 p1 = 0;
394 }else{
395 int i = 0;
396 while( p1->zKey[i]==p2->zKey[i] ) i++;
398 if( ((u8)p1->zKey[i])>((u8)p2->zKey[i]) ){
399 /* p2 is smaller */
400 *ppOut = p2;
401 ppOut = &p2->pScanNext;
402 p2 = p2->pScanNext;
403 }else{
404 /* p1 is smaller */
405 *ppOut = p1;
406 ppOut = &p1->pScanNext;
407 p1 = p1->pScanNext;
409 *ppOut = 0;
413 return pRet;
417 ** Extract all tokens from hash table iHash and link them into a list
418 ** in sorted order. The hash table is cleared before returning. It is
419 ** the responsibility of the caller to free the elements of the returned
420 ** list.
422 static int fts5HashEntrySort(
423 Fts5Hash *pHash,
424 const char *pTerm, int nTerm, /* Query prefix, if any */
425 Fts5HashEntry **ppSorted
427 const int nMergeSlot = 32;
428 Fts5HashEntry **ap;
429 Fts5HashEntry *pList;
430 int iSlot;
431 int i;
433 *ppSorted = 0;
434 ap = sqlite3_malloc(sizeof(Fts5HashEntry*) * nMergeSlot);
435 if( !ap ) return SQLITE_NOMEM;
436 memset(ap, 0, sizeof(Fts5HashEntry*) * nMergeSlot);
438 for(iSlot=0; iSlot<pHash->nSlot; iSlot++){
439 Fts5HashEntry *pIter;
440 for(pIter=pHash->aSlot[iSlot]; pIter; pIter=pIter->pHashNext){
441 if( pTerm==0 || 0==memcmp(pIter->zKey, pTerm, nTerm) ){
442 Fts5HashEntry *pEntry = pIter;
443 pEntry->pScanNext = 0;
444 for(i=0; ap[i]; i++){
445 pEntry = fts5HashEntryMerge(pEntry, ap[i]);
446 ap[i] = 0;
448 ap[i] = pEntry;
453 pList = 0;
454 for(i=0; i<nMergeSlot; i++){
455 pList = fts5HashEntryMerge(pList, ap[i]);
458 pHash->nEntry = 0;
459 sqlite3_free(ap);
460 *ppSorted = pList;
461 return SQLITE_OK;
465 ** Query the hash table for a doclist associated with term pTerm/nTerm.
467 int sqlite3Fts5HashQuery(
468 Fts5Hash *pHash, /* Hash table to query */
469 const char *pTerm, int nTerm, /* Query term */
470 const u8 **ppDoclist, /* OUT: Pointer to doclist for pTerm */
471 int *pnDoclist /* OUT: Size of doclist in bytes */
473 unsigned int iHash = fts5HashKey(pHash->nSlot, (const u8*)pTerm, nTerm);
474 Fts5HashEntry *p;
476 for(p=pHash->aSlot[iHash]; p; p=p->pHashNext){
477 if( memcmp(p->zKey, pTerm, nTerm)==0 && p->zKey[nTerm]==0 ) break;
480 if( p ){
481 fts5HashAddPoslistSize(pHash, p);
482 *ppDoclist = (const u8*)&p->zKey[nTerm+1];
483 *pnDoclist = p->nData - (FTS5_HASHENTRYSIZE + nTerm + 1);
484 }else{
485 *ppDoclist = 0;
486 *pnDoclist = 0;
489 return SQLITE_OK;
492 int sqlite3Fts5HashScanInit(
493 Fts5Hash *p, /* Hash table to query */
494 const char *pTerm, int nTerm /* Query prefix */
496 return fts5HashEntrySort(p, pTerm, nTerm, &p->pScan);
499 void sqlite3Fts5HashScanNext(Fts5Hash *p){
500 assert( !sqlite3Fts5HashScanEof(p) );
501 p->pScan = p->pScan->pScanNext;
504 int sqlite3Fts5HashScanEof(Fts5Hash *p){
505 return (p->pScan==0);
508 void sqlite3Fts5HashScanEntry(
509 Fts5Hash *pHash,
510 const char **pzTerm, /* OUT: term (nul-terminated) */
511 const u8 **ppDoclist, /* OUT: pointer to doclist */
512 int *pnDoclist /* OUT: size of doclist in bytes */
514 Fts5HashEntry *p;
515 if( (p = pHash->pScan) ){
516 int nTerm = (int)strlen(p->zKey);
517 fts5HashAddPoslistSize(pHash, p);
518 *pzTerm = p->zKey;
519 *ppDoclist = (const u8*)&p->zKey[nTerm+1];
520 *pnDoclist = p->nData - (FTS5_HASHENTRYSIZE + nTerm + 1);
521 }else{
522 *pzTerm = 0;
523 *ppDoclist = 0;
524 *pnDoclist = 0;