4 ** The author disclaims copyright to this source code. In place of
5 ** a legal notice, here is a blessing:
7 ** May you do good and not evil.
8 ** May you find forgiveness for yourself and forgive others.
9 ** May you share freely, never taking more than you give.
11 ******************************************************************************
19 typedef struct Fts5HashEntry Fts5HashEntry
;
22 ** This file contains the implementation of an in-memory hash table used
23 ** to accumuluate "term -> doclist" content before it is flused to a level-0
29 int eDetail
; /* Copy of Fts5Config.eDetail */
30 int *pnByte
; /* Pointer to bytes counter */
31 int nEntry
; /* Number of entries currently in hash */
32 int nSlot
; /* Size of aSlot[] array */
33 Fts5HashEntry
*pScan
; /* Current ordered scan item */
34 Fts5HashEntry
**aSlot
; /* Array of hash slots */
38 ** Each entry in the hash table is represented by an object of the
39 ** following type. Each object, its key (zKey[]) and its current data
40 ** are stored in a single memory allocation. The position list data
41 ** immediately follows the key data in memory.
43 ** The data that follows the key is in a similar, but not identical format
44 ** to the doclist data stored in the database. It is:
46 ** * Rowid, as a varint
47 ** * Position list, without 0x00 terminator.
48 ** * Size of previous position list and rowid, as a 4 byte
49 ** big-endian integer.
52 ** Offset of last rowid written to data area. Relative to first byte of
56 ** Bytes of data written since iRowidOff.
58 struct Fts5HashEntry
{
59 Fts5HashEntry
*pHashNext
; /* Next hash entry with same hash-key */
60 Fts5HashEntry
*pScanNext
; /* Next entry in sorted order */
62 int nAlloc
; /* Total size of allocation */
63 int iSzPoslist
; /* Offset of space for 4-byte poslist size */
64 int nData
; /* Total bytes of data (incl. structure) */
65 int nKey
; /* Length of zKey[] in bytes */
66 u8 bDel
; /* Set delete-flag @ iSzPoslist */
67 u8 bContent
; /* Set content-flag (detail=none mode) */
68 i16 iCol
; /* Column of last value written */
69 int iPos
; /* Position of last value written */
70 i64 iRowid
; /* Rowid of last value written */
71 char zKey
[8]; /* Nul-terminated entry key */
75 ** Size of Fts5HashEntry without the zKey[] array.
77 #define FTS5_HASHENTRYSIZE (sizeof(Fts5HashEntry)-8)
82 ** Allocate a new hash table.
84 int sqlite3Fts5HashNew(Fts5Config
*pConfig
, Fts5Hash
**ppNew
, int *pnByte
){
88 *ppNew
= pNew
= (Fts5Hash
*)sqlite3_malloc(sizeof(Fts5Hash
));
93 memset(pNew
, 0, sizeof(Fts5Hash
));
94 pNew
->pnByte
= pnByte
;
95 pNew
->eDetail
= pConfig
->eDetail
;
98 nByte
= sizeof(Fts5HashEntry
*) * pNew
->nSlot
;
99 pNew
->aSlot
= (Fts5HashEntry
**)sqlite3_malloc(nByte
);
100 if( pNew
->aSlot
==0 ){
105 memset(pNew
->aSlot
, 0, nByte
);
112 ** Free a hash table object.
114 void sqlite3Fts5HashFree(Fts5Hash
*pHash
){
116 sqlite3Fts5HashClear(pHash
);
117 sqlite3_free(pHash
->aSlot
);
123 ** Empty (but do not delete) a hash table.
125 void sqlite3Fts5HashClear(Fts5Hash
*pHash
){
127 for(i
=0; i
<pHash
->nSlot
; i
++){
128 Fts5HashEntry
*pNext
;
129 Fts5HashEntry
*pSlot
;
130 for(pSlot
=pHash
->aSlot
[i
]; pSlot
; pSlot
=pNext
){
131 pNext
= pSlot
->pHashNext
;
135 memset(pHash
->aSlot
, 0, pHash
->nSlot
* sizeof(Fts5HashEntry
*));
139 static unsigned int fts5HashKey(int nSlot
, const u8
*p
, int n
){
142 for(i
=n
-1; i
>=0; i
--){
143 h
= (h
<< 3) ^ h
^ p
[i
];
148 static unsigned int fts5HashKey2(int nSlot
, u8 b
, const u8
*p
, int n
){
151 for(i
=n
-1; i
>=0; i
--){
152 h
= (h
<< 3) ^ h
^ p
[i
];
154 h
= (h
<< 3) ^ h
^ b
;
159 ** Resize the hash table by doubling the number of slots.
161 static int fts5HashResize(Fts5Hash
*pHash
){
162 int nNew
= pHash
->nSlot
*2;
164 Fts5HashEntry
**apNew
;
165 Fts5HashEntry
**apOld
= pHash
->aSlot
;
167 apNew
= (Fts5HashEntry
**)sqlite3_malloc(nNew
*sizeof(Fts5HashEntry
*));
168 if( !apNew
) return SQLITE_NOMEM
;
169 memset(apNew
, 0, nNew
*sizeof(Fts5HashEntry
*));
171 for(i
=0; i
<pHash
->nSlot
; i
++){
174 Fts5HashEntry
*p
= apOld
[i
];
175 apOld
[i
] = p
->pHashNext
;
176 iHash
= fts5HashKey(nNew
, (u8
*)p
->zKey
, (int)strlen(p
->zKey
));
177 p
->pHashNext
= apNew
[iHash
];
184 pHash
->aSlot
= apNew
;
188 static void fts5HashAddPoslistSize(Fts5Hash
*pHash
, Fts5HashEntry
*p
){
191 if( pHash
->eDetail
==FTS5_DETAIL_NONE
){
192 assert( p
->nData
==p
->iSzPoslist
);
194 pPtr
[p
->nData
++] = 0x00;
196 pPtr
[p
->nData
++] = 0x00;
200 int nSz
= (p
->nData
- p
->iSzPoslist
- 1); /* Size in bytes */
201 int nPos
= nSz
*2 + p
->bDel
; /* Value of nPos field */
203 assert( p
->bDel
==0 || p
->bDel
==1 );
205 pPtr
[p
->iSzPoslist
] = (u8
)nPos
;
207 int nByte
= sqlite3Fts5GetVarintLen((u32
)nPos
);
208 memmove(&pPtr
[p
->iSzPoslist
+ nByte
], &pPtr
[p
->iSzPoslist
+ 1], nSz
);
209 sqlite3Fts5PutVarint(&pPtr
[p
->iSzPoslist
], nPos
);
210 p
->nData
+= (nByte
-1);
221 ** Add an entry to the in-memory hash table. The key is the concatenation
222 ** of bByte and (pToken/nToken). The value is (iRowid/iCol/iPos).
224 ** (bByte || pToken) -> (iRowid,iCol,iPos)
226 ** Or, if iCol is negative, then the value is a delete marker.
228 int sqlite3Fts5HashWrite(
230 i64 iRowid
, /* Rowid for this entry */
231 int iCol
, /* Column token appears in (-ve -> delete) */
232 int iPos
, /* Position of token within column */
233 char bByte
, /* First byte of token */
234 const char *pToken
, int nToken
/* Token to add or remove to or from index */
239 int nIncr
= 0; /* Amount to increment (*pHash->pnByte) by */
240 int bNew
; /* If non-delete entry should be written */
242 bNew
= (pHash
->eDetail
==FTS5_DETAIL_FULL
);
244 /* Attempt to locate an existing hash entry */
245 iHash
= fts5HashKey2(pHash
->nSlot
, (u8
)bByte
, (const u8
*)pToken
, nToken
);
246 for(p
=pHash
->aSlot
[iHash
]; p
; p
=p
->pHashNext
){
247 if( p
->zKey
[0]==bByte
249 && memcmp(&p
->zKey
[1], pToken
, nToken
)==0
255 /* If an existing hash entry cannot be found, create a new one. */
257 /* Figure out how much space to allocate */
258 int nByte
= FTS5_HASHENTRYSIZE
+ (nToken
+1) + 1 + 64;
259 if( nByte
<128 ) nByte
= 128;
261 /* Grow the Fts5Hash.aSlot[] array if necessary. */
262 if( (pHash
->nEntry
*2)>=pHash
->nSlot
){
263 int rc
= fts5HashResize(pHash
);
264 if( rc
!=SQLITE_OK
) return rc
;
265 iHash
= fts5HashKey2(pHash
->nSlot
, (u8
)bByte
, (const u8
*)pToken
, nToken
);
268 /* Allocate new Fts5HashEntry and add it to the hash table. */
269 p
= (Fts5HashEntry
*)sqlite3_malloc(nByte
);
270 if( !p
) return SQLITE_NOMEM
;
271 memset(p
, 0, FTS5_HASHENTRYSIZE
);
274 memcpy(&p
->zKey
[1], pToken
, nToken
);
275 assert( iHash
==fts5HashKey(pHash
->nSlot
, (u8
*)p
->zKey
, nToken
+1) );
277 p
->zKey
[nToken
+1] = '\0';
278 p
->nData
= nToken
+1 + 1 + FTS5_HASHENTRYSIZE
;
279 p
->pHashNext
= pHash
->aSlot
[iHash
];
280 pHash
->aSlot
[iHash
] = p
;
283 /* Add the first rowid field to the hash-entry */
284 p
->nData
+= sqlite3Fts5PutVarint(&((u8
*)p
)[p
->nData
], iRowid
);
287 p
->iSzPoslist
= p
->nData
;
288 if( pHash
->eDetail
!=FTS5_DETAIL_NONE
){
290 p
->iCol
= (pHash
->eDetail
==FTS5_DETAIL_FULL
? 0 : -1);
296 /* Appending to an existing hash-entry. Check that there is enough
297 ** space to append the largest possible new entry. Worst case scenario
300 ** + 9 bytes for a new rowid,
301 ** + 4 byte reserved for the "poslist size" varint.
302 ** + 1 byte for a "new column" byte,
303 ** + 3 bytes for a new column number (16-bit max) as a varint,
304 ** + 5 bytes for the new position offset (32-bit max).
306 if( (p
->nAlloc
- p
->nData
) < (9 + 4 + 1 + 3 + 5) ){
307 int nNew
= p
->nAlloc
* 2;
310 pNew
= (Fts5HashEntry
*)sqlite3_realloc(p
, nNew
);
311 if( pNew
==0 ) return SQLITE_NOMEM
;
313 for(pp
=&pHash
->aSlot
[iHash
]; *pp
!=p
; pp
=&(*pp
)->pHashNext
);
319 assert( (p
->nAlloc
- p
->nData
) >= (9 + 4 + 1 + 3 + 5) );
323 /* If this is a new rowid, append the 4-byte size field for the previous
324 ** entry, and the new rowid for this entry. */
325 if( iRowid
!=p
->iRowid
){
326 fts5HashAddPoslistSize(pHash
, p
);
327 p
->nData
+= sqlite3Fts5PutVarint(&pPtr
[p
->nData
], iRowid
- p
->iRowid
);
330 p
->iSzPoslist
= p
->nData
;
331 if( pHash
->eDetail
!=FTS5_DETAIL_NONE
){
333 p
->iCol
= (pHash
->eDetail
==FTS5_DETAIL_FULL
? 0 : -1);
339 if( pHash
->eDetail
==FTS5_DETAIL_NONE
){
342 /* Append a new column value, if necessary */
343 assert( iCol
>=p
->iCol
);
345 if( pHash
->eDetail
==FTS5_DETAIL_FULL
){
346 pPtr
[p
->nData
++] = 0x01;
347 p
->nData
+= sqlite3Fts5PutVarint(&pPtr
[p
->nData
], iCol
);
352 p
->iCol
= (i16
)(iPos
= iCol
);
356 /* Append the new position offset, if necessary */
358 p
->nData
+= sqlite3Fts5PutVarint(&pPtr
[p
->nData
], iPos
- p
->iPos
+ 2);
363 /* This is a delete. Set the delete flag. */
368 *pHash
->pnByte
+= nIncr
;
374 ** Arguments pLeft and pRight point to linked-lists of hash-entry objects,
375 ** each sorted in key order. This function merges the two lists into a
376 ** single list and returns a pointer to its first element.
378 static Fts5HashEntry
*fts5HashEntryMerge(
379 Fts5HashEntry
*pLeft
,
380 Fts5HashEntry
*pRight
382 Fts5HashEntry
*p1
= pLeft
;
383 Fts5HashEntry
*p2
= pRight
;
384 Fts5HashEntry
*pRet
= 0;
385 Fts5HashEntry
**ppOut
= &pRet
;
396 while( p1
->zKey
[i
]==p2
->zKey
[i
] ) i
++;
398 if( ((u8
)p1
->zKey
[i
])>((u8
)p2
->zKey
[i
]) ){
401 ppOut
= &p2
->pScanNext
;
406 ppOut
= &p1
->pScanNext
;
417 ** Extract all tokens from hash table iHash and link them into a list
418 ** in sorted order. The hash table is cleared before returning. It is
419 ** the responsibility of the caller to free the elements of the returned
422 static int fts5HashEntrySort(
424 const char *pTerm
, int nTerm
, /* Query prefix, if any */
425 Fts5HashEntry
**ppSorted
427 const int nMergeSlot
= 32;
429 Fts5HashEntry
*pList
;
434 ap
= sqlite3_malloc(sizeof(Fts5HashEntry
*) * nMergeSlot
);
435 if( !ap
) return SQLITE_NOMEM
;
436 memset(ap
, 0, sizeof(Fts5HashEntry
*) * nMergeSlot
);
438 for(iSlot
=0; iSlot
<pHash
->nSlot
; iSlot
++){
439 Fts5HashEntry
*pIter
;
440 for(pIter
=pHash
->aSlot
[iSlot
]; pIter
; pIter
=pIter
->pHashNext
){
441 if( pTerm
==0 || 0==memcmp(pIter
->zKey
, pTerm
, nTerm
) ){
442 Fts5HashEntry
*pEntry
= pIter
;
443 pEntry
->pScanNext
= 0;
444 for(i
=0; ap
[i
]; i
++){
445 pEntry
= fts5HashEntryMerge(pEntry
, ap
[i
]);
454 for(i
=0; i
<nMergeSlot
; i
++){
455 pList
= fts5HashEntryMerge(pList
, ap
[i
]);
465 ** Query the hash table for a doclist associated with term pTerm/nTerm.
467 int sqlite3Fts5HashQuery(
468 Fts5Hash
*pHash
, /* Hash table to query */
469 const char *pTerm
, int nTerm
, /* Query term */
470 const u8
**ppDoclist
, /* OUT: Pointer to doclist for pTerm */
471 int *pnDoclist
/* OUT: Size of doclist in bytes */
473 unsigned int iHash
= fts5HashKey(pHash
->nSlot
, (const u8
*)pTerm
, nTerm
);
476 for(p
=pHash
->aSlot
[iHash
]; p
; p
=p
->pHashNext
){
477 if( memcmp(p
->zKey
, pTerm
, nTerm
)==0 && p
->zKey
[nTerm
]==0 ) break;
481 fts5HashAddPoslistSize(pHash
, p
);
482 *ppDoclist
= (const u8
*)&p
->zKey
[nTerm
+1];
483 *pnDoclist
= p
->nData
- (FTS5_HASHENTRYSIZE
+ nTerm
+ 1);
492 int sqlite3Fts5HashScanInit(
493 Fts5Hash
*p
, /* Hash table to query */
494 const char *pTerm
, int nTerm
/* Query prefix */
496 return fts5HashEntrySort(p
, pTerm
, nTerm
, &p
->pScan
);
499 void sqlite3Fts5HashScanNext(Fts5Hash
*p
){
500 assert( !sqlite3Fts5HashScanEof(p
) );
501 p
->pScan
= p
->pScan
->pScanNext
;
504 int sqlite3Fts5HashScanEof(Fts5Hash
*p
){
505 return (p
->pScan
==0);
508 void sqlite3Fts5HashScanEntry(
510 const char **pzTerm
, /* OUT: term (nul-terminated) */
511 const u8
**ppDoclist
, /* OUT: pointer to doclist */
512 int *pnDoclist
/* OUT: size of doclist in bytes */
515 if( (p
= pHash
->pScan
) ){
516 int nTerm
= (int)strlen(p
->zKey
);
517 fts5HashAddPoslistSize(pHash
, p
);
519 *ppDoclist
= (const u8
*)&p
->zKey
[nTerm
+1];
520 *pnDoclist
= p
->nData
- (FTS5_HASHENTRYSIZE
+ nTerm
+ 1);