3 # The author disclaims copyright to this source code. In place of
4 # a legal notice, here is a blessing:
6 # May you do good and not evil.
7 # May you find forgiveness for yourself and forgive others.
8 # May you share freely, never taking more than you give.
10 #***********************************************************************
12 # This file contains common code used the fts3 tests. At one point
13 # equivalent functionality was implemented in C code. But it is easier
17 #-------------------------------------------------------------------------
20 # The following commands are available:
23 # Using database handle [db] create an FTS4 table named t1 and populate
24 # it with N rows of data. N must be less than 10,000. Refer to the
25 # header comments above the proc implementation below for details.
28 # Using database handle [db] create an FTS4 table named t2 and populate
29 # it with N rows of data. N must be less than 100,000. Refer to the
30 # header comments above the proc implementation below for details.
32 # fts3_integrity_check TBL
33 # TBL must be an FTS table in the database currently opened by handle
34 # [db]. This proc loads and tokenizes all documents within the table,
35 # then checks that the current contents of the FTS index matches the
38 # fts3_terms TBL WHERE
41 # fts3_doclist TBL TERM WHERE
47 #-------------------------------------------------------------------------
48 # USAGE: fts3_build_db_1 SWITCHES N
50 # Build a sample FTS table in the database opened by database connection
51 # [db]. The name of the new table is "t1".
53 proc fts3_build_db_1
{args
} {
55 set default(-module) fts4
57 set nArg
[llength $args]
59 error "wrong # args: should be \"fts3_build_db_1 ?switches? n\""
62 set n
[lindex $args [expr $nArg-1]]
63 array set opts
[array get
default]
64 array set opts
[lrange $args 0 [expr $nArg-2]]
65 foreach k
[array names opts
] {
66 if {0==[info exists
default($k)]} { error "unknown option: $k" }
69 if {$n > 10000} {error "n must be <= 10000"}
70 db
eval "CREATE VIRTUAL TABLE t1 USING $opts(-module) (x, y)"
72 set xwords
[list zero one two three four five six seven eight nine ten
]
73 set ywords
[list alpha beta gamma delta epsilon zeta eta theta iota kappa
]
75 for {set i
0} {$i < $n} {incr i
} {
80 lappend x
[lindex $xwords [expr ($i / 1000) % 10]]
81 lappend x
[lindex $xwords [expr ($i / 100) % 10]]
82 lappend x
[lindex $xwords [expr ($i / 10) % 10]]
83 lappend x
[lindex $xwords [expr ($i / 1) % 10]]
86 lappend y
[lindex $ywords [expr ($i / 1000) % 10]]
87 lappend y
[lindex $ywords [expr ($i / 100) % 10]]
88 lappend y
[lindex $ywords [expr ($i / 10) % 10]]
89 lappend y
[lindex $ywords [expr ($i / 1) % 10]]
91 db
eval { INSERT INTO t1
(docid
, x
, y
) VALUES
($i, $x, $y) }
95 #-------------------------------------------------------------------------
96 # USAGE: fts3_build_db_2 N ARGS
98 # Build a sample FTS table in the database opened by database connection
99 # [db]. The name of the new table is "t2".
101 proc fts3_build_db_2
{args
} {
103 set default(-module) fts4
104 set default(-extra) ""
106 set nArg
[llength $args]
108 error "wrong # args: should be \"fts3_build_db_1 ?switches? n\""
111 set n
[lindex $args [expr $nArg-1]]
112 array set opts
[array get
default]
113 array set opts
[lrange $args 0 [expr $nArg-2]]
114 foreach k
[array names opts
] {
115 if {0==[info exists
default($k)]} { error "unknown option: $k" }
118 if {$n > 100000} {error "n must be <= 100000"}
120 set sql
"CREATE VIRTUAL TABLE t2 USING $opts(-module) (content"
121 if {$opts(-extra) != ""} {
122 append sql
", " $opts(-extra)
127 set chars
[list a b c d e f g h i j k l m n o p q r s t u v w x y z
""]
129 for {set i
0} {$i < $n} {incr i
} {
131 set nChar
[llength $chars]
132 append word
[lindex $chars [expr {($i / 1) % $nChar}]]
133 append word
[lindex $chars [expr {($i / $nChar) % $nChar}]]
134 append word
[lindex $chars [expr {($i / ($nChar*$nChar)) % $nChar}]]
136 db
eval { INSERT INTO t2
(docid
, content
) VALUES
($i, $word) }
140 #-------------------------------------------------------------------------
141 # USAGE: fts3_integrity_check TBL
143 # This proc is used to verify that the full-text index is consistent with
144 # the contents of the fts3 table. In other words, it checks that the
145 # data in the %_contents table matches that in the %_segdir and %_segments
148 # This is not an efficient procedure. It uses a lot of memory and a lot
149 # of CPU. But it is better than not checking at all.
153 # 1) Read the entire full-text index from the %_segdir and %_segments
154 # tables into memory. For each entry in the index, the following is
157 # set C($iDocid,$iCol,$iPosition) $zTerm
159 # 2) Iterate through each column of each row of the %_content table.
160 # Tokenize all documents, and check that for each token there is
161 # a corresponding entry in the $C array. After checking a token,
162 # [unset] the $C array entry.
164 # 3) Check that array $C is now empty.
167 proc fts3_integrity_check
{tbl
} {
171 foreach zTerm
[array names A
] {
173 foreach doclist
$A($zTerm) {
175 while {[string length
$doclist]>0} {
181 # First varint of a doclist-entry is the docid. Delta-compressed
182 # with respect to the docid of the previous entry.
184 incr docid
[gobble_varint doclist
]
185 if {[info exists D
($zTerm,$docid)]} {
186 while {[set iDelta
[gobble_varint doclist
]] != 0} {}
189 set D
($zTerm,$docid) 1
191 # Gobble varints until the 0x00 that terminates the doclist-entry
193 while {[set iDelta
[gobble_varint doclist
]] > 0} {
195 set iCol
[gobble_varint doclist
]
200 set C
($docid,$iCol,$iPos) $zTerm
207 foreach key
[array names C
] {
208 #puts "$key -> $C($key)"
212 db
eval "SELECT * FROM ${tbl}_content" E
{
215 foreach col
[lrange $E(*) 1 end
] {
217 set sql
{SELECT fts3_tokenizer_test
('simple'
, $c)}
219 foreach {pos term dummy
} [db one
$sql] {
220 if {![info exists C
($iDoc,$iCol,$pos)]} {
221 set es
"Error at docid=$iDoc col=$iCol pos=$pos. Index is missing"
224 if {[string compare
$C($iDoc,$iCol,$pos) $term]} {
225 set es
"Error at docid=$iDoc col=$iCol pos=$pos. Index "
226 append es
"has \"$C($iDoc,$iCol,$pos)\", document has \"$term\""
229 unset C
($iDoc,$iCol,$pos)
236 foreach c
[array names C
] {
237 lappend errors
"Bad index entry: $c -> $C($c)"
240 if {[info exists errors
]} { return [join $errors "\n"] }
244 # USAGE: fts3_terms TBL WHERE
246 # Argument TBL must be the name of an FTS3 table. Argument WHERE is an
247 # SQL expression that will be used as the WHERE clause when scanning
248 # the %_segdir table. As in the following query:
250 # "SELECT * FROM ${TBL}_segdir WHERE ${WHERE}"
252 # This function returns a list of all terms present in the segments
253 # selected by the statement above.
255 proc fts3_terms
{tbl where
} {
256 fts3_read
$tbl $where a
257 return [lsort [array names a
]]
261 # USAGE: fts3_doclist TBL TERM WHERE
263 # Argument TBL must be the name of an FTS3 table. TERM is a term that may
264 # or may not be present in the table. Argument WHERE is used to select a
265 # subset of the b-tree segments in the associated full-text index as
266 # described above for [fts3_terms].
268 # This function returns the results of merging the doclists associated
269 # with TERM in the selected segments. Each doclist is an element of the
270 # returned list. Each doclist is formatted as follows:
272 # [$docid ?$col[$off1 $off2...]?...]
274 # The formatting is odd for a Tcl command in order to be compatible with
275 # the original C-language implementation. If argument WHERE is "1", then
276 # any empty doclists are omitted from the returned list.
278 proc fts3_doclist
{tbl term where
} {
279 fts3_read
$tbl $where a
282 foreach doclist
$a($term) {
285 while {[string length
$doclist]>0} {
290 incr docid
[gobble_varint doclist
]
292 while {[set iDelta
[gobble_varint doclist
]] > 0} {
294 lappend lCol
[list $iCol $lPos]
297 set iCol
[gobble_varint doclist
]
305 if {[llength $lPos]>0} {
306 lappend lCol
[list $iCol $lPos]
309 if {$where != "1" ||
[llength $lCol]>0} {
310 set ret
($docid) $lCol
312 unset -nocomplain ret
($docid)
318 foreach docid
[lsort -integer [array names ret
]] {
321 foreach col
$ret($docid) {
322 foreach {iCol lPos
} $col {}
323 append cols
" $iCol\[[join $lPos { }]\]"
325 lappend lDoc
"\[${docid}${cols}\]"
331 ###########################################################################
333 proc gobble_varint
{varname
} {
335 set n
[read_fts3varint
$blob ret
]
336 set blob
[string range
$blob $n end
]
339 proc gobble_string
{varname nLength
} {
341 set ret
[string range
$blob 0 [expr $nLength-1]]
342 set blob
[string range
$blob $nLength end
]
346 # The argument is a blob of data representing an FTS3 segment leaf.
347 # Return a list consisting of alternating terms (strings) and doclists
350 proc fts3_readleaf
{blob
} {
354 while {[string length
$blob] > 0} {
355 set nPrefix
[gobble_varint blob
]
356 set nSuffix
[gobble_varint blob
]
358 set zTerm
[string range
$zPrev 0 [expr $nPrefix-1]]
359 append zTerm
[gobble_string blob
$nSuffix]
360 set nDoclist
[gobble_varint blob
]
361 set doclist
[gobble_string blob
$nDoclist]
363 lappend terms
$zTerm $doclist
370 proc fts3_read2
{tbl where varname
} {
373 db
eval " SELECT start_block, leaves_end_block, root
374 FROM ${tbl}_segdir WHERE $where
375 ORDER BY level ASC, idx DESC
378 binary scan $root c c
380 foreach {t d
} [fts3_readleaf
$root] { lappend a
($t) $d }
382 db
eval " SELECT block
384 WHERE blockid>=$start_block AND blockid<=$leaves_end_block
387 foreach {t d
} [fts3_readleaf
$block] { lappend a
($t) $d }
393 proc fts3_read
{tbl where varname
} {
396 db
eval " SELECT start_block, leaves_end_block, root
397 FROM ${tbl}_segdir WHERE $where
398 ORDER BY level DESC, idx ASC
400 if {$start_block == 0} {
401 foreach {t d
} [fts3_readleaf
$root] { lappend a
($t) $d }
403 db
eval " SELECT block
405 WHERE blockid>=$start_block AND blockid<$leaves_end_block
408 foreach {t d
} [fts3_readleaf
$block] { lappend a
($t) $d }
415 ##########################################################################