2 # Copyright (C) all contributors <meta@public-inbox.org>
3 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
5 use PublicInbox::TestCommon;
6 use PublicInbox::Config;
7 use PublicInbox::InboxWritable;
9 require_mods(qw(json DBD::SQLite Xapian));
10 use autodie qw(open rename truncate unlink);
11 require PublicInbox::Search;
12 use_ok 'PublicInbox::ExtSearch';
13 use_ok 'PublicInbox::ExtSearchIdx';
14 use_ok 'PublicInbox::OverIdx';
# Fixture setup: use a temporary directory as $HOME, write a public-inbox
# config into it, create one v2 inbox (v2test) and one v1 inbox (v1test),
# feed the same message (t/utf8.eml) to each through -mda, and build the
# first extindex at $home/extindex.
15 my ($home, $for_destroy) = tmpdir();
16 local $ENV{HOME} = $home;
17 mkdir "$home/.public-inbox" or BAIL_OUT $!;
18 my $cfg_path = "$home/.public-inbox/config";
19 PublicInbox::IO::write_file '>', $cfg_path, <<EOF;
23 my $v2addr = 'v2test@example.com';
24 my $v1addr = 'v1test@example.com';
25 ok(run_script([qw(-init -Lbasic -V2 v2test --newsgroup v2.example),
26 "$home/v2test", 'http://example.com/v2test', $v2addr ]), 'v2test init');
# ORIGINAL_RECIPIENT is set to the v2 inbox address for the -mda run below.
27 my $env = { ORIGINAL_RECIPIENT => $v2addr };
28 my $eml = eml_load('t/utf8.eml');
30 $eml->header_set('List-Id', '<v2.example.com>');
# The serialized message is handed to -mda as file descriptor 0.
32 my $in = \($eml->as_string);
33 run_script(['-mda', '--no-precheck'], $env, { 0 => $in }) or BAIL_OUT '-mda';
35 ok(run_script([qw(-init -V1 v1test --newsgroup v1.example), "$home/v1test",
36 'http://example.com/v1test', $v1addr ]), 'v1test init');
# Same message again, now with the v1 List-Id and the v1 recipient address.
38 $eml->header_set('List-Id', '<v1.example.com>');
39 $in = \$eml->as_string;
41 $env = { ORIGINAL_RECIPIENT => $v1addr };
42 run_script(['-mda', '--no-precheck'], $env, { 0 => $in }) or BAIL_OUT '-mda';
# The v1 inbox is indexed explicitly with -index before -extindex runs.
44 run_script([qw(-index -Lbasic), "$home/v1test"]) or BAIL_OUT "index $?";
46 ok(run_script([qw(-extindex --dangerous --all), "$home/extindex"]),
49 my $es = PublicInbox::ExtSearch->new("$home/extindex");
50 ok($es->has_threadid, '->has_threadid');
# Boost handling: set publicinbox.v1test.boost to 10 in the test config,
# build a separate extindex ($home/extindex-b) and check that the blob
# recorded for article 1 is the one found in the v1.example xref3 entry.
54 xsys([qw(git config publicinbox.v1test.boost), 10],
55 { GIT_CONFIG => $cfg_path });
56 ok(run_script([qw(-extindex --all), "$home/extindex-b"]),
57 'extindex init with boost');
58 my $es = PublicInbox::ExtSearch->new("$home/extindex-b");
59 my $smsg = $es->over->get_art(1);
60 ok($smsg, 'got first article');
61 my $xref3 = $es->over->get_xref3($smsg->{num});
# Split the xref3 entries by newsgroup prefix for the boost comparisons.
62 my @v1 = grep(/\Av1/, @$xref3);
63 my @v2 = grep(/\Av2/, @$xref3);
64 like($v1[0], qr/\Av1\.example.*?\b\Q$smsg->{blob}\E\b/,
65 'smsg->{blob} respected boost');
# NOTE(review): the description below presumably means 'only two entries'.
66 is(scalar(@$xref3), 2, 'only to entries');
# Give v2test a higher boost (20) and --reindex: the blob of article 1 is
# now expected to match the v2.example xref3 entry instead.
# NOTE(review): as far as visible here, @v2 still holds the entries fetched
# before the reindex.
69 xsys([qw(git config publicinbox.v2test.boost), 20],
70 { GIT_CONFIG => $cfg_path });
71 ok(run_script([qw(-extindex --all --reindex), "$home/extindex-b"]),
72 'extindex --reindex with altered boost');
74 $es = PublicInbox::ExtSearch->new("$home/extindex-b");
75 $smsg = $es->over->get_art(1);
76 like($v2[0], qr/\Av2\.example.*?\b\Q$smsg->{blob}\E\b/,
77 'smsg->{blob} respects boost after reindex');
79 # high boost added later
# Index the two inboxes in two separate -extindex runs into a third
# extindex (v1test first, then v2test) and expect the v2 blob again.
80 my $b2 = "$home/extindex-bb";
81 ok(run_script([qw(-extindex), $b2, "$home/v1test"]),
82 'extindex with low boost inbox only');
83 ok(run_script([qw(-extindex), $b2, "$home/v2test"]),
84 'extindex with high boost inbox only');
85 $es = PublicInbox::ExtSearch->new($b2);
86 $smsg = $es->over->get_art(1);
87 $xref3 = $es->over->get_xref3($smsg->{num});
88 like($v2[0], qr/\Av2\.example.*?\b\Q$smsg->{blob}\E\b/,
89 'smsg->{blob} respected boost across 2 index runs');
# Drop both boost settings from the config again.
91 xsys([qw(git config --unset publicinbox.v1test.boost)],
92 { GIT_CONFIG => $cfg_path });
93 xsys([qw(git config --unset publicinbox.v2test.boost)],
94 { GIT_CONFIG => $cfg_path });
# Append extindex settings (including topdir = $home/extindex) to the config
# by hand, reload the config, and check that ->ALL is available and that
# nntp_xref_for() maps article 1 of v2.example to article 1 in both
# newsgroups.
97 { # TODO: -extindex should write this to config
98 PublicInbox::IO::write_file '>>', $cfg_path, <<EOF;
101 topdir = $home/extindex
103 my $pi_cfg = PublicInbox::Config->new;
105 ok($pi_cfg->ALL, '->ALL');
106 my $ibx = $pi_cfg->{-by_newsgroup}->{'v2.example'};
107 my $ret = $pi_cfg->ALL->nntp_xref_for($ibx, $ibx->over->get_art(1));
108 is_deeply($ret, { 'v1.example' => 1, 'v2.example' => 1 },
# NNTP cross-reference check: start -nntpd with a listening TCP socket
# passed as file descriptor 3, then use Net::NNTP to verify that XPATH and
# the Xref: header both list the message under v1.example and v2.example.
113 require_mods(qw(Net::NNTP), 1);
114 my $sock = tcp_server();
115 my $host_port = tcp_host_port($sock);
116 my ($out, $err) = ("$home/nntpd.out.log", "$home/nntpd.err.log");
117 my $cmd = [ '-nntpd', '-W0', "--stdout=$out", "--stderr=$err" ];
118 my $td = start_script($cmd, undef, { 3 => $sock });
119 my $n = Net::NNTP->new($host_port);
120 my @xp = $n->xpath('<testmessage@example.com>');
121 is_deeply(\@xp, [ qw(v1.example/1 v2.example/1) ]);
122 $n->group('v1.example');
123 my $res = $n->head(1);
# Keep only the Xref: header line(s) from the HEAD response.
124 @$res = grep(/^Xref: /, @$res);
125 like($res->[0], qr/ v1\.example:1 v2\.example:1/, 'nntp_xref works');
# Inspect the first extindex directly: exactly one article exists, and its
# xref3 list has the v2.example entry first and the v1.example entry second.
128 my $es = PublicInbox::ExtSearch->new("$home/extindex");
130 my $smsg = $es->over->get_art(1);
131 ok($smsg, 'got first article');
132 is($es->over->get_art(2), undef, 'only one added');
133 my $xref3 = $es->over->get_xref3(1);
134 like($xref3->[0], qr/\A\Qv2.example\E:1:/, 'order preserved 1');
135 like($xref3->[1], qr/\A\Qv1.example\E:1:/, 'order preserved 2');
# NOTE(review): the description below presumably means 'only two entries'.
136 is(scalar(@$xref3), 2, 'only to entries');
# Edit the message in the v2 inbox (-edit, with a MAIL_EDITOR that rewrites
# 'test message' to 'BEST MSG'), re-run -extindex, and check that the edited
# copy shows up as a separate article #2 while #1 keeps the original body.
# The string in the if() condition is always true; it only labels the section.
139 if ('inbox edited') {
140 my ($in, $out, $err);
141 $in = $out = $err = '';
142 my $opt = { 0 => \$in, 1 => \$out, 2 => \$err };
143 my $env = { MAIL_EDITOR => "$^X -w -i -p -e 's/test message/BEST MSG/'" };
144 my $cmd = [ qw(-edit -Ft/utf8.eml), "$home/v2test" ];
145 ok(run_script($cmd, $env, $opt), '-edit');
146 ok(run_script([qw(-extindex --all), "$home/extindex"], undef, $opt),
148 like($err, qr/discontiguous range/, 'warned about discontiguous range');
# Both articles share a Message-ID but must point at different blobs.
149 my $msg1 = $es->over->get_art(1) or BAIL_OUT 'msg1 missing';
150 my $msg2 = $es->over->get_art(2) or BAIL_OUT 'msg2 missing';
151 is($msg1->{mid}, $msg2->{mid}, 'edited message indexed');
152 isnt($msg1->{blob}, $msg2->{blob}, 'blobs differ');
153 my $eml2 = $es->smsg_eml($msg2);
154 like($eml2->body, qr/BEST MSG/, 'edited body in #2');
155 unlike($eml2->body, qr/test message/, 'old body discarded in #2');
156 my $eml1 = $es->smsg_eml($msg1);
157 like($eml1->body, qr/test message/, 'original body in #1');
# Each article is expected to carry exactly one, distinct, xref3 entry.
158 my $x1 = $es->over->get_xref3(1);
159 my $x2 = $es->over->get_xref3(2);
160 is(scalar(@$x1), 1, 'original only has one xref3');
161 is(scalar(@$x2), 1, 'new message has one xref3');
162 isnt($x1->[0], $x2->[0], 'xref3 differs');
# Both bodies must be searchable through the extindex.
164 my $mset = $es->mset('b:"BEST MSG"');
165 is($mset->size, 1, 'new message found');
166 $mset = $es->mset('b:"test message"');
167 is($mset->size, 1, 'old message found');
168 delete @$es{qw(git over xdb qp)}; # fork preparation
# Query across all inboxes via Config->ALL, then per inbox via ->isrch.
170 my $pi_cfg = PublicInbox::Config->new;
172 is(scalar($pi_cfg->ALL->mset('s:Testing')->items), 2,
173 '2 results in ->ALL');
# Collect the per-inbox isrch results keyed by eidx_key; they are compared
# against article 1 of each inbox further down.
176 $pi_cfg->each_inbox(sub {
179 local $SIG{__WARN__} = sub {}; # FIXME support --reindex
180 my $mset = $ibx->isrch->mset('s:Testing');
181 $res->{$ibx->eidx_key} = $ibx->isrch->mset_to_smsg($ibx, $mset);
183 is($nr, 2, 'two inboxes');
185 for my $v (qw(v1 v2)) {
186 my $ibx = $pi_cfg->lookup_newsgroup("$v.example");
187 my $smsg = $ibx->over->get_art(1);
189 $exp->{"$v.example"} = [ $smsg ];
191 is_deeply($res, $exp, 'isearch limited results');
192 $pi_cfg = $res = $exp = undef;
# Feed the edited message to `-learn rm --all`, re-run -extindex, and
# confirm article #2 and its search hit are gone.
194 $opt->{0} = \($eml2->as_string);
195 ok(run_script([qw(-learn rm --all)], undef, $opt), '-learn rm');
197 ok(run_script([qw(-extindex --all), "$home/extindex"], undef, undef),
198 'extindex after rm');
199 is($es->over->get_art(2), undef, 'doc #2 gone');
200 $mset = $es->mset('b:"BEST MSG"');
201 is($mset->size, 0, 'new message gone');
# The misc index is expected to list both inboxes, v2test first and v1test
# second. The extindex's over.sqlite3 is also opened directly through
# PublicInbox::OverIdx; $cfg, $schema_version and $oidx are reused by the
# sections that follow.
204 my $misc = $es->misc;
205 my @it = $misc->mset('')->items;
206 is(scalar(@it), 2, 'two inboxes');
207 like($it[0]->get_document->get_data, qr/v2test/, 'docdata matched v2');
208 like($it[1]->get_document->get_data, qr/v1test/, 'docdata matched v1');
210 my $cfg = PublicInbox::Config->new;
211 my $schema_version = PublicInbox::Search::SCHEMA_VERSION();
# The over database path embeds the search schema version (ei<version>).
212 my $f = "$home/extindex/ei$schema_version/over.sqlite3";
213 my $oidx = PublicInbox::OverIdx->new($f);
# Add one message to each inbox without updating that inbox's own index,
# and check that -extindex does not pick them up until -index has been run
# on the inboxes themselves.
214 if ('inject w/o indexing') {
215 use PublicInbox::Import;
# Remember the last indexed commits so the later -index run can be detected.
216 my $v1ibx = $cfg->lookup_name('v1test');
217 my $last_v1_commit = $v1ibx->mm->last_commit;
218 my $v2ibx = $cfg->lookup_name('v2test');
219 my $last_v2_commit = $v2ibx->mm->last_commit_xap($schema_version, 0);
220 my $git0 = PublicInbox::Git->new("$v2ibx->{inboxdir}/git/0.git");
221 chomp(my $cmt = $git0->qx(qw(rev-parse HEAD^0)));
222 is($last_v2_commit, $cmt, 'v2 index up-to-date');
# Write straight into 0.git through PublicInbox::Import; lock_path is
# cleared and path_type is forced to 'v2' on the importer object.
224 my $v2im = PublicInbox::Import->new($git0, undef, undef, $v2ibx);
225 $v2im->{lock_path} = undef;
226 $v2im->{path_type} = 'v2';
227 $v2im->add(eml_load('t/mda-mime.eml'));
229 chomp(my $tip = $git0->qx(qw(rev-parse HEAD^0)));
230 isnt($tip, $cmt, '0.git v2 updated');
232 # inject a message w/o updating index
# The v1 public-inbox directory is renamed out of the way while -mda runs
# and renamed back afterwards.
233 rename("$home/v1test/public-inbox", "$home/v1test/skip-index");
234 open(my $eh, '<', 't/iso-2202-jp.eml');
235 run_script(['-mda', '--no-precheck'], $env, { 0 => $eh}) or
237 rename("$home/v1test/skip-index", "$home/v1test/public-inbox");
# NOTE(review): $opt is built here, but the visible run_script call below
# passes undef for both the environment and the redirects.
239 my ($in, $out, $err);
240 $in = $out = $err = '';
241 my $opt = { 0 => \$in, 1 => \$out, 2 => \$err };
242 ok(run_script([qw(-extindex -v -v --all), "$home/extindex"],
243 undef, undef), 'extindex noop');
# Neither injected message may be found in the extindex yet.
245 my $mset = $es->mset('mid:199707281508.AAA24167@hoyogw.example');
246 is($mset->size, 0, 'did not attempt to index unindexed v1 message');
247 $mset = $es->mset('mid:multipart-html-sucks@11');
248 is($mset->size, 0, 'did not attempt to index unindexed v2 message');
# Once the inboxes are indexed, the next -extindex run must find both.
249 ok(run_script([qw(-index --all)]), 'indexed v1 and v2 inboxes');
251 isnt($v1ibx->mm->last_commit, $last_v1_commit, '-index v1 worked');
252 isnt($v2ibx->mm->last_commit_xap($schema_version, 0),
253 $last_v2_commit, '-index v2 worked');
254 ok(run_script([qw(-extindex --all), "$home/extindex"]),
258 $mset = $es->mset('mid:199707281508.AAA24167@hoyogw.example');
259 is($mset->size, 1, 'got v1 message');
260 $mset = $es->mset('mid:multipart-html-sucks@11');
261 is($mset->size, 1, 'got v2 message');
# --reindex must notice messages the incremental indexer skipped. The
# extindex's lc-v2 meta value is advanced by hand to the inbox's newer
# commit, so a plain -extindex run sees nothing new; only --reindex indexes
# the unseen message. The reverse case follows: the message is removed from
# the inbox, the meta value is bumped again, and --reindex must report the
# extindex entry as stale and purge it.
# NOTE(review): the statement that adds $eml to the inbox between $cmt_a and
# $cmt_b is not visible in this excerpt.
264 if ('reindex catches missed messages') {
265 my $v2ibx = $cfg->lookup_name('v2test');
266 $v2ibx->{-no_fsync} = 1;
267 my $im = PublicInbox::InboxWritable->new($v2ibx)->importer(0);
268 my $cmt_a = $v2ibx->mm->last_commit_xap($schema_version, 0);
269 my $eml = eml_load('t/data/0001.patch');
272 my $cmt_b = $v2ibx->mm->last_commit_xap($schema_version, 0);
273 isnt($cmt_a, $cmt_b, 'v2 0.git HEAD updated');
275 my $uv = $v2ibx->uidvalidity;
276 my $lc_key = "lc-v2:v2.example//$uv;0";
# The test expects the two-argument eidx_meta call to hand back the
# previously stored value ($cmt_a) while storing $cmt_b.
277 is($oidx->eidx_meta($lc_key, $cmt_b), $cmt_a,
278 'update lc-v2 meta, old is as expected');
279 my $max = $oidx->max;
281 ok(run_script([qw(-extindex), "$home/extindex", $v2ibx->{inboxdir}]),
283 is($oidx->max, $max, '->max unchanged');
284 is($oidx->eidx_meta($lc_key), $cmt_b, 'lc-v2 unchanged');
# stderr of the --reindex runs is captured in $err.
286 my $opt = { 2 => \(my $err = '') };
287 ok(run_script([qw(-extindex --reindex), "$home/extindex",
288 $v2ibx->{inboxdir}], undef, $opt),
289 '--reindex for unseen');
290 is($oidx->max, $max + 1, '->max bumped');
291 is($oidx->eidx_meta($lc_key), $cmt_b, 'lc-v2 stays unchanged');
# Exactly one stderr line is expected, and it must mention reindex_unseen.
292 my @err = split(/^/, $err);
293 is(scalar(@err), 1, 'only one warning') or diag "err=$err";
294 like($err[0], qr/# reindex_unseen/, 'got reindex_unseen message');
295 my $new = $oidx->get_art($max + 1);
296 is($new->{subject}, $eml->header('Subject'), 'new message added');
299 # git patch-id --stable <t/data/0001.patch | awk '{print $1}'
300 my $patchid = '91ee6b761fc7f47cad9f2b09b10489f313eb5b71';
301 my $mset = $es->search->mset("patchid:$patchid");
302 is($mset->size, 1, 'patchid search works');
304 $mset = $es->mset("mid:$new->{mid}");
305 is($mset->size, 1, 'previously unseen, now indexed in Xapian');
# Reverse case: remove the message from the inbox, then bump the meta value
# so that the removal is skipped by incremental indexing.
307 ok($im->remove($eml), 'remove new message from v2 inbox');
309 my $cmt_c = $v2ibx->mm->last_commit_xap($schema_version, 0);
310 is($oidx->eidx_meta($lc_key, $cmt_c), $cmt_b,
311 'bump lc-v2 meta again to skip v2 remove');
314 ok(run_script([qw(-extindex --reindex), "$home/extindex",
315 $v2ibx->{inboxdir}], undef, $opt),
316 '--reindex for stale');
317 @err = split(/^/, $err);
318 is(scalar(@err), 1, 'only one warning') or diag "err=$err";
319 like($err[0], qr/\(#$new->{num}\): stale/, 'got stale message warning');
# The stale article must be gone from over, xref3 and Xapian alike.
320 is($oidx->get_art($new->{num}), undef,
321 'stale message gone from over');
322 is_deeply($oidx->get_xref3($new->{num}), [],
323 'stale message has no xref3');
325 $mset = $es->mset("mid:$new->{mid}");
326 is($mset->size, 0, 'stale mid gone Xapian');
# --fast is accepted together with --reindex but must fail on its own.
328 ok(run_script([qw(-extindex --reindex --all --fast), "$home/extindex"],
329 undef, $opt), '--reindex w/ --fast');
330 ok(!run_script([qw(-extindex --all --fast), "$home/extindex"],
331 undef, $opt), '--fast alone makes no sense');
# Simulate a deduplication change: the xref3 of a v2 message is attached by
# hand to existing extindex article 3, with the lc-v2 meta value advanced so
# that a normal -extindex run is a no-op. --reindex is then expected to
# split that message out into a new article and restore the original xref3
# of article 3.
# NOTE(review): the statement that adds $eml to the inbox between $cmt_a and
# $cmt_b is not visible in this excerpt.
334 if ('reindex catches content bifurcation') {
335 use PublicInbox::MID qw(mids);
336 my $v2ibx = $cfg->lookup_name('v2test');
337 $v2ibx->{-no_fsync} = 1;
338 my $im = PublicInbox::InboxWritable->new($v2ibx)->importer(0);
339 my $eml = eml_load('t/data/message_embed.eml');
340 my $cmt_a = $v2ibx->mm->last_commit_xap($schema_version, 0);
343 my $cmt_b = $v2ibx->mm->last_commit_xap($schema_version, 0);
344 my $uv = $v2ibx->uidvalidity;
345 my $lc_key = "lc-v2:v2.example//$uv;0";
347 is($oidx->eidx_meta($lc_key, $cmt_b), $cmt_a,
348 'update lc-v2 meta, old is as expected');
# Look the message up in the inbox's own over index by its first Message-ID.
349 my $mid = mids($eml)->[0];
350 my $smsg = $v2ibx->over->next_by_mid($mid, \(my $id), \(my $prev));
351 my $oldmax = $oidx->max;
# Keep the original xref3 of article 3, then inject the extra entry.
352 my $x3_orig = $oidx->get_xref3(3);
353 is(scalar(@$x3_orig), 1, '#3 has one xref');
354 $oidx->add_xref3(3, $smsg->{num}, $smsg->{blob}, 'v2.example');
355 my $x3 = $oidx->get_xref3(3);
356 is(scalar(@$x3), 2, 'injected xref3');
358 my $opt = { 2 => \(my $err = '') };
359 ok(run_script([qw(-extindex --all), "$home/extindex"], undef, $opt),
360 'extindex --all is noop');
361 is($err, '', 'no warnings in index');
363 is($oidx->max, $oldmax, 'oidx->max unchanged');
# --reindex must add a new article and note the split on stderr.
365 ok(run_script([qw(-extindex --reindex --all), "$home/extindex"],
366 undef, $opt), 'extindex --reindex') or diag explain($opt);
368 ok($oidx->max > $oldmax, 'oidx->max bumped');
369 like($err, qr/split into 2 due to deduplication change/,
370 'bifurcation noted');
371 my $added = $oidx->get_art($oidx->max);
372 is($added->{blob}, $smsg->{blob}, 'new blob indexed');
373 is_deeply(["v2.example:$smsg->{num}:$smsg->{blob}"],
374 $oidx->get_xref3($added->{num}),
375 'xref3 corrected for bifurcated message');
376 is_deeply($oidx->get_xref3(3), $x3_orig, 'xref3 restored for #3');
# --reindex --rethread: record the largest tid before the run and the
# smallest tid after it (rows with num > 0 only); the test passes when the
# latter is greater than the former.
379 if ('--reindex --rethread') {
380 my $before = $oidx->dbh->selectrow_array(<<'');
381 SELECT MAX(tid) FROM over WHERE num > 0
384 ok(run_script([qw(-extindex --reindex --rethread --all),
385 "$home/extindex"], undef, $opt),
387 my $after = $oidx->dbh->selectrow_array(<<'');
388 SELECT MIN(tid) FROM over WHERE num > 0
390 # actual rethread logic is identical to v1/v2 and tested elsewhere
391 ok($after > $before, '--rethread updates MIN(tid)');
# Unset v1test's inboxdir in the config and run -extindex --gc. stderr must
# report the removal of the v1.example entry of article #1 and contain only
# lines starting with '#'; afterwards the misc index must list one inbox.
394 if ('remove v1test and test gc') {
395 xsys([qw(git config --unset publicinbox.v1test.inboxdir)],
396 { GIT_CONFIG => $cfg_path });
397 my $opt = { 2 => \(my $err = '') };
398 ok(run_script([qw(-extindex --gc), "$home/extindex"], undef, $opt),
400 like($err, qr/^# remove #1 v1\.example /ms, 'removed v1 message');
401 is(scalar(grep(!/^#/, split(/^/m, $err))), 0,
402 'no non-informational messages');
# Reopen the already-open misc Xapian handle before querying it again.
403 $misc->{xdb}->reopen;
404 @it = $misc->mset('')->items;
405 is(scalar(@it), 1, 'only one inbox left');
# --dedupe must succeed both alone and combined with --dry-run, with the
# captured stderr expected to be empty; --dry-run without --dedupe is
# expected to fail.
408 if ('dedupe + dry-run') {
409 my @cmd = ('-extindex', "$home/extindex");
410 my $opt = { 2 => \(my $err = '') };
411 ok(run_script([@cmd, '--dedupe'], undef, $opt), '--dedupe');
412 ok(run_script([@cmd, qw(--dedupe --dry-run)], undef, $opt),
413 '--dry-run --dedupe');
414 is $err, '', 'no errors';
415 ok(!run_script([@cmd, qw(--dry-run)], undef, $opt),
416 '--dry-run alone fails');
# Build a separate extindex for each -j value (1, 3 and 6) and check that
# the last single-character directory under ei*/ is named after the
# expected maximum shard index $max.
# NOTE(review): the line that derives $max from $j is not visible in this
# excerpt; only its clamp to zero is.
419 # chmod 0755, $home or xbail "chmod: $!";
420 for my $j (1, 3, 6) {
421 my $o = { 2 => \(my $err = '') };
422 my $d = "$home/extindex-j$j";
423 ok(run_script(['-extindex', "-j$j", '--all', $d], undef, $o),
426 $max = 0 if $max < 0;
427 my @dirs = glob("$d/ei*/?");
428 like($dirs[-1], qr!/ei[0-9]+/$max\z!, '-j works');
# Reshard the single-shard extindex-j1 with -xcpdb (-R4, then -R2) and
# check the shard directories, their modes, and that the search result
# count is unchanged. Then run -compact and confirm that the directory
# layout is the same and that --dedupe and --gc still work.
432 my $d = "$home/extindex-j1";
433 my $es = PublicInbox::ExtSearch->new($d);
434 ok(my $nresult0 = $es->mset('z:0..')->size, 'got results');
435 ok(ref($es->{xdb}), '{xdb} created');
436 my $nshards1 = $es->{nshard};
437 is($nshards1, 1, 'correct shard count');
# Put a known mode on the ei*/ directory; the modes of the shard
# directories created by -xcpdb are compared against it below.
439 my @ei_dir = glob("$d/ei*/");
440 chmod 0755, $ei_dir[0] or xbail "chmod: $!";
441 my $mode = sprintf('%04o', 07777 & (stat($ei_dir[0]))[2]);
442 is($mode, '0755', 'mode set on ei*/ dir');
443 my $o = { 2 => \(my $err = '') };
# Reshard to 4 shards.
444 ok(run_script([qw(-xcpdb -R4), $d]), 'xcpdb R4');
445 my @dirs = glob("$d/ei*/?");
447 is(grep(m!/ei[0-9]+/$i\z!, @dirs), 1, "shard [$i] created");
448 my $m = sprintf('%04o', 07777 & (stat($dirs[$i]))[2]);
449 is($m, $mode, "shard [$i] mode");
# Drop the cached Xapian handles so the next query opens the new shards.
451 delete @$es{qw(xdb qp)};
452 is($es->mset('z:0..')->size, $nresult0, 'new shards, same results');
455 is(grep(m!/ei[0-9]+/$i\z!, @dirs), 0, "no shard [$i]");
# Shrink back down to 2 shards.
458 ok(run_script([qw(-xcpdb -R2), $d]), 'xcpdb -R2');
459 @dirs = glob("$d/ei*/?");
461 is(grep(m!/ei[0-9]+/$i\z!, @dirs), 1, "shard [$i] kept");
464 is(grep(m!/ei[0-9]+/$i\z!, @dirs), 0, "no shard [$i]");
466 have_xapian_compact 1;
467 ok(run_script([qw(-compact), $d], undef, $o), 'compact');
468 # n.b. stderr contains xapian-compact output
470 my @d2 = glob("$d/ei*/?");
471 is_deeply(\@d2, \@dirs, 'dirs consistent after compact');
472 ok(run_script([qw(-extindex --dedupe --all), $d]),
473 '--dedupe works after compact');
474 ok(run_script([qw(-extindex --gc), $d], undef, $o),
475 '--gc works after compact');
# Create a temporary inbox (v2tmp) holding one message, index it into the
# extindex, then truncate the config back to the size recorded before the
# inbox was created and run --gc. The temporary message must disappear from
# both over and Xapian, the Xapian result count must drop by exactly one,
# and the docids in over and xref3 must still agree.
478 { # ensure --gc removes non-xposted messages
479 my $old_size = -s $cfg_path // xbail "stat $cfg_path $!";
480 my $tmp_addr = 'v2tmp@example.com';
481 run_script([qw(-init v2tmp --indexlevel basic
482 --newsgroup v2tmp.example),
483 "$home/v2tmp", 'http://example.com/v2tmp', $tmp_addr ])
485 $env = { ORIGINAL_RECIPIENT => $tmp_addr };
486 my $mid = 'tmpmsg@example.com';
492 Date: Tue, 19 Jan 2038 03:14:07 +0000
495 run_script([qw(-mda --no-precheck)], $env, {0 => $in}) or xbail '-mda';
496 ok(run_script([qw(-extindex --all), "$home/extindex"]), 'update');
499 my $es = PublicInbox::ExtSearch->new("$home/extindex");
501 my $smsg = $es->over->next_by_mid($mid, \$id, \$prv);
502 ok($smsg, 'tmpmsg indexed');
503 my $mset = $es->search->mset("mid:$mid");
504 is($mset->size, 1, 'new message found');
505 $mset = $es->search->mset('z:0..');
# Cut the config back to $old_size; presumably this removes the section
# that -init wrote for v2tmp -- TODO confirm.
508 truncate($cfg_path, $old_size);
509 my $rdr = { 2 => \(my $err) };
510 ok(run_script([qw(-extindex --gc), "$home/extindex"], undef, $rdr),
511 'gc to get rid of removed inbox');
# Only lines starting with 'I:' or '#' are tolerated on stderr.
512 is_deeply([ grep(!/^(?:I:|#)/, split(/^/m, $err)) ], [],
513 'no non-informational errors in stderr');
515 my $es = PublicInbox::ExtSearch->new("$home/extindex");
516 my $mset = $es->search->mset("mid:$mid");
517 is($mset->size, 0, 'tmpmsg gone from search');
519 is($es->over->next_by_mid($mid, \$id, \$prv), undef,
520 'tmpmsg gone from over');
# NOTE(review): 'indavderover' in the description below looks like a typo
# (perhaps 'in over'); confirm the intended wording and expectation.
522 is($es->over->next_by_mid('testmessage@example.com', \$id, \$prv),
523 undef, 'remaining message not indavderover');
524 $mset = $es->search->mset('z:0..');
525 is($mset->size, $nr - 1, 'existing messages not clobbered from search');
526 my $o = $es->over->{dbh}->selectall_arrayref(<<EOM);
527 SELECT num FROM over ORDER BY num
529 is(scalar(@$o), $mset->size, 'over row count matches Xapian');
530 my $x = $es->over->{dbh}->selectall_arrayref(<<EOM);
531 SELECT DISTINCT(docid) FROM xref3 ORDER BY docid
533 is_deeply($x, $o, 'xref3 and over docids match');
# Indexlevel metadata across shards: after creating an extindex with
# -L medium and -j3, the combined xdb and shard 0 must report indexlevel
# 'medium' while the remaining shards carry none. The same must hold after
# resharding to 5 shards with -xcpdb -R5.
537 my $d = "$home/eidx-med";
538 ok(run_script([qw(-extindex --dangerous --all -L medium -j3), $d]),
539 'extindex medium init');
540 my $es = PublicInbox::ExtSearch->new($d);
541 is($es->xdb->get_metadata('indexlevel'), 'medium',
542 'es indexlevel before');
543 my @xdb = $es->xdb_shards_flat;
544 is($xdb[0]->get_metadata('indexlevel'), 'medium',
545 '0 indexlevel before');
548 ok(!$_->get_metadata('indexlevel'), 'no indexlevel in >0 shard')
550 is($es->xdb->get_metadata('indexlevel'), 'medium', 'indexlevel before');
# Reshard and repeat the checks on a freshly opened ExtSearch object.
551 ok(run_script([qw(-xcpdb -R5), $d]), 'xcpdb R5');
552 $es = PublicInbox::ExtSearch->new($d);
553 is($es->xdb->get_metadata('indexlevel'), 'medium',
554 '0 indexlevel after');
555 @xdb = $es->xdb_shards_flat;
556 is(scalar(@xdb), 5, 'got 5 shards');
557 is($xdb[0]->get_metadata('indexlevel'), 'medium', '0 indexlevel after');
560 ok(!$_->get_metadata('indexlevel'), 'no indexlevel in >0 shard')
# `lei convert` must produce the same Maildir contents whether the extindex
# is given as a bare path or with the extindex: prefix, and must refuse an
# extindex that it considers unindexed.
# NOTE(review): the step that makes the extindex unindexed (likely acting
# on the over.sqlite* files globbed below) is not visible in this excerpt.
565 my $d = "$home/extindex";
566 lei_ok('convert', '-o', "$home/md1", $d);
567 lei_ok('convert', '-o', "$home/md2", "extindex:$d");
# The callback appends each message, as a string, to @$dst.
569 my $cb = sub { push @$dst, $_[2]->as_string };
570 require PublicInbox::MdirReader;
571 PublicInbox::MdirReader->new->maildir_each_eml("$home/md1", $cb);
# Sort both dumps so the comparison does not depend on Maildir read order.
572 my @md1 = sort { $a cmp $b } @$dst;
573 ok(scalar(@md1), 'dumped messages to md1');
575 PublicInbox::MdirReader->new->maildir_each_eml("$home/md2", $cb);
576 @$dst = sort { $a cmp $b } @$dst;
577 is_deeply($dst, \@md1,
578 "convert from extindex w/ or w/o `extindex' prefix");
580 my @o = glob "$home/extindex/ei*/over.sqlite*";
582 ok(!lei('convert', '-o', "$home/fail", "extindex:$d"));
583 like($lei_err, qr/unindexed .*?not supported/,
584 'noted unindexed extindex is unsupported');