1 # Copyright (C) all contributors <meta@public-inbox.org>
2 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
6 use PublicInbox::MID qw(mid2path);
8 use PublicInbox::InboxWritable;
9 use PublicInbox::TestCommon;
10 use PublicInbox::Import;
11 use PublicInbox::IO qw(write_file);
12 use File::Path qw(remove_tree);
13 my ($tmpdir, $for_destroy) = tmpdir();
14 my $home = "$tmpdir/pi-home";
15 my $pi_home = "$home/.public-inbox";
16 my $pi_config = "$pi_home/config";
17 my $maindir = "$tmpdir/main.git";
18 my $main_bin = getcwd()."/t/main-bin";
19 my $main_path = "$main_bin:$ENV{PATH}"; # for spamc ham mock
20 my $fail_bin = getcwd()."/t/fail-bin";
21 my $fail_path = "$fail_bin:$ENV{PATH}"; # for spamc spam mock
22 my $addr = 'test-public@example.com';
23 my $cfgpfx = "publicinbox.test";
24 my $faildir = "$home/faildir/";
25 my $git = PublicInbox::Git->new($maindir);
27 my $fail_bad_header = sub ($$$) {
28 my ($good_rev, $msg, $in) = @_;
29 my @f = glob("$faildir/*/*");
31 my ($out, $err) = ("", "");
32 my $opt = { 0 => \$in, 1 => \$out, 2 => \$err };
33 local $ENV{PATH} = $main_path;
34 ok(run_script(['-mda'], undef, $opt),
35 "no error on undeliverable ($msg)");
36 my $rev = $git->qx(qw(rev-list HEAD));
38 is($rev, $good_rev, "bad revision not committed ($msg)");
39 @f = glob("$faildir/*/*");
40 is(scalar @f, 1, "faildir written to");
45 ok(-x "$main_bin/spamc",
46 "spamc ham mock found (run in top of source tree");
47 ok(-x "$fail_bin/spamc",
48 "spamc mock found (run in top of source tree");
49 is(1, mkdir($home, 0755), "setup ~/ for testing");
50 is(1, mkdir($pi_home, 0755), "setup ~/.public-inbox");
51 PublicInbox::Import::init_bare($maindir);
53 write_file '>>', $pi_config, <<EOF;
60 local $ENV{GIT_COMMITTER_NAME} = eval {
62 use PublicInbox::Address;
63 use Encode qw/encode/;
64 my $msg = eml_load 't/utf8.eml';
65 my $from = $msg->header('From');
66 my ($author) = PublicInbox::Address::names($from);
67 my ($email) = PublicInbox::Address::emails($from);
68 my $date = $msg->header('Date');
71 encode('us-ascii', my $tmp = $author, Encode::HTMLCREF),
72 'HTML conversion is correct');
73 is($email, 'e@example.com', 'email parsed correctly');
74 is($date, 'Thu, 01 Jan 1970 00:00:00 +0000',
75 'message date parsed correctly');
82 local $ENV{PI_EMERGENCY} = $faildir;
83 local $ENV{HOME} = $home;
84 local $ENV{ORIGINAL_RECIPIENT} = $addr;
85 ok(run_script([qw(-mda --help)], undef,
86 { 1 => \my $out, 2 => \my $err }), '-mda --help');
87 like $out, qr/usage:/, 'usage shown w/ --help';
88 ok(!run_script([qw(-mda --bogus)], undef,
89 { 1 => \$out, 2 => \$err }), '-mda --bogus fails');
90 like $err, qr/usage:/, 'usage shown on bogus switch';
93 From: Me <me\@example.com>
94 To: You <you\@example.com>
96 Message-Id: <blah\@example.com>
98 Date: Thu, 01 Jan 1970 00:00:00 +0000
102 local $ENV{PATH} = $main_path;
103 ok(!run_script(['-mda'], { ORIGINAL_RECIPIENT => undef },
104 { 0 => \$in, 2 => \$err }),
105 'missing ORIGINAL_RECIPIENT fails');
106 is($? >> 8, 67, 'got EX_NOUSER');
107 like $err, qr/\bORIGINAL_RECIPIENT\b/,
108 'ORIGINAL_RECIPIENT noted in stderr';
109 is unlink(glob("$faildir/*/*")), 1, 'unlinked failed message';
112 # ensure successful message delivery
114 local $ENV{PATH} = $main_path;
115 ok(run_script(['-mda'], undef, { 0 => \$in }));
116 my $rev = $git->qx(qw(rev-list HEAD));
117 like($rev, qr/\A[a-f0-9]{40,64}/, "good revision committed");
119 my $cmt = $git->cat_file($rev);
120 like($$cmt, qr/^author Me <me\@example\.com> 0 \+0000\n/m,
121 "author info set correctly");
122 like($$cmt, qr/^committer test <test-public\@example\.com>/m,
123 "committer info set correctly");
127 # ensure failures work, fail with bad spamc
129 my @prev = <$faildir/new/*>;
130 is(scalar @prev, 0 , "nothing in PI_EMERGENCY before");
131 local $ENV{PATH} = $fail_path;
132 ok(run_script(['-mda'], undef, { 0 => \$in }));
133 my @revs = $git->qx(qw(rev-list HEAD));
134 is(scalar @revs, 1, "bad revision not committed");
135 my @new = <$faildir/new/*>;
136 is(scalar @new, 1, "PI_EMERGENCY is written to");
139 $fail_bad_header->($good_rev, "bad recipient", <<"");
140 From: Me <me\@example.com>
141 To: You <you\@example.com>
142 Message-Id: <bad-recipient\@example.com>
144 Date: Thu, 01 Jan 1970 00:00:00 +0000
146 my $fail = $fail_bad_header->($good_rev, "duplicate Message-ID", <<"");
147 From: Me <me\@example.com>
148 To: You <you\@example.com>
150 Message-ID: <blah\@example.com>
152 Date: Thu, 01 Jan 1970 00:00:00 +0000
154 like($fail->[2], qr/CONFLICT/, "duplicate Message-ID message");
156 $fail_bad_header->($good_rev, "missing From:", <<"");
158 Message-ID: <missing-from\@example.com>
160 Date: Thu, 01 Jan 1970 00:00:00 +0000
162 $fail_bad_header->($good_rev, "short subject:", <<"");
164 From: cat\@example.com
165 Message-ID: <short-subject\@example.com>
167 Date: Thu, 01 Jan 1970 00:00:00 +0000
169 $fail_bad_header->($good_rev, "no date", <<"");
172 Message-ID: <no-date\@example.com>
175 $fail_bad_header->($good_rev, "bad date", <<"");
178 Message-ID: <bad-date\@example.com>
186 local $ENV{PI_EMERGENCY} = $faildir;
187 local $ENV{HOME} = $home;
188 local $ENV{ORIGINAL_RECIPIENT} = $addr;
189 local $ENV{PATH} = $main_path;
190 my $mid = 'spam-train@example.com';
192 From: Spammer <spammer\@example.com>
193 To: You <you\@example.com>
196 Subject: this message will be trained as spam
197 Date: Thu, 01 Jan 1970 00:00:00 +0000
201 # deliver the spam message, first
202 ok(run_script(['-mda'], undef, { 0 => \$in }));
203 my $path = mid2path($mid);
204 my $msg = $git->cat_file("HEAD:$path");
205 like($$msg, qr/\Q$mid\E/, "message delivered");
208 local $ENV{GIT_AUTHOR_EMAIL} = 'trainer@example.com';
209 local $ENV{GIT_COMMITTER_EMAIL} = 'trainer@example.com';
210 local $ENV{GIT_COMMITTER_NAME};
211 delete $ENV{GIT_COMMITTER_NAME};
212 ok(run_script(['-learn', 'spam'], undef, { 0 => $msg }),
213 "no failure from learning spam");
214 ok(run_script(['-learn', 'spam'], undef, { 0 => $msg }),
215 "no failure from learning spam idempotently");
221 local $ENV{PI_EMERGENCY} = $faildir;
222 local $ENV{HOME} = $home;
223 local $ENV{ORIGINAL_RECIPIENT} = $addr;
224 local $ENV{PATH} = $main_path;
225 my $mid = 'ham-train@example.com';
227 From: False-positive <hammer\@example.com>
228 To: You <you\@example.com>
231 Subject: this message will be trained as spam
232 Date: Thu, 01 Jan 1970 00:00:00 +0000
236 # these should be overridden
237 local $ENV{GIT_AUTHOR_EMAIL} = 'trainer@example.com';
238 local $ENV{GIT_COMMITTER_EMAIL} = 'trainer@example.com';
240 ok(run_script(['-learn', 'ham'], undef, { 0 => \$in }),
241 "learned ham without failure");
242 my $path = mid2path($mid);
243 my $msg = $git->cat_file("HEAD:$path");
244 like($$msg, qr/\Q$mid\E/, "ham message delivered");
245 ok(run_script(['-learn', 'ham'], undef, { 0 => \$in }),
246 "learned ham idempotently ");
248 # ensure trained email is filtered, too
249 my $mime = eml_load 't/mda-mime.eml';
250 ($mid) = ($mime->header_raw('message-id') =~ /<([^>]+)>/);
252 $in = $mime->as_string;
253 ok(run_script(['-learn', 'ham'], undef, { 0 => \$in }),
254 "learned ham without failure");
255 my $path = mid2path($mid);
256 $msg = $git->cat_file("HEAD:$path");
257 like($$msg, qr/<\Q$mid\E>/, "ham message delivered");
258 unlike($$msg, qr/<html>/i, '<html> filtered');
262 # List-ID based delivery
264 local $ENV{PI_EMERGENCY} = $faildir;
265 local $ENV{HOME} = $home;
266 local $ENV{ORIGINAL_RECIPIENT} = undef;
267 delete $ENV{ORIGINAL_RECIPIENT};
268 local $ENV{PATH} = $main_path;
269 my $list_id = 'foo.example.com';
270 my $mid = 'list-id-delivery@example.com';
272 From: user <user\@example.com>
273 To: You <you\@example.com>
277 Subject: this message will be trained as spam
278 Date: Thu, 01 Jan 1970 00:00:00 +0000
281 xsys(qw(git config --file), $pi_config, "$cfgpfx.listid", uc $list_id);
282 $? == 0 or die "failed to set listid $?";
283 ok(run_script(['-mda'], undef, { 0 => \$in }),
284 'mda OK with List-Id match');
285 my $path = mid2path($mid);
286 my $msg = $git->cat_file("HEAD:$path");
287 like($$msg, qr/\Q$list_id\E/, 'delivered message w/ List-ID matches');
289 # try a message w/o precheck
291 To: You <you\@example.com>
294 this message would not be accepted without --no-precheck
296 my ($out, $err) = ('', '');
297 my $rdr = { 0 => \$in, 1 => \$out, 2 => \$err };
298 ok(run_script(['-mda', '--no-precheck'], undef, $rdr),
299 'mda OK with List-Id match and --no-precheck');
300 my $cur = $git->qx(qw(diff HEAD~1..HEAD));
301 like($cur, qr/this message would not be accepted without --no-precheck/,
302 '--no-precheck delivered message anyways');
304 # try a message with multiple List-ID headers
308 Message-ID: <2lids\@example>
309 Subject: two List-IDs
310 From: user <user\@example.com>
312 Date: Fri, 02 Oct 1993 00:00:00 +0000
315 ($out, $err) = ('', '');
316 ok(run_script(['-mda'], undef, $rdr),
317 'mda OK with multiple List-Id matches');
318 $cur = $git->qx(qw(diff HEAD~1..HEAD));
319 like($cur, qr/^\+Message-ID: <2lids\@example>/sm,
320 'multi List-ID match delivered');
321 like($err, qr/multiple List-ID/, 'warned about multiple List-ID');
323 # ensure -learn rm works after inbox address is updated
324 ($out, $err) = ('', '');
325 xsys(qw(git config --file), $pi_config, "$cfgpfx.address",
326 $addr = 'updated-address@example.com');
327 ok(run_script(['-learn', 'rm'], undef, $rdr), 'rm-ed via -learn');
328 $cur = $git->qx(qw(diff HEAD~1..HEAD));
329 like($cur, qr/^-Message-ID: <2lids\@example>/sm, 'changed in git');
331 # ensure we can strip List-Unsubscribe
333 To: You <you\@example.com>
335 Message-ID: <unsubscribe-1\@example>
336 Subject: unsubscribe-1
337 From: user <user\@example.com>
339 Date: Fri, 02 Oct 1993 00:00:00 +0000
340 List-Unsubscribe: <https://example.com/some-UUID-here/listname>
341 List-Unsubscribe-Post: List-Unsubscribe=One-Click
343 List-Unsubscribe should be stripped
345 write_file '>>', $pi_config, <<EOM;
347 dropUniqueUnsubscribe
350 ok(run_script([qw(-mda)], undef, $rdr), 'mda w/ dropUniqueUnsubscribe');
351 $cur = join('', grep(/^\+/, $git->qx(qw(diff HEAD~1..HEAD))));
352 like $cur, qr/Message-ID: <unsubscribe-1/, 'imported new message';
353 unlike $cur, qr/some-UUID-here/, 'List-Unsubscribe gone';
354 unlike $cur, qr/List-Unsubscribe-Post/i, 'List-Unsubscribe-Post gone';
356 $in =~ s/unsubscribe-1/unsubscribe-2/g or xbail 'BUG: s// fail';
357 ok(run_script([qw(-learn ham)], undef, $rdr),
358 'learn ham w/ dropUniqueUnsubscribe');
359 $cur = join('', grep(/^\+/, $git->qx(qw(diff HEAD~1..HEAD))));
360 like $cur, qr/Message-ID: <unsubscribe-2/, 'learn ham';
361 unlike $cur, qr/some-UUID-here/, 'List-Unsubscribe gone on learn ham';
362 unlike $cur, qr/List-Unsubscribe-Post/i,
363 'List-Unsubscribe-Post gone on learn ham';
367 require_mods(qw(DBD::SQLite Xapian), 1);
368 local $ENV{PI_EMERGENCY} = $faildir;
369 local $ENV{HOME} = $home;
370 local $ENV{PATH} = $main_path;
371 my $rdr = { 1 => \(my $out = ''), 2 => \(my $err = '') };
372 ok(run_script([qw(-index -L medium), $maindir], undef, $rdr),
376 To: updated-address@example.com
377 Subject: this is a ham message for learn
378 Date: Fri, 02 Oct 1993 00:00:00 +0000
379 Message-ID: <medium-ham@example>
384 ok(run_script([qw(-learn ham)], undef, $rdr), 'learn medium ham');
385 is($err, '', 'nothing in stderr after medium -learn');
386 my $msg = $git->cat_file('HEAD:'.mid2path('medium-ham@example'));
387 like($$msg, qr/medium-ham/, 'medium ham added via -learn');
388 my @xap = grep(!m!/over\.sqlite3!,
389 glob("$maindir/public-inbox/xapian*/*"));
390 ok(remove_tree(@xap), 'rm Xapian files to convert to indexlevel=basic');
391 $in =~ s/medium-ham/basic-ham/g or xbail 'BUG: no s//';
392 ok(run_script([qw(-learn ham)], undef, $rdr), 'learn basic ham');
393 is($err, '', 'nothing in stderr after basic -learn');
394 $msg = $git->cat_file('HEAD:'.mid2path('basic-ham@example'));
395 like($$msg, qr/basic-ham/, 'basic ham added via -learn');
396 @xap = grep(!m!/over\.sqlite3!,
397 glob("$maindir/public-inbox/xapian*/*"));
398 is_deeply(\@xap, [], 'no Xapian files created by -learn');