NEWS: update for v14.5.1
[s-mailx.git] / spam.c
blob852b9c4ebf9d9eb3a074c3903e73c9c39c9b027c
1 /*@ S-nail - a mail user agent derived from Berkeley Mail.
2 *@ Spam related facilities.
4 * Copyright (c) 2013 Steffen "Daode" Nurpmeso <sdaoden@users.sf.net>.
6 * Permission to use, copy, modify, and/or distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
19 #ifndef HAVE_AMALGAMATION
20 # include "nail.h"
21 #endif
23 EMPTY_FILE(spam)
24 #ifdef HAVE_SPAM
26 * TODO - We cannot use the spamc library because of our jumping behaviour.
27 * TODO We could nonetheless if we'd start a fork(2)ed child which would
28 * TODO use the spamc library.
29 * TODO -- In fact using a child process that is immune from the terrible
30 * TODO signal and jumping mess, and that controls further childs, and
31 * TODO gains file descriptors via sendmsg(2), and is started once it is
32 * TODO needed first, i have in mind for quite some time, for the transition
33 * TODO to a select(2) based implementation: we could slowly convert SMTP,
34 * TODO etc., finally IMAP, at which case we could rejoin back into a single
35 * TODO process program (unless we want to isolate the UI at that time, to
36 * TODO allow for some xUI protocol).
37 * TODO :: That is to say -- it's a horrible signal and jump mess ::
38 * TODO - We do not yet handle direct communication with spamd(1).
39 * TODO I.e., this could be a lean alternative to the first item;
40 * TODO the protocol is easy and we could support ALL operations easily.
41 * TODO - We do not yet update mails in place, i.e., replace the original
42 * TODO message with the updated one; we could easily do it as if `edit' has
43 * TODO been used, but the nail codebase doesn't truly support that for IMAP.
44 * TODO (And it seems a bit crazy to download a message, update it and upload
45 * TODO it again.)
/* The operations we can ask spamc(1) to perform; the numeric values double
 * as indices into _spam_comms[] */
enum spam_action {
   _SPAM_RATE = 0,
   _SPAM_HAM = 1,
   _SPAM_SPAM = 2,
   _SPAM_FORGET = 3
};
55 struct spam_vc {
56 struct message * mp;
57 size_t mno;
58 int action;
59 int __dummy;
60 char * comm_s;
61 char * buffer;
62 /* TODO This codebase jumps around and uses "stacks" of signal handling;
63 * TODO until some later time we have to play the same game */
64 sighandler_type otstp;
65 sighandler_type ottin;
66 sighandler_type ottou;
67 sighandler_type ohup;
68 sighandler_type opipe;
69 sighandler_type oint;
70 char const * comm_a[16];
/* User command names for messages; indexed by enum spam_action */
static char const _spam_comms[][16] = {
   "spamrate",    /* _SPAM_RATE */
   "spamham",     /* _SPAM_HAM */
   "spamspam",    /* _SPAM_SPAM */
   "spamforget"   /* _SPAM_FORGET */
};
78 /* Shared action setup */
79 static bool_t _spam_action(enum spam_action sa, int *ip);
81 /* Convert a 2[.x]/whatever spam rate into message.m_spamscore */
82 static void _spam_rate2score(struct spam_vc *vc);
84 /* Interact with spamc(1) */
85 static bool_t _spam_interact(struct spam_vc *vc);
87 static bool_t
88 _spam_action(enum spam_action sa, int *ip)
90 struct spam_vc vc;
91 struct str str;
92 size_t maxsize;
93 char const *cp, **args;
94 bool_t ok = FAL0;
96 vc.action = sa;
98 if ((cp = voption("spam-command")) == NULL) {
99 #ifdef SPAMC_PATH
100 cp = SPAMC_PATH;
101 #else
102 fprintf(stderr, tr(514, "`%s': *spam-command* is not set\n"),
103 _spam_comms[sa]);
104 goto jleave;
105 #endif
108 /* Prepare the spamc(1) command line */
109 args = vc.comm_a;
110 *args++ = cp;
112 switch (sa) {
113 case _SPAM_RATE:
114 *args = "-c";
115 break;
116 case _SPAM_HAM:
117 args[1] = "ham";
118 goto jlearn;
119 case _SPAM_SPAM:
120 args[1] = "spam";
121 goto jlearn;
122 case _SPAM_FORGET:
123 args[1] = "forget";
124 jlearn:
125 *args = "-L";
126 ++args;
127 break;
129 ++args;
131 if ((cp = voption("spam-socket")) != NULL) {
132 *args++ = "-U";
133 *args++ = cp;
134 } else {
135 if ((cp = voption("spam-host")) != NULL) {
136 *args++ = "-d";
137 *args++ = cp;
139 if ((cp = voption("spam-port")) != NULL) {
140 *args++ = "-p";
141 *args++ = cp;
145 *args++ = "-l"; /* --log-to-stderr */
147 if ((cp = voption("spam-user")) != NULL) {
148 *args++ = "-u";
149 *args++ = cp;
152 *args = NULL;
153 vc.comm_s = str_concat_cpa(&str, vc.comm_a, " ")->s;
154 if (options & OPT_DEBUG)
155 fprintf(stderr, "spamc(1) via <%s>\n", vc.comm_s);
157 /* *spam-maxsize* we do handle ourselfs instead */
158 maxsize = 0;
159 if ((cp = voption("spam-maxsize")) != NULL)
160 maxsize = (size_t)strtol(cp, NULL, 10);
161 if (maxsize <= 0)
162 maxsize = SPAM_MAXSIZE;
164 /* Finally get an I/O buffer */
165 vc.buffer = salloc(BUFFER_SIZE);
167 for (ok = TRU1; *ip != 0; ++ip) {
168 vc.mno = (size_t)*ip - 1;
169 vc.mp = message + vc.mno;
170 if (sa == _SPAM_RATE)
171 vc.mp->m_spamscore = 0;
172 if (vc.mp->m_size > maxsize) {
173 if (options & OPT_VERBOSE)
174 fprintf(stderr, tr(515,
175 "`%s': message %lu exceeds maxsize (%lu > %lu), skip\n"),
176 _spam_comms[sa], (ul_it)vc.mno + 1,
177 (ul_it)vc.mp->m_size, (ul_it)maxsize);
178 continue;
180 if ((ok = _spam_interact(&vc)) == FAL0)
181 break;
183 #ifndef SPAMC_PATH
184 jleave:
185 #endif
186 return ! ok;
189 static void
190 _spam_rate2score(struct spam_vc *vc)
192 char *cp;
193 size_t size;
194 ui_it m, s;
196 cp = strchr(vc->buffer, '/');
197 if (cp == NULL)
198 goto jleave;
199 size = (size_t)(cp - vc->buffer);
200 vc->buffer[size] = '\0';
202 m = (ui_it)strtol(vc->buffer, &cp, 10);
203 if (cp == vc->buffer)
204 goto jleave;
206 s = (*cp == '\0') ? 0 : (ui_it)strtol(++cp, NULL, 10);
208 vc->mp->m_spamscore = (m << 8) | (s & 0xFF);
209 jleave:
213 static sigjmp_buf __spam_actjmp; /* TODO someday, we won't need it no more */
214 static int __spam_sig; /* TODO someday, we won't need it no more */
215 static void
216 __spam_onsig(int sig) /* TODO someday, we won't need it no more */
218 __spam_sig = sig;
219 siglongjmp(__spam_actjmp, 1);
222 static bool_t
223 _spam_interact(struct spam_vc *vc)
225 int p2c[2], c2p[2];
226 sigset_t cset;
227 size_t size;
228 pid_t pid;
229 FILE *ibuf;
230 enum {
231 _NONE = 0,
232 _SIGHOLD = 1<<0,
233 _P2C_0 = 1<<1,
234 _P2C_1 = 1<<2,
235 _P2C = _P2C_0 | _P2C_1,
236 _C2P_0 = 1<<3,
237 _C2P_1 = 1<<4,
238 _C2P = _C2P_0 | _C2P_1,
239 _JUMPED = 1<<5,
240 _RUNNING = 1<<6,
241 _GOODRUN = 1<<7,
242 _ERRORS = 1<<8
243 } state = _NONE;
245 setdot(vc->mp);
246 if ((ibuf = setinput(&mb, vc->mp, NEED_BODY)) == NULL) {
247 perror("setinput"); /* XXX tr() */
248 goto j_leave;
251 /* TODO Avoid that we jump away; yet necessary signal mess */
252 vc->otstp = safe_signal(SIGTSTP, SIG_DFL);
253 vc->ottin = safe_signal(SIGTTIN, SIG_DFL);
254 vc->ottou = safe_signal(SIGTTOU, SIG_DFL);
255 vc->opipe = safe_signal(SIGPIPE, SIG_IGN);
256 hold_sigs();
257 state |= _SIGHOLD;
258 vc->ohup = safe_signal(SIGHUP, &__spam_onsig);
259 vc->oint = safe_signal(SIGINT, &__spam_onsig);
260 /* Keep sigs blocked */
261 pid = 0; /* cc uninit */
263 if (! pipe_cloexec(p2c)) {
264 perror("pipe"); /* XXX tr() */
265 goto jleave;
267 state |= _P2C;
269 if (! pipe_cloexec(c2p)) {
270 perror("pipe"); /* XXX tr() */
271 goto jleave;
273 state |= _C2P;
275 if (sigsetjmp(__spam_actjmp, 1)) {
276 state |= _JUMPED;
277 goto jleave;
279 rele_sigs();
280 state &= ~_SIGHOLD;
282 sigemptyset(&cset);
283 pid = start_command(vc->comm_s, &cset, p2c[0], c2p[1], NULL, NULL, NULL);
284 state |= _RUNNING;
285 close(p2c[0]);
286 state &= ~_P2C_0;
288 /* Yes, we could sendmp(SEND_MBOX), but simply passing through the MBOX
289 * content does the same in effect, but is much more efficient.
290 * NOTE: this may mean we pass a message without From_ line! */
291 for (size = vc->mp->m_size; size > 0;) {
292 size_t i = fread(vc->buffer, 1, MIN(size, BUFFER_SIZE), ibuf);
293 if (i == 0) {
294 if (ferror(ibuf))
295 state |= _ERRORS;
296 break;
298 size -= i;
299 if (i != (size_t)write(p2c[1], vc->buffer, i)) {
300 state |= _ERRORS;
301 break;
305 jleave:
306 /* In what follows you see a lot of races; these can't be helped without
307 * atomic compare-and-swap; it only matters if we */
308 if (state & _SIGHOLD) {
309 state &= ~_SIGHOLD;
310 rele_sigs();
313 if (state & _P2C_0) {
314 state &= ~_P2C_0;
315 close(p2c[0]);
317 if (state & _C2P_1) {
318 state &= ~_C2P_1;
319 close(c2p[1]);
321 /* Close the write end, so that spamc(1) goes */
322 if (state & _P2C_1) {
323 state &= ~_P2C_1;
324 close(p2c[1]);
327 if (state & _RUNNING) {
328 state &= ~_RUNNING;
329 if (wait_child(pid, NULL))
330 state |= _GOODRUN;
333 /* XXX This only works because spamc(1) follows the clear protocol (1) read
334 * XXX everything until EOF on input, then (2) work, then (3) output
335 * XXX a single result line; otherwise we could deadlock here, but since
336 * TODO this is rather intermediate, go with it */
337 if (vc->action == _SPAM_RATE && ! (state & (_JUMPED | _ERRORS))) {
338 ssize_t i = read(c2p[0], vc->buffer, BUFFER_SIZE - 1);
339 if (i > 0) {
340 vc->buffer[i] = '\0';
341 _spam_rate2score(vc);
342 } else if (i != 0)
343 state |= _ERRORS;
346 if (state & _C2P_0) {
347 state &= ~_C2P_0;
348 close(c2p[0]);
351 if (vc->action == _SPAM_RATE) {
352 switch (state & (_JUMPED | _GOODRUN | _ERRORS)) {
353 case _GOODRUN:
354 vc->mp->m_flag &= ~MSPAM;
355 break;
356 case 0:
357 vc->mp->m_flag |= MSPAM;
358 default:
359 break;
361 } else {
362 if (state & (_JUMPED | _ERRORS))
363 /* xxx print message? */;
364 else if (vc->action == _SPAM_SPAM)
365 vc->mp->m_flag |= MSPAM;
366 else if (vc->action == _SPAM_HAM)
367 vc->mp->m_flag &= ~MSPAM;
370 safe_signal(SIGINT, vc->oint);
371 safe_signal(SIGHUP, vc->ohup);
372 safe_signal(SIGPIPE, vc->opipe);
373 safe_signal(SIGTSTP, vc->otstp);
374 safe_signal(SIGTTIN, vc->ottin);
375 safe_signal(SIGTTOU, vc->ottou);
377 /* Bounce jumps to the lex.c trampolines
378 * (i'd have never believed i'd ever say or even do something like this) */
379 if (state & _JUMPED) {
380 sigemptyset(&cset);
381 sigaddset(&cset, __spam_sig);
382 sigprocmask(SIG_UNBLOCK, &cset, NULL);
383 kill(0, __spam_sig);
385 j_leave:
386 return ! (state & _ERRORS);
389 FL int
390 cspam_clear(void *v)
392 int *ip;
394 for (ip = v; *ip != 0; ++ip)
395 message[(size_t)*ip - 1].m_flag &= ~MSPAM;
396 return 0;
399 FL int
400 cspam_set(void *v)
402 int *ip;
404 for (ip = v; *ip != 0; ++ip)
405 message[(size_t)*ip - 1].m_flag |= MSPAM;
406 return 0;
409 FL int
410 cspam_forget(void *v)
412 return _spam_action(_SPAM_FORGET, (int*)v) ? OKAY : STOP;
415 FL int
416 cspam_ham(void *v)
418 return _spam_action(_SPAM_HAM, (int*)v) ? OKAY : STOP;
421 FL int
422 cspam_rate(void *v)
424 return _spam_action(_SPAM_RATE, (int*)v) ? OKAY : STOP;
427 FL int
428 cspam_spam(void *v)
430 return _spam_action(_SPAM_SPAM, (int*)v) ? OKAY : STOP;
432 #endif /* HAVE_SPAM */
434 /* vim:set fenc=utf-8:s-it-mode */