Add -fno-strict-aliasing to prevent compile warnings on some systems.
[polipo.git] / forbidden.c
blobd89a72568e2f5e7a2701fe6225b0941b66b5be6c
/*
Copyright (c) 2003-2010 by Juliusz Chroboczek

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
23 #include "polipo.h"
25 #ifndef NO_FORBIDDEN
27 #include <regex.h>
28 #include <assert.h>
/*
 * One literal domain entry read from a domain file.
 *
 * `domain` holds `length` bytes and is not NUL-terminated.  Records are
 * allocated with sizeof(DomainRec) - 1 + length bytes, so the one-element
 * array is really the start of a longer buffer.
 */
typedef struct _Domain {
    int length;                 /* number of bytes stored in domain[] */
    char domain[1];             /* first byte of the over-allocated name */
} DomainRec, *DomainPtr;
35 AtomPtr forbiddenFile = NULL;
36 AtomPtr forbiddenUrl = NULL;
37 int forbiddenRedirectCode = 302;
39 AtomPtr redirector = NULL;
40 int redirectorRedirectCode = 302;
42 DomainPtr *forbiddenDomains = NULL;
43 regex_t *forbiddenRegex = NULL;
45 AtomPtr uncachableFile = NULL;
46 DomainPtr *uncachableDomains = NULL;
47 regex_t *uncachableRegex = NULL;
49 /* these three are only used internally by {parse,read}DomainFile */
50 /* to avoid having to pass it all as parameters */
51 static DomainPtr *domains;
52 static char *regexbuf;
53 static int rlen, rsize, dlen, dsize;
55 #ifndef NO_REDIRECTOR
56 static pid_t redirector_pid = 0;
57 static int redirector_read_fd = -1, redirector_write_fd = -1;
58 #define REDIRECTOR_BUFFER_SIZE 1024
59 static char *redirector_buffer = NULL;
60 RedirectRequestPtr redirector_request_first = NULL,
61 redirector_request_last = NULL;
62 #endif
64 static int atomSetterForbidden(ConfigVariablePtr, void*);
66 void
67 preinitForbidden(void)
69 CONFIG_VARIABLE_SETTABLE(forbiddenUrl, CONFIG_ATOM, configAtomSetter,
70 "URL to which forbidden requests "
71 "should be redirected.");
72 CONFIG_VARIABLE_SETTABLE(forbiddenRedirectCode, CONFIG_INT,
73 configIntSetter,
74 "Redirect code, 301 or 302.");
75 CONFIG_VARIABLE_SETTABLE(forbiddenFile, CONFIG_ATOM, atomSetterForbidden,
76 "File specifying forbidden URLs.");
77 #ifndef NO_REDIRECTOR
78 CONFIG_VARIABLE_SETTABLE(redirector, CONFIG_ATOM, atomSetterForbidden,
79 "Squid-style redirector.");
80 CONFIG_VARIABLE_SETTABLE(redirectorRedirectCode, CONFIG_INT,
81 configIntSetter,
82 "Redirect code to use with redirector.");
83 #endif
84 CONFIG_VARIABLE_SETTABLE(uncachableFile, CONFIG_ATOM, atomSetterForbidden,
85 "File specifying uncachable URLs.");
88 static int
89 atomSetterForbidden(ConfigVariablePtr var, void *value)
91 initForbidden();
92 return configAtomSetter(var, value);
95 int
96 readDomainFile(char *filename)
98 FILE *in;
99 char buf[512];
100 char *rs;
101 int i, j, is_regex, start;
103 in = fopen(filename, "r");
104 if(in == NULL) {
105 if(errno != ENOENT)
106 do_log_error(L_ERROR, errno, "Couldn't open file %s", filename);
107 return -1;
110 while(1) {
111 rs = fgets(buf, 512, in);
112 if(rs == NULL)
113 break;
114 for(i = 0; i < 512; i++) {
115 if(buf[i] != ' ' && buf[i] != '\t')
116 break;
118 start = i;
119 for(i = start; i < 512; i++) {
120 if(buf[i] == '#' || buf[i] == '\r' || buf[i] == '\n')
121 break;
123 while(i > start) {
124 if(buf[i - 1] != ' ' && buf[i - 1] != '\t')
125 break;
126 i--;
129 if(i <= start)
130 continue;
132 /* The significant part of the line is now between start and i */
134 is_regex = 0;
135 for(j = start; j < i; j++) {
136 if(buf[j] == '\\' || buf[j] == '*' || buf[j] == '/') {
137 is_regex = 1;
138 break;
142 if(is_regex) {
143 while(rlen + i - start + 8 >= rsize) {
144 char *new_regexbuf;
145 new_regexbuf = realloc(regexbuf, rsize * 2 + 1);
146 if(new_regexbuf == NULL) {
147 do_log(L_ERROR, "Couldn't reallocate regex.\n");
148 fclose(in);
149 return -1;
151 regexbuf = new_regexbuf;
152 rsize = rsize * 2 + 1;
154 if(rlen != 0)
155 rlen = snnprintf(regexbuf, rlen, rsize, "|");
156 rlen = snnprintf(regexbuf, rlen, rsize, "(");
157 rlen = snnprint_n(regexbuf, rlen, rsize, buf + start, i - start);
158 rlen = snnprintf(regexbuf, rlen, rsize, ")");
159 } else {
160 DomainPtr new_domain;
161 if(dlen >= dsize - 1) {
162 DomainPtr *new_domains;
163 new_domains = realloc(domains, (dsize * 2 + 1) *
164 sizeof(DomainPtr));
165 if(new_domains == NULL) {
166 do_log(L_ERROR,
167 "Couldn't reallocate domain list.\n");
168 fclose(in);
169 return -1;
171 domains = new_domains;
172 dsize = dsize * 2 + 1;
174 new_domain = malloc(sizeof(DomainRec) - 1 + i - start);
175 if(new_domain == NULL) {
176 do_log(L_ERROR, "Couldn't allocate domain.\n");
177 fclose(in);
178 return -1;
180 new_domain->length = i - start;
181 memcpy(new_domain->domain, buf + start, i - start);
182 domains[dlen++] = new_domain;
185 fclose(in);
186 return 1;
189 void
190 parseDomainFile(AtomPtr file,
191 DomainPtr **domains_return, regex_t **regex_return)
193 struct stat ss;
194 int rc;
196 if(*domains_return) {
197 DomainPtr *domain = *domains_return;
198 while(*domain) {
199 free(*domain);
200 domain++;
202 free(*domains_return);
203 *domains_return = NULL;
206 if(*regex_return) {
207 regfree(*regex_return);
208 *regex_return = NULL;
211 if(!file || file->length == 0)
212 return;
214 domains = malloc(64 * sizeof(DomainPtr));
215 if(domains == NULL) {
216 do_log(L_ERROR, "Couldn't allocate domain list.\n");
217 return;
219 dlen = 0;
220 dsize = 64;
222 regexbuf = malloc(512);
223 if(regexbuf == NULL) {
224 do_log(L_ERROR, "Couldn't allocate regex.\n");
225 free(domains);
226 return;
228 rlen = 0;
229 rsize = 512;
231 rc = stat(file->string, &ss);
232 if(rc < 0) {
233 if(errno != ENOENT)
234 do_log_error(L_WARN, errno, "Couldn't stat file %s", file->string);
235 } else {
236 if(!S_ISDIR(ss.st_mode))
237 readDomainFile(file->string);
238 else {
239 char *fts_argv[2];
240 FTS *fts;
241 FTSENT *fe;
242 fts_argv[0] = file->string;
243 fts_argv[1] = NULL;
244 fts = fts_open(fts_argv, FTS_LOGICAL, NULL);
245 if(fts) {
246 while(1) {
247 fe = fts_read(fts);
248 if(!fe) break;
249 if(fe->fts_info != FTS_D && fe->fts_info != FTS_DP &&
250 fe->fts_info != FTS_DC && fe->fts_info != FTS_DNR)
251 readDomainFile(fe->fts_accpath);
253 fts_close(fts);
254 } else {
255 do_log_error(L_ERROR, errno,
256 "Couldn't scan directory %s", file->string);
261 if(dlen > 0) {
262 domains[dlen] = NULL;
263 } else {
264 free(domains);
265 domains = NULL;
268 regex_t *regex;
270 if(rlen > 0) {
271 regex = malloc(sizeof(regex_t));
272 rc = regcomp(regex, regexbuf, REG_EXTENDED | REG_NOSUB);
273 if(rc != 0) {
274 do_log(L_ERROR, "Couldn't compile regex: %d.\n", rc);
275 free(regex);
276 regex = NULL;
278 } else {
279 regex = NULL;
281 free(regexbuf);
283 *domains_return = domains;
284 *regex_return = regex;
286 return;
289 void
290 initForbidden(void)
292 redirectorKill();
294 if(forbiddenFile)
295 forbiddenFile = expandTilde(forbiddenFile);
297 if(forbiddenFile == NULL) {
298 forbiddenFile = expandTilde(internAtom("~/.polipo-forbidden"));
299 if(forbiddenFile) {
300 if(access(forbiddenFile->string, F_OK) < 0) {
301 releaseAtom(forbiddenFile);
302 forbiddenFile = NULL;
307 if(forbiddenFile == NULL) {
308 if(access("/etc/polipo/forbidden", F_OK) >= 0)
309 forbiddenFile = internAtom("/etc/polipo/forbidden");
312 parseDomainFile(forbiddenFile, &forbiddenDomains, &forbiddenRegex);
315 if(uncachableFile)
316 uncachableFile = expandTilde(uncachableFile);
318 if(uncachableFile == NULL) {
319 uncachableFile = expandTilde(internAtom("~/.polipo-uncachable"));
320 if(uncachableFile) {
321 if(access(uncachableFile->string, F_OK) < 0) {
322 releaseAtom(uncachableFile);
323 uncachableFile = NULL;
328 if(uncachableFile == NULL) {
329 if(access("/etc/polipo/uncachable", F_OK) >= 0)
330 uncachableFile = internAtom("/etc/polipo/uncachable");
333 parseDomainFile(uncachableFile, &uncachableDomains, &uncachableRegex);
335 return;
339 urlIsMatched(char *url, int length, DomainPtr *domains, regex_t *regex)
341 /* This requires url to be NUL-terminated. */
342 assert(url[length] == '\0');
344 if(length < 8)
345 return 0;
347 if(memcmp(url, "http://", 7) != 0)
348 return 0;
350 if(domains) {
351 int i;
352 DomainPtr *domain;
353 for(i = 8; i < length; i++) {
354 if(url[i] == '/')
355 break;
357 domain = domains;
358 while(*domain) {
359 if((*domain)->length <= (i - 7) &&
360 (url[i - (*domain)->length - 1] == '.' ||
361 url[i - (*domain)->length - 1] == '/') &&
362 memcmp(url + i - (*domain)->length,
363 (*domain)->domain,
364 (*domain)->length) == 0)
365 return 1;
366 domain++;
370 if(regex)
371 return !regexec(regex, url, 0, NULL, 0);
373 return 0;
377 urlIsUncachable(char *url, int length)
379 return urlIsMatched(url, length, uncachableDomains, uncachableRegex);
383 urlForbidden(AtomPtr url,
384 int (*handler)(int, AtomPtr, AtomPtr, AtomPtr, void*),
385 void *closure)
387 int forbidden = urlIsMatched(url->string, url->length,
388 forbiddenDomains, forbiddenRegex);
389 int code = 0;
390 AtomPtr message = NULL, headers = NULL;
393 if(forbidden) {
394 message = internAtomF("Forbidden URL %s", url->string);
395 if(forbiddenUrl) {
396 code = forbiddenRedirectCode;
397 headers = internAtomF("\r\nLocation: %s", forbiddenUrl->string);
398 } else {
399 code = 403;
403 #ifndef NO_REDIRECTOR
404 if(code == 0 && redirector) {
405 RedirectRequestPtr request;
406 request = malloc(sizeof(RedirectRequestRec));
407 if(request == NULL) {
408 do_log(L_ERROR, "Couldn't allocate redirect request.\n");
409 goto done;
411 request->url = url;
412 request->handler = handler;
413 request->data = closure;
414 if(redirector_request_first == NULL)
415 redirector_request_first = request;
416 else
417 redirector_request_last->next = request;
418 redirector_request_last = request;
419 request->next = NULL;
420 if(request == redirector_request_first)
421 redirectorTrigger();
422 return 1;
425 #endif
427 done:
428 handler(code, url, message, headers, closure);
429 return 1;
432 #ifndef NO_REDIRECTOR
433 static void
434 logExitStatus(int status)
436 if(WIFEXITED(status) && WEXITSTATUS(status) == 142)
437 /* See child code in runRedirector */
438 do_log(L_ERROR, "Couldn't start redirector.\n");
439 else {
440 char *reason =
441 WIFEXITED(status) ? "with status" :
442 WIFSIGNALED(status) ? "on signal" :
443 "with unknown status";
444 int value =
445 WIFEXITED(status) ? WEXITSTATUS(status) :
446 WIFSIGNALED(status) ? WTERMSIG(status) :
447 status;
448 do_log(L_ERROR,
449 "Redirector exited %s %d.\n", reason, value);
453 void
454 redirectorKill(void)
456 int rc, status, dead;
458 if(redirector_read_fd >= 0) {
459 rc = waitpid(redirector_pid, &status, WNOHANG);
460 dead = (rc > 0);
461 close(redirector_read_fd);
462 redirector_read_fd = -1;
463 close(redirector_write_fd);
464 redirector_write_fd = -1;
465 if(!dead) {
466 rc = kill(redirector_pid, SIGTERM);
467 if(rc < 0 && errno != ESRCH) {
468 do_log_error(L_ERROR, errno, "Couldn't kill redirector");
469 redirector_pid = -1;
470 return;
472 do {
473 rc = waitpid(redirector_pid, &status, 0);
474 } while(rc < 0 && errno == EINTR);
475 if(rc < 0)
476 do_log_error(L_ERROR, errno,
477 "Couldn't wait for redirector's death");
478 } else
479 logExitStatus(status);
480 redirector_pid = -1;
484 static void
485 redirectorDestroyRequest(RedirectRequestPtr request)
487 assert(redirector_request_first == request);
488 redirector_request_first = request->next;
489 if(redirector_request_first == NULL)
490 redirector_request_last = NULL;
491 free(request);
494 void
495 redirectorTrigger(void)
497 RedirectRequestPtr request = redirector_request_first;
498 int rc;
500 if(!request)
501 return;
503 if(redirector_read_fd < 0) {
504 rc = runRedirector(&redirector_pid,
505 &redirector_read_fd, &redirector_write_fd);
506 if(rc < 0) {
507 request->handler(rc, request->url, NULL, NULL, request->data);
508 redirectorDestroyRequest(request);
509 return;
512 do_stream_2(IO_WRITE, redirector_write_fd, 0,
513 request->url->string, request->url->length,
514 "\n", 1,
515 redirectorStreamHandler1, request);
519 redirectorStreamHandler1(int status,
520 FdEventHandlerPtr event,
521 StreamRequestPtr srequest)
523 RedirectRequestPtr request = (RedirectRequestPtr)srequest->data;
525 if(status) {
526 if(status >= 0)
527 status = -EPIPE;
528 do_log_error(L_ERROR, -status, "Write to redirector failed");
529 goto fail;
532 if(!streamRequestDone(srequest))
533 return 0;
535 do_stream(IO_READ, redirector_read_fd, 0,
536 redirector_buffer, REDIRECTOR_BUFFER_SIZE,
537 redirectorStreamHandler2, request);
538 return 1;
540 fail:
541 request->handler(status < 0 ? status : -EPIPE,
542 request->url, NULL, NULL, request->data);
543 redirectorDestroyRequest(request);
544 redirectorKill();
545 return 1;
549 redirectorStreamHandler2(int status,
550 FdEventHandlerPtr event,
551 StreamRequestPtr srequest)
553 RedirectRequestPtr request = (RedirectRequestPtr)srequest->data;
554 char *c;
555 AtomPtr message;
556 AtomPtr headers;
557 int code;
559 if(status < 0) {
560 do_log_error(L_ERROR, -status, "Read from redirector failed");
561 request->handler(status, request->url, NULL, NULL, request->data);
562 goto kill;
564 c = memchr(redirector_buffer, '\n', srequest->offset);
565 if(!c) {
566 if(!status && srequest->offset < REDIRECTOR_BUFFER_SIZE)
567 return 0;
568 do_log(L_ERROR, "Redirector returned incomplete reply.\n");
569 request->handler(-EREDIRECTOR, request->url, NULL, NULL, request->data);
570 goto kill;
572 *c = '\0';
574 if(srequest->offset > c + 1 - redirector_buffer)
575 do_log(L_WARN, "Stray bytes in redirector output.\n");
577 if(c > redirector_buffer + 1 &&
578 (c - redirector_buffer != request->url->length ||
579 memcmp(redirector_buffer, request->url->string,
580 request->url->length) != 0)) {
581 code = redirectorRedirectCode;
582 message = internAtom("Redirected by external redirector");
583 if(message == NULL) {
584 request->handler(-ENOMEM, request->url, NULL, NULL, request->data);
585 goto kill;
588 headers = internAtomF("\r\nLocation: %s", redirector_buffer);
589 if(headers == NULL) {
590 releaseAtom(message);
591 request->handler(-ENOMEM, request->url, NULL, NULL, request->data);
592 goto kill;
594 } else {
595 code = 0;
596 message = NULL;
597 headers = NULL;
599 request->handler(code, request->url,
600 message, headers, request->data);
601 goto cont;
603 cont:
604 redirectorDestroyRequest(request);
605 redirectorTrigger();
606 return 1;
608 kill:
609 redirectorKill();
610 goto cont;
614 runRedirector(pid_t *pid_return, int *read_fd_return, int *write_fd_return)
616 int rc, rc2, status;
617 pid_t pid;
618 int filedes1[2], filedes2[2];
619 sigset_t ss, old_mask;
621 assert(redirector);
623 if(redirector_buffer == NULL) {
624 redirector_buffer = malloc(REDIRECTOR_BUFFER_SIZE);
625 if(redirector_buffer == NULL)
626 return -errno;
629 rc = pipe(filedes1);
630 if(rc < 0) {
631 rc = -errno;
632 goto fail1;
636 rc = pipe(filedes2);
637 if(rc < 0) {
638 rc = -errno;
639 goto fail2;
642 fflush(stdout);
643 fflush(stderr);
644 flushLog();
646 interestingSignals(&ss);
647 do {
648 rc = sigprocmask(SIG_BLOCK, &ss, &old_mask);
649 } while (rc < 0 && errno == EINTR);
650 if(rc < 0) {
651 rc = -errno;
652 goto fail3;
655 pid = fork();
656 if(pid < 0) {
657 rc = -errno;
658 goto fail4;
661 if(pid > 0) {
662 do {
663 rc = sigprocmask(SIG_SETMASK, &old_mask, NULL);
664 } while(rc < 0 && errno == EINTR);
666 if(rc < 0) {
667 rc = -errno;
668 goto fail4;
671 rc = setNonblocking(filedes1[1], 1);
672 if(rc >= 0)
673 rc = setNonblocking(filedes2[0], 1);
674 if(rc < 0) {
675 rc = -errno;
676 goto fail4;
679 /* This is completely unnecesary -- if the redirector cannot be
680 started, redirectorStreamHandler1 will get EPIPE straight away --,
681 but it improves error messages somewhat. */
682 rc = waitpid(pid, &status, WNOHANG);
683 if(rc > 0) {
684 logExitStatus(status);
685 rc = -EREDIRECTOR;
686 goto fail4;
687 } else if(rc < 0) {
688 rc = -errno;
689 goto fail4;
692 *read_fd_return = filedes2[0];
693 *write_fd_return = filedes1[1];
695 *pid_return = pid;
696 /* This comes at the end so that the fail* labels can work */
697 close(filedes1[0]);
698 close(filedes2[1]);
699 } else {
700 close(filedes1[1]);
701 close(filedes2[0]);
702 uninitEvents();
703 do {
704 rc = sigprocmask(SIG_SETMASK, &old_mask, NULL);
705 } while (rc < 0 && errno == EINTR);
706 if(rc < 0)
707 exit(142);
709 if(filedes1[0] != 0)
710 dup2(filedes1[0], 0);
711 if(filedes2[1] != 1)
712 dup2(filedes2[1], 1);
714 execlp(redirector->string, redirector->string, NULL);
715 exit(142);
716 /* NOTREACHED */
718 return 1;
720 fail4:
721 do {
722 rc2 = sigprocmask(SIG_SETMASK, &old_mask, NULL);
723 } while(rc2 < 0 && errno == EINTR);
724 fail3:
725 close(filedes2[0]);
726 close(filedes2[1]);
727 fail2:
728 close(filedes1[0]);
729 close(filedes1[1]);
730 fail1:
731 free(redirector_buffer);
732 redirector_buffer = NULL;
733 return rc;
736 #else
/* Built without redirector support: there is never a process to stop. */
void
redirectorKill(void)
{
    return;
}
744 #endif
746 #else
748 void
749 preinitForbidden()
751 return;
/* Built with NO_FORBIDDEN: nothing to initialise. */
void
initForbidden(void)
{
    return;
}
/* Built with NO_FORBIDDEN: no URL is ever reported as uncachable. */
int
urlIsUncachable(char *url, int length)
{
    return 0;
}
767 urlForbidden(AtomPtr url,
768 int (*handler)(int, AtomPtr, AtomPtr, AtomPtr, void*),
769 void *closure)
771 handler(0, url, NULL, NULL, closure);
772 return 1;
775 #endif