Add -fno-strict-aliasing to prevent compile warnings on some systems.
[polipo.git] / http_parse.c
blob63dca7e7393de1c78da1aa2b1ebeeb926a7f3063
1 /*
2 Copyright (c) 2003-2006 by Juliusz Chroboczek
4 Permission is hereby granted, free of charge, to any person obtaining a copy
5 of this software and associated documentation files (the "Software"), to deal
6 in the Software without restriction, including without limitation the rights
7 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 copies of the Software, and to permit persons to whom the Software is
9 furnished to do so, subject to the following conditions:
11 The above copyright notice and this permission notice shall be included in
12 all copies or substantial portions of the Software.
14 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 THE SOFTWARE.
23 #include "polipo.h"
25 static int getNextWord(const char *buf, int i, int *x_return, int *y_return);
26 static int getNextToken(const char *buf, int i, int *x_return, int *y_return);
27 static int getNextTokenInList(const char *buf, int i,
28 int *x_return, int *y_return,
29 int *z_return, int *t_return,
30 int *end_return);
32 static AtomPtr atomConnection, atomProxyConnection, atomContentLength,
33 atomHost, atomAcceptRange, atomTE,
34 atomReferer, atomProxyAuthenticate, atomProxyAuthorization,
35 atomKeepAlive, atomTrailer, atomUpgrade, atomDate, atomExpires,
36 atomIfModifiedSince, atomIfUnmodifiedSince, atomIfRange, atomLastModified,
37 atomIfMatch, atomIfNoneMatch, atomAge, atomTransferEncoding,
38 atomETag, atomCacheControl, atomPragma, atomContentRange, atomRange,
39 atomVia, atomVary, atomExpect, atomAuthorization,
40 atomSetCookie, atomCookie, atomCookie2,
41 atomXPolipoDate, atomXPolipoAccess, atomXPolipoLocation,
42 atomXPolipoBodyOffset;
44 AtomPtr atomContentType, atomContentEncoding;
46 int censorReferer = 0;
47 int laxHttpParser = 1;
49 static AtomListPtr censoredHeaders;
51 void
52 preinitHttpParser()
54 CONFIG_VARIABLE_SETTABLE(censorReferer, CONFIG_TRISTATE, configIntSetter,
55 "Censor referer headers.");
56 censoredHeaders = makeAtomList(NULL, 0);
57 if(censoredHeaders == NULL) {
58 do_log(L_ERROR, "Couldn't allocate censored atoms.\n");
59 exit(1);
61 CONFIG_VARIABLE(censoredHeaders, CONFIG_ATOM_LIST_LOWER,
62 "Headers to censor.");
63 CONFIG_VARIABLE_SETTABLE(laxHttpParser, CONFIG_BOOLEAN, configIntSetter,
64 "Ignore unknown HTTP headers.");
67 void
68 initHttpParser()
70 #define A(name, value) name = internAtom(value); if(!name) goto fail;
71 /* These must be in lower-case */
72 A(atomConnection, "connection");
73 A(atomProxyConnection, "proxy-connection");
74 A(atomContentLength, "content-length");
75 A(atomHost, "host");
76 A(atomAcceptRange, "accept-range");
77 A(atomTE, "te");
78 A(atomReferer, "referer");
79 A(atomProxyAuthenticate, "proxy-authenticate");
80 A(atomProxyAuthorization, "proxy-authorization");
81 A(atomKeepAlive, "keep-alive");
82 A(atomTrailer, "trailer");
83 A(atomUpgrade, "upgrade");
84 A(atomDate, "date");
85 A(atomExpires, "expires");
86 A(atomIfModifiedSince, "if-modified-since");
87 A(atomIfUnmodifiedSince, "if-unmodified-since");
88 A(atomIfRange, "if-range");
89 A(atomLastModified, "last-modified");
90 A(atomIfMatch, "if-match");
91 A(atomIfNoneMatch, "if-none-match");
92 A(atomAge, "age");
93 A(atomTransferEncoding, "transfer-encoding");
94 A(atomETag, "etag");
95 A(atomCacheControl, "cache-control");
96 A(atomPragma, "pragma");
97 A(atomContentRange, "content-range");
98 A(atomRange, "range");
99 A(atomVia, "via");
100 A(atomContentType, "content-type");
101 A(atomContentEncoding, "content-encoding");
102 A(atomVary, "vary");
103 A(atomExpect, "expect");
104 A(atomAuthorization, "authorization");
105 A(atomSetCookie, "set-cookie");
106 A(atomCookie, "cookie");
107 A(atomCookie2, "cookie2");
108 A(atomXPolipoDate, "x-polipo-date");
109 A(atomXPolipoAccess, "x-polipo-access");
110 A(atomXPolipoLocation, "x-polipo-location");
111 A(atomXPolipoBodyOffset, "x-polipo-body-offset");
112 #undef A
113 return;
115 fail:
116 do_log(L_ERROR, "Couldn't allocate atom.\n");
117 exit(1);
/* Locate the next space-delimited word in buf, starting at index i.
   Leading spaces (not tabs) are skipped; the word is the following run
   of bytes in the range 33..126 and may be empty.  On success stores
   the word's start in *x_return and its end (exclusive) in *y_return
   and returns 0.  Returns -1, leaving the outputs untouched, if a CR or
   LF is found before any word. */
static int
getNextWord(const char *restrict buf, int i, int *x_return, int *y_return)
{
    int begin;

    for(; buf[i] == ' '; i++)
        ;
    if(buf[i] == '\r' || buf[i] == '\n')
        return -1;

    begin = i;
    for(; buf[i] > 32 && buf[i] < 127; i++)
        ;

    *x_return = begin;
    *y_return = i;
    return 0;
}
/* Skip a parenthesised comment.  buf[i] must be '(' (asserted).
   Returns the index just past the closing ')', or -1 if a line break
   that is not followed by a space or tab (i.e. not a continuation) is
   met first.  A backslash immediately followed by ')' escapes that
   parenthesis; nested comments are not recognised. */
static int
skipComment(const char *restrict buf, int i)
{
    assert(buf[i] == '(');

    i++;
    for(;;) {
        switch(buf[i]) {
        case ')':
            return i + 1;
        case '\\':
            /* only "\)" is treated as an escape */
            i += (buf[i + 1] == ')') ? 2 : 1;
            break;
        case '\n':
            if(buf[i + 1] != ' ' && buf[i + 1] != '\t')
                return -1;
            i += 2;
            break;
        case '\r':
            if(buf[i + 1] != '\n')
                return -1;
            if(buf[i + 2] != ' ' && buf[i + 2] != '\t')
                return -1;
            i += 3;
            break;
        default:
            i++;
            break;
        }
    }
}
/* Advance past spaces, tabs, parenthesised comments and folded line
   breaks (LF or CRLF followed by a space or tab), starting at index i.
   Returns the index of the first byte that is none of these -- which
   may be an unfolded line break -- or -1 if skipComment() fails. */
static int
skipWhitespace(const char *restrict buf, int i)
{
    for(;;) {
        char c = buf[i];

        if(c == ' ' || c == '\t') {
            i++;
            continue;
        }
        if(c == '(') {
            i = skipComment(buf, i);
            if(i < 0)
                return -1;
            continue;
        }
        if(c == '\n') {
            if(buf[i + 1] != ' ' && buf[i + 1] != '\t')
                return i;
            i += 2;
            continue;
        }
        if(c == '\r' && buf[i + 1] == '\n') {
            if(buf[i + 2] != ' ' && buf[i + 2] != '\t')
                return i;
            i += 3;
            continue;
        }
        return i;
    }
}
/* Find the next token in buf starting at index i.
   Leading spaces, tabs, single-line parenthesised comments and folded
   line breaks (LF or CRLF followed by a space or tab) are skipped.  The
   token is the following run of bytes in the range 33..126 that are not
   one of the separators ( ) < > @ , ; : \ / [ ] ? = { } and may be
   empty.
   On success stores the token start in *x_return and its end
   (exclusive) in *y_return, and returns that end index.  Returns -1 on
   an unfolded line break or on a comment that is not closed before the
   end of the line or the terminating NUL. */
static int
getNextToken(const char *restrict buf, int i, int *x_return, int *y_return)
{
    int x, y;

    for(;;) {
        while(buf[i] == ' ' || buf[i] == '\t')
            i++;

        if(buf[i] == '(') {
            i++;
            while(buf[i] != ')') {
                if(buf[i] == '\0' || buf[i] == '\n' || buf[i] == '\r')
                    return -1;
                /* Advance the index, not the base pointer: the offsets
                   handed back must stay relative to the caller's buf. */
                if(buf[i] == '\\' && buf[i + 1] != '\0' &&
                   buf[i + 1] != '\n' && buf[i + 1] != '\r')
                    i += 2;
                else
                    i++;
            }
            i++;                /* consume the closing ')' */
            continue;
        }

        if(buf[i] == '\n') {
            if(buf[i + 1] != ' ' && buf[i + 1] != '\t')
                return -1;
            i += 2;
            continue;
        }

        if(buf[i] == '\r') {
            if(buf[i + 1] != '\n' ||
               (buf[i + 2] != ' ' && buf[i + 2] != '\t'))
                return -1;
            i += 3;
            continue;
        }

        break;
    }

    x = i;
    while(buf[i] > 32 && buf[i] < 127) {
        switch(buf[i]) {
        case '(': case ')': case '<': case '>': case '@':
        case ',': case ';': case ':': case '\\': case '/':
        case '[': case ']': case '?': case '=':
        case '{': case '}':
            goto out;
        default:
            i++;
        }
    }
 out:
    y = i;

    *x_return = x;
    *y_return = y;

    return y;
}
/* Parse a quoted entity tag, optionally prefixed by W/, starting at
   index i (leading spaces and tabs are skipped).
   On success stores the offsets of the text between the quotes in
   *x_return (start) and *y_return (end, exclusive), sets *weak_return
   to 1 if the W/ prefix was present and 0 otherwise, and returns the
   index just past the closing quote.  Returns -1, without touching the
   outputs, if no opening quote is found or a CR/LF precedes the closing
   quote. */
static int
getNextETag(const char * restrict buf, int i,
            int *x_return, int *y_return, int *weak_return)
{
    int is_weak;
    int begin;

    while(buf[i] == ' ' || buf[i] == '\t')
        i++;

    is_weak = (buf[i] == 'W' && buf[i + 1] == '/');
    if(is_weak)
        i += 2;

    if(buf[i] != '"')
        return -1;
    begin = ++i;

    for(; buf[i] != '"'; i++) {
        if(buf[i] == '\r' || buf[i] == '\n')
            return -1;
    }

    *x_return = begin;
    *y_return = i;
    *weak_return = is_weak;
    return i + 1;
}
/* Parse one element of a comma-separated list of the form
   token [ "=" value ], starting at index i.
   Outputs:
     *x_return, *y_return  token start / end (exclusive);
     *z_return, *t_return  value start / end (exclusive), or -1 / -1 if
                           the element has no "=value" part (either
                           pointer may be NULL);
     *end_return           1 if the element was terminated by a line
                           break that is not followed by a space or tab,
                           0 otherwise.
   The value runs up to the next ',' or line break, with trailing spaces
   and tabs excluded.  Returns the index at which the next element (or
   the next line) starts, or -1 on a syntax error. */
static int
getNextTokenInList(const char *restrict buf, int i,
                   int *x_return, int *y_return,
                   int *z_return, int *t_return,
                   int *end_return)
{
    int j, x, y, z = -1, t = -1, end;

    j = getNextToken(buf, i, &x, &y);
    if(j < 0)
        return -1;
    while(buf[j] == ' ' || buf[j] == '\t')
        j++;

    if(buf[j] == '=') {
        j++;
        while(buf[j] == ' ' || buf[j] == '\t')
            j++;
        z = j;
        while(buf[j] != ',' && buf[j] != '\n' && buf[j] != '\r')
            j++;
        /* Record where the value ends so that callers receive a usable
           [z, t) range rather than t == -1. */
        t = j;
        while(t > z && (buf[t - 1] == ' ' || buf[t - 1] == '\t'))
            t--;
    }

    if(buf[j] == '\n' || buf[j] == '\r') {
        if(buf[j] == '\r') {
            if(buf[j + 1] != '\n')
                return -1;
            j += 2;
        } else
            j++;
        end = 1;
        /* a following space or tab means the header line is folded */
        if(buf[j] == ' ' || buf[j] == '\t') {
            while(buf[j] == ' ' || buf[j] == '\t')
                j++;
            end = 0;
        }
    } else if(buf[j] == ',') {
        j++;
        while(buf[j] == ' ' || buf[j] == '\t')
            j++;
        end = 0;
    } else {
        return -1;
    }

    *x_return = x;
    *y_return = y;
    if(z_return)
        *z_return = z;
    if(t_return)
        *t_return = t;
    *end_return = end;
    return j;
}
/* Return 1 if strcasecmp_n() reports 0 when comparing s with the
   end - start bytes of buf beginning at start, and 0 otherwise. */
static inline int
token_compare(const char *buf, int start, int end, const char *s)
{
    const char *token = buf + start;
    int token_len = end - start;

    return !strcasecmp_n(s, token, token_len);
}
/* Skip optional spaces followed by a line terminator (LF or CRLF).
   Returns the index just past the terminator, or -1 if anything other
   than spaces precedes it or a CR is not followed by LF. */
static int
skipEol(const char *restrict buf, int i)
{
    for(; buf[i] == ' '; i++)
        ;

    if(buf[i] == '\n')
        return i + 1;
    if(buf[i] == '\r' && buf[i + 1] == '\n')
        return i + 2;
    return -1;
}
/* Scan forward from index i to the next CR or LF.
   On success stores the index of the line terminator's first byte in
   *start_return and returns the index just past the terminator (LF or
   CRLF).  Returns -1, leaving *start_return untouched, if a CR is not
   followed by LF. */
static int
skipToEol(const char *restrict buf, int i, int *start_return)
{
    int eol_len;

    while(buf[i] != '\n' && buf[i] != '\r')
        i++;

    if(buf[i] == '\n')
        eol_len = 1;
    else if(buf[i + 1] == '\n')     /* here buf[i] == '\r' */
        eol_len = 2;
    else
        return -1;

    *start_return = i;
    return i + eol_len;
}
/* Delimit a header value that begins at or after index start.
   Leading spaces and tabs are skipped; the value then extends across
   folded lines (a line break followed by a space or tab) up to the
   first line break that is not a fold.
   On success stores the value's start in *value_start_return and the
   index of that final line break in *value_end_return, and returns the
   index of the first byte of the next line.  Returns -1 if skipToEol()
   fails. */
static int
getHeaderValue(const char *restrict buf, int start,
               int *value_start_return, int *value_end_return)
{
    int scan, next, eol;

    while(buf[start] == ' ' || buf[start] == '\t')
        start++;

    scan = start;
    for(;;) {
        next = skipToEol(buf, scan, &eol);
        if(next < 0)
            return -1;
        if(buf[next] != ' ' && buf[next] != '\t')
            break;
        /* folded line: keep scanning after the leading blank */
        scan = next + 1;
    }

    *value_start_return = start;
    *value_end_return = eol;
    return next;
}
395 httpParseClientFirstLine(const char *restrict buf, int offset,
396 int *method_return,
397 AtomPtr *url_return,
398 int *version_return)
400 int i = 0;
401 int x, y;
402 int method;
403 AtomPtr url;
404 int version = HTTP_UNKNOWN;
405 int eol;
407 i = offset;
408 i = getNextWord(buf, i, &x, &y);
409 if(i < 0) return -1;
410 if(y == x + 3 && memcmp(buf + x, "GET", 3) == 0)
411 method = METHOD_GET;
412 else if(y == x + 4 && memcmp(buf + x, "HEAD", 4) == 0)
413 method = METHOD_HEAD;
414 else if(y == x + 4 && memcmp(buf + x, "POST", 4) == 0)
415 method = METHOD_POST;
416 else if(y == x + 3 && memcmp(buf + x, "PUT", 3) == 0)
417 method = METHOD_PUT;
418 else if(y == x + 7 && memcmp(buf + x, "CONNECT", 7) == 0)
419 method = METHOD_CONNECT;
420 else
421 method = METHOD_UNKNOWN;
423 i = getNextWord(buf, y + 1, &x, &y);
424 if(i < 0) return -1;
426 url = internAtomN(buf + x, y - x);
428 i = getNextWord(buf, y + 1, &x, &y);
429 if(i < 0) {
430 releaseAtom(url);
431 return -1;
434 if(y == x + 8) {
435 if(memcmp(buf + x, "HTTP/1.", 7) != 0)
436 version = HTTP_UNKNOWN;
437 else if(buf[x + 7] == '0')
438 version = HTTP_10;
439 else if(buf[x + 7] >= '1' && buf[x + 7] <= '9')
440 version = HTTP_11;
441 else
442 version = HTTP_UNKNOWN;
445 eol = skipEol(buf, y);
446 if(eol < 0) return -1;
448 *method_return = method;
449 if(url_return)
450 *url_return = url;
451 else
452 releaseAtom(url);
453 *version_return = version;
454 return eol;
458 httpParseServerFirstLine(const char *restrict buf,
459 int *status_return,
460 int *version_return,
461 AtomPtr *message_return)
463 int i = 0;
464 int x, y, eol;
465 int status;
466 int version = HTTP_UNKNOWN;
468 i = getNextWord(buf, 0, &x, &y);
469 if(i < 0)
470 return -1;
471 if(y == x + 8 && memcmp(buf + x, "HTTP/1.0", 8) == 0)
472 version = HTTP_10;
473 else if(y >= x + 8 && memcmp(buf + x, "HTTP/1.", 7) == 0)
474 version = HTTP_11;
475 else
476 version = HTTP_UNKNOWN;
478 i = getNextWord(buf, y + 1, &x, &y);
479 if(i < 0) return -1;
480 if(y == x + 3)
481 status = atol(buf + x);
482 else return -1;
484 i = skipToEol(buf, y, &eol);
485 if(i < 0) return -1;
487 *status_return = status;
488 *version_return = version;
489 if(message_return) {
490 /* Netscape enterprise bug */
491 if(eol > y)
492 *message_return = internAtomN(buf + y + 1, eol - y - 1);
493 else
494 *message_return = internAtom("No message");
496 return i;
/* Parse one header line starting at index start and return the index of
   the first byte of the following line.
   *name_start_return is set to -1 when the line is empty (end of
   headers) and to -2 when the line could not be parsed; in both cases
   the other outputs are left untouched.  Otherwise the four outputs
   receive the start/end offsets of the header name and of its value. */
static int
parseHeaderLine(const char *restrict buf, int start,
                int *name_start_return, int *name_end_return,
                int *value_start_return, int *value_end_return)
{
    int pos;
    int n_start, n_end, v_start, v_end;

    /* An empty line marks the end of the headers. */
    if(buf[start] == '\n') {
        *name_start_return = -1;
        return start + 1;
    }
    if(buf[start] == '\r' && buf[start + 1] == '\n') {
        *name_start_return = -1;
        return start + 2;
    }

    pos = getNextToken(buf, start, &n_start, &n_end);
    if(pos >= 0 && buf[pos] == ':') {
        pos++;
        while(buf[pos] == ' ' || buf[pos] == '\t')
            pos++;

        pos = getHeaderValue(buf, pos, &v_start, &v_end);
        if(pos >= 0) {
            *name_start_return = n_start;
            *name_end_return = n_end;
            *value_start_return = v_start;
            *value_end_return = v_end;
            return pos;
        }
    }

    /* Syntax error: skip to the end of the physical line. */
    for(pos = start; ; pos++) {
        if(buf[pos] == '\n') {
            pos++;
            break;
        }
        if(buf[pos] == '\r' && buf[pos + 1] == '\n') {
            pos += 2;
            break;
        }
    }
    *name_start_return = -2;
    return pos;
}
/* Search buf[from..to) for two consecutive line terminators, i.e. the
   empty line that ends a header block.  Returns -1 if none is found.
   When the second terminator is a bare LF, returns the index of the
   first terminator and stores the index just past the LF in
   *body_return.  When the second terminator is CRLF, returns the index
   just past the CRLF and stores the index of the first terminator in
   *body_return.
   A terminator at index 0 is not remembered, since 0 doubles as the
   "no terminator seen" marker.
   NOTE(review): the two branches hand back the same pair of positions
   in opposite roles; confirm against the callers which one is
   intended. */
int
findEndOfHeaders(const char *restrict buf, int from, int to, int *body_return)
{
    int pos = from;
    int prev_eol = 0;

    while(pos < to) {
        char c = buf[pos];

        if(c == '\n') {
            if(prev_eol) {
                *body_return = pos + 1;
                return prev_eol;
            }
            prev_eol = pos;
            pos++;
            continue;
        }

        if(c == '\r' && pos < to - 1 && buf[pos + 1] == '\n') {
            if(prev_eol) {
                *body_return = prev_eol;
                return pos + 2;
            }
            prev_eol = pos;
            pos += 2;
            continue;
        }

        /* any other byte (including a lone CR) breaks the sequence */
        prev_eol = 0;
        pos++;
    }
    return -1;
}
585 static int
586 parseContentRange(const char *restrict buf, int i,
587 int *from_return, int *to_return, int *full_len_return)
589 int j;
590 int from, to, full_len;
592 i = skipWhitespace(buf, i);
593 if(i < 0) return -1;
594 if(!token_compare(buf, i, i + 5, "bytes")) {
595 do_log(L_WARN, "Incorrect Content-Range header -- chugging along.\n");
596 } else {
597 i += 5;
599 i = skipWhitespace(buf, i);
600 if(buf[i] == '*') {
601 from = 0;
602 to = -1;
603 i++;
604 } else {
605 i = parseInt(buf, i, 0, INT_MAX, 10, &from);
606 if(i < 0) return -1;
607 if(buf[i] != '-') return -1;
608 i++;
609 i = parseInt(buf, i, 0, INT_MAX, 10, &to);
610 if(i < 0) return -1;
611 to = to + 1;
613 if(buf[i] != '/')
614 return -1;
615 i++;
616 if(buf[i] == '*')
617 full_len = -1;
618 else {
619 i = parseInt(buf, i, 0, INT_MAX, 10, &full_len);
620 if(i < 0) return -1;
622 j = skipEol(buf, i);
623 if(j < 0)
624 return -1;
626 *from_return = from;
627 *to_return = to;
628 *full_len_return = full_len;
629 return i;
/* Parse a Range value of the form "bytes=FROM-TO", "bytes=FROM-" or
   "bytes=-N" starting at index i.
   On success stores FROM (0 when absent) in *from_return and TO + 1 in
   *to_return (exclusive upper bound), or -1 in *to_return when no upper
   bound is given.
   Returns the index just past the last parsed field, or -1 on a syntax
   error, on trailing text before the end of line, or if TO + 1 is not
   representable as an int.
   NOTE(review): "bytes=-N" is reported as from 0 / to N + 1, i.e. as
   the first N + 1 bytes; HTTP defines that form as the last N bytes.
   Confirm how callers are expected to treat it. */
static int
parseRange(const char *restrict buf, int i,
           int *from_return, int *to_return)
{
    int j;
    int from, to;

    i = skipWhitespace(buf, i);
    if(i < 0)
        return -1;
    if(!token_compare(buf, i, i + 6, "bytes="))
        return -1;
    i += 6;
    i = skipWhitespace(buf, i);
    /* skipWhitespace() can fail; never index buf with -1 */
    if(i < 0)
        return -1;
    if(buf[i] == '-') {
        from = 0;
    } else {
        i = parseInt(buf, i, 0, INT_MAX, 10, &from);
        if(i < 0) return -1;
    }
    if(buf[i] != '-')
        return -1;
    i++;
    j = parseInt(buf, i, 0, INT_MAX, 10, &to);
    if(j < 0)
        to = -1;
    else {
        /* to + 1 would overflow */
        if(to == INT_MAX)
            return -1;
        to = to + 1;
        i = j;
    }
    /* only used to check that nothing but spaces precedes the EOL */
    j = skipEol(buf, i);
    if(j < 0) return -1;
    *from_return = from;
    *to_return = to;
    return i;
}
669 static void
670 parseCacheControl(const char *restrict buf,
671 int token_start, int token_end,
672 int v_start, int v_end, int *age_return)
674 int ret = -1;
676 if(v_start > 0)
677 ret = parseInt(buf, v_start, 0, INT_MAX, 10, age_return);
679 if(ret < 0) {
680 do_log(L_WARN, "Couldn't parse Cache-Control: ");
681 do_log_n(L_WARN, buf + token_start,
682 (v_end >= 0 ? v_end : token_end) -
683 token_start);
684 do_log(L_WARN, "\n");
/* Return 1 if two URLs have the same host part, 0 otherwise.
   Both URLs must be at least 7 bytes long and have "://" at offset 4
   (as in "http://"); the scheme letters themselves are not compared.
   The host parts -- everything from offset 7 up to the first '/' or the
   end of the URL -- are compared byte by byte with bit 0x20 set in
   both, which makes ASCII letters compare case-insensitively. */
static int
urlSameHost(const char *url1, int len1, const char *url2, int len2)
{
    int i;

    if(len1 < 7 || len2 < 7)
        return 0;
    if(memcmp(url1 + 4, "://", 3) || memcmp(url2 + 4, "://", 3))
        return 0;

    for(i = 7; i < len1 && i < len2; i++) {
        if(url1[i] == '/' || url2[i] == '/')
            break;
        if((url1[i] | 0x20) != (url2[i] | 0x20))
            break;
    }

    /* Same host only if both host parts end exactly here. */
    if(i != len1 && url1[i] != '/')
        return 0;
    if(i != len2 && url2[i] != '/')
        return 0;
    return 1;
}
/* Double the capacity of a header buffer.
   hbuf is the current buffer of *size bytes; hbuf_small is the
   caller's fixed buffer.  When hbuf is still hbuf_small, a new heap
   buffer is allocated and the old contents are copied into it;
   otherwise hbuf is grown with realloc().
   On success returns the new buffer and updates *size.  On failure
   (non-positive size, a size that cannot be doubled within int, or out
   of memory) frees hbuf unless it is hbuf_small, sets *size to 0 and
   returns NULL. */
static char *
resize_hbuf(char *hbuf, int *size, char *hbuf_small)
{
    int new_size;
    char *new_hbuf;

    /* Check before multiplying: signed overflow is undefined. */
    if(*size <= 0 || *size > INT_MAX / 2)
        goto fail;
    new_size = 2 * *size;

    if(hbuf == hbuf_small) {
        new_hbuf = malloc(new_size);
        if(new_hbuf == NULL) goto fail;
        memcpy(new_hbuf, hbuf, *size);
    } else {
        new_hbuf = realloc(hbuf, new_size);
        /* on failure hbuf is still valid and is freed below */
        if(new_hbuf == NULL) goto fail;
    }
    *size = new_size;
    return new_hbuf;

 fail:
    if(hbuf != hbuf_small)
        free(hbuf);
    *size = 0;
    return NULL;
}
737 httpParseHeaders(int client, AtomPtr url,
738 const char *buf, int start, HTTPRequestPtr request,
739 AtomPtr *headers_return,
740 int *len_return, CacheControlPtr cache_control_return,
741 HTTPConditionPtr *condition_return, int *te_return,
742 time_t *date_return, time_t *last_modified_return,
743 time_t *expires_return, time_t *polipo_age_return,
744 time_t *polipo_access_return, int *polipo_body_offset_return,
745 int *age_return, char **etag_return, AtomPtr *expect_return,
746 HTTPRangePtr range_return, HTTPRangePtr content_range_return,
747 char **location_return, AtomPtr *via_return,
748 AtomPtr *auth_return)
750 int local = url ? urlIsLocal(url->string, url->length) : 0;
751 char hbuf_small[512];
752 char *hbuf = hbuf_small;
753 int hbuf_size = 512, hbuf_length = 0;
754 int i, j,
755 name_start, name_end, value_start, value_end,
756 token_start, token_end, end;
757 AtomPtr name = NULL;
758 time_t date = -1, last_modified = -1, expires = -1, polipo_age = -1,
759 polipo_access = -1;
760 int len = -1, polipo_body_offset = -1;
761 CacheControlRec cache_control;
762 int te = TE_IDENTITY;
763 int age = -1;
764 char *etag = NULL, *ifrange = NULL;
765 int persistent = (!request || (request->connection->version != HTTP_10));
766 char *location = NULL;
767 AtomPtr via = NULL;
768 AtomPtr auth = NULL;
769 AtomPtr expect = NULL;
770 HTTPConditionPtr condition;
771 time_t ims = -1, inms = -1;
772 char *im = NULL, *inm = NULL;
773 AtomListPtr hopToHop = NULL;
774 HTTPRangeRec range = {-1, -1, -1}, content_range = {-1, -1, -1};
775 int haveCacheControl = 0;
777 #define RESIZE_HBUF() \
778 do { \
779 hbuf = resize_hbuf(hbuf, &hbuf_size, hbuf_small); \
780 if(hbuf == NULL) \
781 goto fail; \
782 } while(0)
784 cache_control.flags = 0;
785 cache_control.max_age = -1;
786 cache_control.s_maxage = -1;
787 cache_control.min_fresh = -1;
788 cache_control.max_stale = -1;
790 i = start;
792 while(1) {
793 i = parseHeaderLine(buf, i,
794 &name_start, &name_end, &value_start, &value_end);
795 if(i < 0) {
796 do_log(L_ERROR, "Couldn't find end of header line.\n");
797 goto fail;
800 if(name_start == -1)
801 break;
803 if(name_start < 0)
804 continue;
806 name = internAtomLowerN(buf + name_start, name_end - name_start);
808 if(name == atomConnection) {
809 j = getNextTokenInList(buf, value_start,
810 &token_start, &token_end, NULL, NULL,
811 &end);
812 while(1) {
813 if(j < 0) {
814 do_log(L_ERROR, "Couldn't parse Connection: ");
815 do_log_n(L_ERROR, buf + value_start,
816 value_end - value_start);
817 do_log(L_ERROR, ".\n");
818 goto fail;
820 if(token_compare(buf, token_start, token_end, "close")) {
821 persistent = 0;
822 } else if(token_compare(buf, token_start, token_end,
823 "keep-alive")) {
824 persistent = 1;
825 } else {
826 if(hopToHop == NULL)
827 hopToHop = makeAtomList(NULL, 0);
828 if(hopToHop == NULL) {
829 do_log(L_ERROR, "Couldn't allocate atom list.\n");
830 goto fail;
832 atomListCons(internAtomLowerN(buf + token_start,
833 token_end - token_start),
834 hopToHop);
836 if(end)
837 break;
838 j = getNextTokenInList(buf, j,
839 &token_start, &token_end, NULL, NULL,
840 &end);
842 } else if(name == atomCacheControl)
843 haveCacheControl = 1;
845 releaseAtom(name);
846 name = NULL;
849 i = start;
851 while(1) {
852 i = parseHeaderLine(buf, i,
853 &name_start, &name_end, &value_start, &value_end);
854 if(i < 0) {
855 do_log(L_ERROR, "Couldn't find end of header line.\n");
856 goto fail;
859 if(name_start == -1)
860 break;
862 if(name_start < 0) {
863 do_log(L_WARN, "Couldn't parse header line.\n");
864 if(laxHttpParser)
865 continue;
866 else
867 goto fail;
870 name = internAtomLowerN(buf + name_start, name_end - name_start);
872 if(name == atomProxyConnection) {
873 j = getNextTokenInList(buf, value_start,
874 &token_start, &token_end, NULL, NULL,
875 &end);
876 while(1) {
877 if(j < 0) {
878 do_log(L_WARN, "Couldn't parse Proxy-Connection:");
879 do_log_n(L_WARN, buf + value_start,
880 value_end - value_start);
881 do_log(L_WARN, ".\n");
882 persistent = 0;
883 break;
885 if(token_compare(buf, token_start, token_end, "close")) {
886 persistent = 0;
887 } else if(token_compare(buf, token_start, token_end,
888 "keep-alive")) {
889 persistent = 1;
891 if(end)
892 break;
893 j = getNextTokenInList(buf, j,
894 &token_start, &token_end, NULL, NULL,
895 &end);
897 } else if(name == atomContentLength) {
898 j = skipWhitespace(buf, value_start);
899 if(j < 0) {
900 do_log(L_WARN, "Couldn't parse Content-Length: \n");
901 do_log_n(L_WARN, buf + value_start, value_end - value_start);
902 do_log(L_WARN, ".\n");
903 len = -1;
904 } else {
905 if(parseInt(buf, value_start, 0, INT_MAX, 10, &len) < 0) {
906 do_log(L_WARN, "Couldn't parse Content-Length: \n");
907 do_log_n(L_WARN, buf + value_start,
908 value_end - value_start);
909 do_log(L_WARN, ".\n");
910 len = -1;
913 } else if((!local && name == atomProxyAuthorization) ||
914 (local && name == atomAuthorization)) {
915 if(auth_return) {
916 auth = internAtomN(buf + value_start, value_end - value_start);
917 if(auth == NULL) {
918 do_log(L_ERROR, "Couldn't allocate authorization.\n");
919 goto fail;
922 } else if(name == atomReferer) {
923 int h;
924 if(censorReferer == 0 ||
925 (censorReferer == 1 && url != NULL &&
926 urlSameHost(url->string, url->length,
927 buf + value_start, value_end - value_start))) {
928 while(hbuf_length > hbuf_size - 2)
929 RESIZE_HBUF();
930 hbuf[hbuf_length++] = '\r';
931 hbuf[hbuf_length++] = '\n';
932 do {
933 h = snnprint_n(hbuf, hbuf_length, hbuf_size,
934 buf + name_start, value_end - name_start);
935 if(h < 0) RESIZE_HBUF();
936 } while(h < 0);
937 hbuf_length = h;
939 } else if(name == atomTrailer || name == atomUpgrade) {
940 do_log(L_ERROR, "Trailers or upgrade present.\n");
941 goto fail;
942 } else if(name == atomDate || name == atomExpires ||
943 name == atomIfModifiedSince ||
944 name == atomIfUnmodifiedSince ||
945 name == atomLastModified ||
946 name == atomXPolipoDate || name == atomXPolipoAccess) {
947 time_t t;
948 j = parse_time(buf, value_start, value_end, &t);
949 if(j < 0) {
950 if(name != atomExpires) {
951 do_log(L_WARN, "Couldn't parse %s: ", name->string);
952 do_log_n(L_WARN, buf + value_start,
953 value_end - value_start);
954 do_log(L_WARN, "\n");
956 t = -1;
958 if(name == atomDate) {
959 if(t >= 0)
960 date = t;
961 } else if(name == atomExpires) {
962 if(t >= 0)
963 expires = t;
964 else
965 expires = 0;
966 } else if(name == atomLastModified)
967 last_modified = t;
968 else if(name == atomIfModifiedSince)
969 ims = t;
970 else if(name == atomIfUnmodifiedSince)
971 inms = t;
972 else if(name == atomXPolipoDate)
973 polipo_age = t;
974 else if(name == atomXPolipoAccess)
975 polipo_access = t;
976 } else if(name == atomAge) {
977 j = skipWhitespace(buf, value_start);
978 if(j < 0) {
979 age = -1;
980 } else {
981 if(parseInt(buf, value_start, 0, INT_MAX, 10, &age) < 0)
982 age = -1;
984 if(age < 0) {
985 do_log(L_WARN, "Couldn't parse age: \n");
986 do_log_n(L_WARN, buf + value_start, value_end - value_start);
987 do_log(L_WARN, " -- ignored.\n");
989 } else if(name == atomXPolipoBodyOffset) {
990 j = skipWhitespace(buf, value_start);
991 if(j < 0) {
992 do_log(L_ERROR, "Couldn't parse body offset.\n");
993 goto fail;
994 } else {
995 if(parseInt(buf, value_start, 0, INT_MAX, 10,
996 &polipo_body_offset) < 0) {
997 do_log(L_ERROR, "Couldn't parse body offset.\n");
998 goto fail;
1001 } else if(name == atomTransferEncoding) {
1002 if(token_compare(buf, value_start, value_end, "identity"))
1003 te = TE_IDENTITY;
1004 else if(token_compare(buf, value_start, value_end, "chunked"))
1005 te = TE_CHUNKED;
1006 else
1007 te = TE_UNKNOWN;
1008 } else if(name == atomETag ||
1009 name == atomIfNoneMatch || name == atomIfMatch ||
1010 name == atomIfRange) {
1011 int x, y;
1012 int weak;
1013 char *e;
1014 j = getNextETag(buf, value_start, &x, &y, &weak);
1015 if(j < 0) {
1016 if(buf[value_start] != '\r' && buf[value_start] != '\n')
1017 do_log(L_ERROR, "Couldn't parse ETag.\n");
1018 } else if(weak) {
1019 do_log(L_WARN, "Server returned weak ETag -- ignored.\n");
1020 } else {
1021 e = strdup_n(buf + x, y - x);
1022 if(e == NULL) goto fail;
1023 if(name == atomETag) {
1024 if(!etag)
1025 etag = e;
1026 else
1027 free(e);
1028 } else if(name == atomIfNoneMatch) {
1029 if(!inm)
1030 inm = e;
1031 else
1032 free(e);
1033 } else if(name == atomIfMatch) {
1034 if(!im)
1035 im = e;
1036 else
1037 free(e);
1038 } else if(name == atomIfRange) {
1039 if(!ifrange)
1040 ifrange = e;
1041 else
1042 free(e);
1043 } else {
1044 abort();
1047 } else if(name == atomCacheControl) {
1048 int v_start, v_end;
1049 j = getNextTokenInList(buf, value_start,
1050 &token_start, &token_end,
1051 &v_start, &v_end,
1052 &end);
1053 while(1) {
1054 if(j < 0) {
1055 do_log(L_WARN, "Couldn't parse Cache-Control.\n");
1056 cache_control.flags |= CACHE_NO;
1057 break;
1059 if(token_compare(buf, token_start, token_end, "no-cache")) {
1060 cache_control.flags |= CACHE_NO;
1061 } else if(token_compare(buf, token_start, token_end,
1062 "public")) {
1063 cache_control.flags |= CACHE_PUBLIC;
1064 } else if(token_compare(buf, token_start, token_end,
1065 "private")) {
1066 cache_control.flags |= CACHE_PRIVATE;
1067 } else if(token_compare(buf, token_start, token_end,
1068 "no-store")) {
1069 cache_control.flags |= CACHE_NO_STORE;
1070 } else if(token_compare(buf, token_start, token_end,
1071 "no-transform")) {
1072 cache_control.flags |= CACHE_NO_TRANSFORM;
1073 } else if(token_compare(buf, token_start, token_end,
1074 "must-revalidate") ||
1075 token_compare(buf, token_start, token_end,
1076 "must-validate")) { /* losers */
1077 cache_control.flags |= CACHE_MUST_REVALIDATE;
1078 } else if(token_compare(buf, token_start, token_end,
1079 "proxy-revalidate")) {
1080 cache_control.flags |= CACHE_PROXY_REVALIDATE;
1081 } else if(token_compare(buf, token_start, token_end,
1082 "only-if-cached")) {
1083 cache_control.flags |= CACHE_ONLY_IF_CACHED;
1084 } else if(token_compare(buf, token_start, token_end,
1085 "max-age") ||
1086 token_compare(buf, token_start, token_end,
1087 "maxage") || /* losers */
1088 token_compare(buf, token_start, token_end,
1089 "s-maxage") ||
1090 token_compare(buf, token_start, token_end,
1091 "min-fresh")) {
1092 parseCacheControl(buf, token_start, token_end,
1093 v_start, v_end,
1094 &cache_control.max_age);
1095 } else if(token_compare(buf, token_start, token_end,
1096 "max-stale")) {
1097 parseCacheControl(buf, token_start, token_end,
1098 v_start, v_end,
1099 &cache_control.max_stale);
1100 } else {
1101 do_log(L_WARN, "Unsupported Cache-Control directive ");
1102 do_log_n(L_WARN, buf + token_start,
1103 (v_end >= 0 ? v_end : token_end) - token_start);
1104 do_log(L_WARN, " -- ignored.\n");
1106 if(end)
1107 break;
1108 j = getNextTokenInList(buf, j,
1109 &token_start, &token_end,
1110 &v_start, &v_end,
1111 &end);
1113 } else if(name == atomContentRange) {
1114 if(!client) {
1115 j = parseContentRange(buf, value_start,
1116 &content_range.from, &content_range.to,
1117 &content_range.full_length);
1118 if(j < 0) {
1119 do_log(L_ERROR, "Couldn't parse Content-Range: ");
1120 do_log_n(L_ERROR, buf + value_start,
1121 value_end - value_start);
1122 do_log(L_ERROR, "\n");
1123 goto fail;
1125 } else {
1126 do_log(L_ERROR, "Content-Range from client.\n");
1127 goto fail;
1129 } else if(name == atomRange) {
1130 if(client) {
1131 j = parseRange(buf, value_start, &range.from, &range.to);
1132 if(j < 0) {
1133 do_log(L_WARN, "Couldn't parse Range -- ignored.\n");
1134 range.from = -1;
1135 range.to = -1;
1137 } else {
1138 do_log(L_WARN, "Range from server -- ignored\n");
1140 } else if(name == atomXPolipoLocation) {
1141 if(location_return) {
1142 location =
1143 strdup_n(buf + value_start, value_end - value_start);
1144 if(location == NULL) {
1145 do_log(L_ERROR, "Couldn't allocate location.\n");
1146 goto fail;
1149 } else if(name == atomVia) {
1150 if(via_return) {
1151 AtomPtr new_via, full_via;
1152 new_via =
1153 internAtomN(buf + value_start, value_end - value_start);
1154 if(new_via == NULL) {
1155 do_log(L_ERROR, "Couldn't allocate via.\n");
1156 goto fail;
1158 if(via) {
1159 full_via =
1160 internAtomF("%s, %s", via->string, new_via->string);
1161 releaseAtom(new_via);
1162 if(full_via == NULL) {
1163 do_log(L_ERROR, "Couldn't allocate via");
1164 goto fail;
1166 releaseAtom(via);
1167 via = full_via;
1168 } else {
1169 via = new_via;
1172 } else if(name == atomExpect) {
1173 if(expect_return) {
1174 expect = internAtomLowerN(buf + value_start,
1175 value_end - value_start);
1176 if(expect == NULL) {
1177 do_log(L_ERROR, "Couldn't allocate expect.\n");
1178 goto fail;
1181 } else {
1182 if(!client && name == atomContentType) {
1183 if(token_compare(buf, value_start, value_end,
1184 "multipart/byteranges")) {
1185 do_log(L_ERROR,
1186 "Server returned multipart/byteranges -- yuck!\n");
1187 goto fail;
1190 if(name == atomVary) {
1191 if(!token_compare(buf, value_start, value_end, "host") &&
1192 !token_compare(buf, value_start, value_end, "*")) {
1193 /* What other vary headers should be ignored? */
1194 do_log(L_VARY, "Vary header present (");
1195 do_log_n(L_VARY,
1196 buf + value_start, value_end - value_start);
1197 do_log(L_VARY, ").\n");
1199 cache_control.flags |= CACHE_VARY;
1200 } else if(name == atomAuthorization) {
1201 cache_control.flags |= CACHE_AUTHORIZATION;
1204 if(name == atomPragma) {
1205 /* Pragma is only defined for the client, and the only
1206 standard value is no-cache (RFC 1945, 10.12).
1207 However, we honour a Pragma: no-cache for both the client
1208 and the server when there's no Cache-Control header. In
1209 all cases, we pass the Pragma header to the next hop. */
1210 if(!haveCacheControl) {
1211 j = getNextTokenInList(buf, value_start,
1212 &token_start, &token_end, NULL, NULL,
1213 &end);
1214 while(1) {
1215 if(j < 0) {
1216 do_log(L_WARN, "Couldn't parse Pragma.\n");
1217 cache_control.flags |= CACHE_NO;
1218 break;
1220 if(token_compare(buf, token_start, token_end,
1221 "no-cache"))
1222 cache_control.flags = CACHE_NO;
1223 if(end)
1224 break;
1225 j = getNextTokenInList(buf, j, &token_start, &token_end,
1226 NULL, NULL, &end);
1230 if(!client &&
1231 (name == atomSetCookie ||
1232 name == atomCookie || name == atomCookie2))
1233 cache_control.flags |= CACHE_COOKIE;
1235 if(hbuf) {
1236 if(name != atomConnection && name != atomHost &&
1237 name != atomAcceptRange && name != atomTE &&
1238 name != atomProxyAuthenticate &&
1239 name != atomKeepAlive &&
1240 (!hopToHop || !atomListMember(name, hopToHop)) &&
1241 !atomListMember(name, censoredHeaders)) {
1242 int h;
1243 while(hbuf_length > hbuf_size - 2)
1244 RESIZE_HBUF();
1245 hbuf[hbuf_length++] = '\r';
1246 hbuf[hbuf_length++] = '\n';
1247 do {
1248 h = snnprint_n(hbuf, hbuf_length, hbuf_size,
1249 buf + name_start,
1250 value_end - name_start);
1251 if(h < 0) RESIZE_HBUF();
1252 } while(h < 0);
1253 hbuf_length = h;
1257 releaseAtom(name);
1258 name = NULL;
1261 if(headers_return) {
1262 AtomPtr pheaders = NULL;
1263 pheaders = internAtomN(hbuf, hbuf_length);
1264 if(!pheaders)
1265 goto fail;
1266 *headers_return = pheaders;
1268 if(hbuf != hbuf_small)
1269 free(hbuf);
1270 hbuf = NULL;
1271 hbuf_size = 0;
1273 if(request)
1274 if(!persistent)
1275 request->flags &= ~REQUEST_PERSISTENT;
1277 if(te != TE_IDENTITY) len = -1;
1278 if(len_return) *len_return = len;
1279 if(cache_control_return) *cache_control_return = cache_control;
1280 if(condition_return) {
1281 if(ims >= 0 || inms >= 0 || im || inm || ifrange) {
1282 condition = httpMakeCondition();
1283 if(condition) {
1284 condition->ims = ims;
1285 condition->inms = inms;
1286 condition->im = im;
1287 condition->inm = inm;
1288 condition->ifrange = ifrange;
1289 } else {
1290 do_log(L_ERROR, "Couldn't allocate condition.\n");
1291 if(im) free(im);
1292 if(inm) free(inm);
1294 } else {
1295 condition = NULL;
1297 *condition_return = condition;
1298 } else {
1299 assert(!im && !inm);
1302 if(te_return) *te_return = te;
1303 if(date_return) *date_return = date;
1304 if(last_modified_return) *last_modified_return = last_modified;
1305 if(expires_return) *expires_return = expires;
1306 if(polipo_age_return) *polipo_age_return = polipo_age;
1307 if(polipo_access_return) *polipo_access_return = polipo_access;
1308 if(polipo_body_offset_return)
1309 *polipo_body_offset_return = polipo_body_offset;
1310 if(age_return) *age_return = age;
1311 if(etag_return)
1312 *etag_return = etag;
1313 else {
1314 if(etag) free(etag);
1316 if(range_return) *range_return = range;
1317 if(content_range_return) *content_range_return = content_range;
1318 if(location_return) {
1319 *location_return = location;
1320 } else {
1321 if(location)
1322 free(location);
1324 if(via_return)
1325 *via_return = via;
1326 else {
1327 if(via)
1328 releaseAtom(via);
1330 if(expect_return)
1331 *expect_return = expect;
1332 else {
1333 if(expect)
1334 releaseAtom(expect);
1336 if(auth_return)
1337 *auth_return = auth;
1338 else {
1339 if(auth)
1340 releaseAtom(auth);
1342 if(hopToHop) destroyAtomList(hopToHop);
1343 return i;
1345 fail:
1346 if(hbuf && hbuf != hbuf_small) free(hbuf);
1347 if(name) releaseAtom(name);
1348 if(etag) free(etag);
1349 if(location) free(location);
1350 if(via) releaseAtom(via);
1351 if(expect) releaseAtom(expect);
1352 if(auth) releaseAtom(auth);
1353 if(hopToHop) destroyAtomList(hopToHop);
1355 return -1;
1356 #undef RESIZE_HBUF
1360 httpFindHeader(AtomPtr header, const char *headers, int hlen,
1361 int *value_begin_return, int *value_end_return)
1363 int len = header->length;
1364 int i = 0;
1366 while(i + len + 1 < hlen) {
1367 if(headers[i + len] == ':' &&
1368 lwrcmp(headers + i, header->string, len) == 0) {
1369 int j = i + len + 1, k;
1370 while(j < hlen && headers[j] == ' ')
1371 j++;
1372 k = j;
1373 while(k < hlen && headers[k] != '\n' && headers[k] != '\r')
1374 k++;
1375 *value_begin_return = j;
1376 *value_end_return = k;
1377 return 1;
1378 } else {
1379 while(i < hlen && headers[i] != '\n' && headers[i] != '\r')
1380 i++;
1381 i++;
1382 if(i < hlen && headers[i] == '\n')
1383 i++;
1386 return 0;
/*
 * Split url[0..len) into host, port and remainder.
 *
 * If the URL starts with "http://" (as compared by lwrcmp()):
 *   *x_return    = 7, the offset just past the scheme;
 *   *y_return    = offset where the host part stops (at ':' or '/', or
 *                  after the closing ']' of a bracketed literal);
 *   *port_return = the number parsed by parseIntN() after ':', or 80 if
 *                  there is no ':' or parseIntN() fails;
 *   *z_return    = offset just past the parsed port if there was one,
 *                  otherwise the same as *y_return.
 * Otherwise *x_return and *y_return are -1, *port_return is -1 and
 * *z_return is 0.
 *
 * Always returns 0.
 */
int
parseUrl(const char *url, int len,
         int *x_return, int *y_return, int *port_return, int *z_return)
{
    int host_start = -1, host_end = -1;
    int port = -1;
    int pos = 0;

    if(len >= 7 && lwrcmp(url, "http://", 7) == 0) {
        host_start = 7;

        if(host_start < len && url[host_start] == '[') {
            /* Bracketed literal address, RFC 2732 */
            pos = host_start + 1;
            while(pos < len) {
                char c = url[pos];
                if(c == ']') {
                    pos++;
                    break;
                }
                if(c != ':' && !letter(c) && !digit(c))
                    break;
                pos++;
            }
        } else {
            pos = host_start;
            while(pos < len && url[pos] != ':' && url[pos] != '/')
                pos++;
        }
        host_end = pos;

        if(pos < len && url[pos] == ':') {
            int after = parseIntN(url, pos + 1, len, 0, 65535, 10, &port);
            if(after >= 0)
                pos = after;
            else
                port = 80;      /* unparsable port: pos stays on the ':' */
        } else {
            port = 80;
        }
    }

    *x_return = host_start;
    *y_return = host_end;
    *port_return = port;
    *z_return = pos;
    return 0;
}
/*
 * Return 1 if url[0..len) is non-empty and begins with '/', 0 otherwise.
 */
int
urlIsLocal(const char *url, int len)
{
    if(len <= 0)
        return 0;
    return url[0] == '/';
}
/*
 * Return 1 if url[0..len) is at least eight bytes long and starts with
 * the exact bytes "/polipo/", 0 otherwise.
 */
int
urlIsSpecial(const char *url, int len)
{
    static const char prefix[] = "/polipo/";
    const int prefix_len = (int)sizeof(prefix) - 1;

    if(len < prefix_len)
        return 0;
    return memcmp(url, prefix, prefix_len) == 0;
}
1452 parseChunkSize(const char *restrict buf, int i, int end,
1453 int *chunk_size_return)
1455 int v;
1457 i = parseIntN(buf, i, end, 0, INT_MAX, 16, &v);
1458 if(i < 0)
1459 return -1;
1461 while(i < end) {
1462 if(buf[i] == ' ' || buf[i] == '\t')
1463 i++;
1464 else
1465 break;
1468 if(i >= end - 1)
1469 return 0;
1471 if(buf[i] != '\r' || buf[i + 1] != '\n')
1472 return -1;
1474 i += 2;
1476 if(v == 0) {
1477 if(i >= end - 1)
1478 return 0;
1479 if(buf[i] != '\r') {
1480 do_log(L_ERROR, "Trailers present!\n");
1481 return -1;
1483 i++;
1484 if(buf[i] != '\n')
1485 return -1;
1486 i++;
1489 *chunk_size_return = v;
1490 return i;
1495 checkVia(AtomPtr name, AtomPtr via)
1497 int i;
1498 char *v;
1499 if(via == NULL || via->length == 0)
1500 return 1;
1502 v = via->string;
1504 i = 0;
1505 while(i < via->length) {
1506 while(v[i] == ' ' || v[i] == '\t' || v[i] == ',' ||
1507 v[i] == '\r' || v[i] == '\n' ||
1508 digit(v[i]) || v[i] == '.')
1509 i++;
1510 if(i + name->length > via->length)
1511 break;
1512 if(memcmp(v + i, name->string, name->length) == 0) {
1513 char c = v[i + name->length];
1514 if(c == '\0' || c == ' ' || c == '\t' || c == ',' ||
1515 c == '\r' || c == '\n')
1516 return 0;
1518 i++;
1519 while(letter(v[i]) || digit(v[i]) || v[i] == '.')
1520 i++;
1522 return 1;