nginx 0.1.19
[nginx-catap.git] / src / http / ngx_http_parse.c
blobca84d1627188114868268977628e6a7d8cd699ef
2 /*
3 * Copyright (C) Igor Sysoev
4 */
7 #include <ngx_config.h>
8 #include <ngx_core.h>
9 #include <ngx_http.h>
12 ngx_int_t ngx_http_parse_request_line(ngx_http_request_t *r, ngx_buf_t *b)
14 u_char ch, *p, *m;
15 enum {
16 sw_start = 0,
17 sw_method,
18 sw_space_after_method,
19 sw_spaces_before_uri,
20 sw_schema,
21 sw_schema_slash,
22 sw_schema_slash_slash,
23 sw_host,
24 sw_port,
25 sw_after_slash_in_uri,
26 sw_check_uri,
27 sw_uri,
28 sw_http_09,
29 sw_http_H,
30 sw_http_HT,
31 sw_http_HTT,
32 sw_http_HTTP,
33 sw_first_major_digit,
34 sw_major_digit,
35 sw_first_minor_digit,
36 sw_minor_digit,
37 sw_almost_done
38 } state;
40 state = r->state;
42 for (p = b->pos; p < b->last; p++) {
43 ch = *p;
45 /* gcc 2.95.2 and msvc 6.0 compile this switch as an jump table */
47 switch (state) {
49 /* HTTP methods: GET, HEAD, POST */
50 case sw_start:
51 r->request_start = p;
53 if (ch == CR || ch == LF) {
54 break;
57 if (ch < 'A' || ch > 'Z') {
58 return NGX_HTTP_PARSE_INVALID_METHOD;
61 state = sw_method;
62 break;
64 case sw_method:
65 if (ch == ' ') {
66 r->method_end = p;
67 m = r->request_start;
69 if (p - m == 3) {
71 if (m[0] == 'G' && m[1] == 'E' && m[2] == 'T') {
72 r->method = NGX_HTTP_GET;
75 } else if (p - m == 4) {
77 if (m[0] == 'P' && m[1] == 'O'
78 && m[2] == 'S' && m[3] == 'T')
80 r->method = NGX_HTTP_POST;
82 } else if (m[0] == 'H' && m[1] == 'E'
83 && m[2] == 'A' && m[3] == 'D')
85 r->method = NGX_HTTP_HEAD;
89 state = sw_spaces_before_uri;
90 break;
93 if (ch < 'A' || ch > 'Z') {
94 return NGX_HTTP_PARSE_INVALID_METHOD;
97 break;
99 /* single space after method */
100 case sw_space_after_method:
101 switch (ch) {
102 case ' ':
103 state = sw_spaces_before_uri;
104 break;
105 default:
106 return NGX_HTTP_PARSE_INVALID_METHOD;
108 break;
110 /* space* before URI */
111 case sw_spaces_before_uri:
112 if ((ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z')) {
113 r->schema_start = p;
114 state = sw_schema;
115 break;
118 switch (ch) {
119 case '/':
120 r->uri_start = p;
121 state = sw_after_slash_in_uri;
122 break;
123 case ' ':
124 break;
125 default:
126 return NGX_HTTP_PARSE_INVALID_REQUEST;
128 break;
130 case sw_schema:
131 if ((ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z')) {
132 break;
135 switch (ch) {
136 case ':':
137 r->schema_end = p;
138 state = sw_schema_slash;
139 break;
140 default:
141 return NGX_HTTP_PARSE_INVALID_REQUEST;
143 break;
145 case sw_schema_slash:
146 switch (ch) {
147 case '/':
148 state = sw_schema_slash_slash;
149 break;
150 default:
151 return NGX_HTTP_PARSE_INVALID_REQUEST;
153 break;
155 case sw_schema_slash_slash:
156 switch (ch) {
157 case '/':
158 r->host_start = p;
159 state = sw_host;
160 break;
161 default:
162 return NGX_HTTP_PARSE_INVALID_REQUEST;
164 break;
166 case sw_host:
167 if ((ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z')
168 || (ch >= '0' && ch <= '9') || ch == '.' || ch == '-')
170 break;
173 switch (ch) {
174 case ':':
175 r->host_end = p;
176 state = sw_port;
177 break;
178 case '/':
179 r->host_end = p;
180 r->uri_start = p;
181 state = sw_after_slash_in_uri;
182 break;
183 default:
184 return NGX_HTTP_PARSE_INVALID_REQUEST;
186 break;
188 case sw_port:
189 if (ch >= '0' && ch <= '9') {
190 break;
193 switch (ch) {
194 case '/':
195 r->port_end = p;
196 r->uri_start = p;
197 state = sw_after_slash_in_uri;
198 break;
199 default:
200 return NGX_HTTP_PARSE_INVALID_REQUEST;
202 break;
204 /* check "/.", "//", "%", and "\" (Win32) in URI */
205 case sw_after_slash_in_uri:
207 if ((ch >= 'a' && ch <= 'z')
208 || (ch >= 'A' && ch <= 'Z')
209 || (ch >= '0' && ch <= '9'))
211 state = sw_check_uri;
212 break;
215 switch (ch) {
216 case ' ':
217 r->uri_end = p;
218 state = sw_http_09;
219 break;
220 case CR:
221 r->uri_end = p;
222 r->http_minor = 9;
223 state = sw_almost_done;
224 break;
225 case LF:
226 r->uri_end = p;
227 r->http_minor = 9;
228 goto done;
229 case '.':
230 r->complex_uri = 1;
231 state = sw_uri;
232 break;
233 case '%':
234 r->quoted_uri = 1;
235 state = sw_uri;
236 break;
237 case '/':
238 r->complex_uri = 1;
239 state = sw_uri;
240 break;
241 #if (NGX_WIN32)
242 case '\\':
243 r->complex_uri = 1;
244 state = sw_uri;
245 break;
246 #endif
247 case '?':
248 r->args_start = p + 1;
249 state = sw_uri;
250 break;
251 case '+':
252 r->plus_in_uri = 1;
253 break;
254 case '\0':
255 r->zero_in_uri = 1;
256 break;
257 default:
258 state = sw_check_uri;
259 break;
261 break;
263 /* check "/", "%" and "\" (Win32) in URI */
264 case sw_check_uri:
266 if ((ch >= 'a' && ch <= 'z')
267 || (ch >= 'A' && ch <= 'Z')
268 || (ch >= '0' && ch <= '9'))
270 break;
273 switch (ch) {
274 case '/':
275 r->uri_ext = NULL;
276 state = sw_after_slash_in_uri;
277 break;
278 case '.':
279 r->uri_ext = p + 1;
280 break;
281 case ' ':
282 r->uri_end = p;
283 state = sw_http_09;
284 break;
285 case CR:
286 r->uri_end = p;
287 r->http_minor = 9;
288 state = sw_almost_done;
289 break;
290 case LF:
291 r->uri_end = p;
292 r->http_minor = 9;
293 goto done;
294 #if (NGX_WIN32)
295 case '\\':
296 r->complex_uri = 1;
297 state = sw_after_slash_in_uri;
298 break;
299 #endif
300 case '%':
301 r->quoted_uri = 1;
302 state = sw_uri;
303 break;
304 case '+':
305 r->plus_in_uri = 1;
306 break;
307 case '?':
308 r->args_start = p + 1;
309 state = sw_uri;
310 break;
311 case '\0':
312 r->zero_in_uri = 1;
313 break;
315 break;
317 /* URI */
318 case sw_uri:
319 switch (ch) {
320 case ' ':
321 r->uri_end = p;
322 state = sw_http_09;
323 break;
324 case CR:
325 r->uri_end = p;
326 r->http_minor = 9;
327 state = sw_almost_done;
328 break;
329 case LF:
330 r->uri_end = p;
331 r->http_minor = 9;
332 goto done;
333 case '+':
334 r->plus_in_uri = 1;
335 break;
336 case '\0':
337 r->zero_in_uri = 1;
338 break;
340 break;
342 /* space+ after URI */
343 case sw_http_09:
344 switch (ch) {
345 case ' ':
346 break;
347 case CR:
348 r->http_minor = 9;
349 state = sw_almost_done;
350 break;
351 case LF:
352 r->http_minor = 9;
353 goto done;
354 case 'H':
355 r->http_protocol.data = p;
356 state = sw_http_H;
357 break;
358 default:
359 return NGX_HTTP_PARSE_INVALID_REQUEST;
361 break;
363 case sw_http_H:
364 switch (ch) {
365 case 'T':
366 state = sw_http_HT;
367 break;
368 default:
369 return NGX_HTTP_PARSE_INVALID_REQUEST;
371 break;
373 case sw_http_HT:
374 switch (ch) {
375 case 'T':
376 state = sw_http_HTT;
377 break;
378 default:
379 return NGX_HTTP_PARSE_INVALID_REQUEST;
381 break;
383 case sw_http_HTT:
384 switch (ch) {
385 case 'P':
386 state = sw_http_HTTP;
387 break;
388 default:
389 return NGX_HTTP_PARSE_INVALID_REQUEST;
391 break;
393 case sw_http_HTTP:
394 switch (ch) {
395 case '/':
396 state = sw_first_major_digit;
397 break;
398 default:
399 return NGX_HTTP_PARSE_INVALID_REQUEST;
401 break;
403 /* first digit of major HTTP version */
404 case sw_first_major_digit:
405 if (ch < '1' || ch > '9') {
406 return NGX_HTTP_PARSE_INVALID_REQUEST;
409 r->http_major = ch - '0';
410 state = sw_major_digit;
411 break;
413 /* major HTTP version or dot */
414 case sw_major_digit:
415 if (ch == '.') {
416 state = sw_first_minor_digit;
417 break;
420 if (ch < '0' || ch > '9') {
421 return NGX_HTTP_PARSE_INVALID_REQUEST;
424 r->http_major = r->http_major * 10 + ch - '0';
425 break;
427 /* first digit of minor HTTP version */
428 case sw_first_minor_digit:
429 if (ch < '0' || ch > '9') {
430 return NGX_HTTP_PARSE_INVALID_REQUEST;
433 r->http_minor = ch - '0';
434 state = sw_minor_digit;
435 break;
437 /* minor HTTP version or end of request line */
438 case sw_minor_digit:
439 if (ch == CR) {
440 state = sw_almost_done;
441 break;
444 if (ch == LF) {
445 goto done;
448 if (ch < '0' || ch > '9') {
449 return NGX_HTTP_PARSE_INVALID_REQUEST;
452 r->http_minor = r->http_minor * 10 + ch - '0';
453 break;
455 /* end of request line */
456 case sw_almost_done:
457 r->request_end = p - 1;
458 switch (ch) {
459 case LF:
460 goto done;
461 default:
462 return NGX_HTTP_PARSE_INVALID_REQUEST;
464 break;
468 b->pos = p;
469 r->state = state;
471 return NGX_AGAIN;
473 done:
475 b->pos = p + 1;
477 if (r->request_end == NULL) {
478 r->request_end = p;
481 r->http_version = r->http_major * 1000 + r->http_minor;
482 r->state = sw_start;
484 if (r->http_version == 9 && r->method != NGX_HTTP_GET) {
485 return NGX_HTTP_PARSE_INVALID_09_METHOD;
488 return NGX_OK;
492 ngx_int_t ngx_http_parse_header_line(ngx_http_request_t *r, ngx_buf_t *b)
494 u_char c, ch, *p;
495 enum {
496 sw_start = 0,
497 sw_name,
498 sw_space_before_value,
499 sw_value,
500 sw_space_after_value,
501 sw_almost_done,
502 sw_header_almost_done,
503 sw_ignore_line
504 } state;
506 state = r->state;
508 for (p = b->pos; p < b->last; p++) {
509 ch = *p;
511 switch (state) {
513 /* first char */
514 case sw_start:
515 switch (ch) {
516 case CR:
517 r->header_end = p;
518 state = sw_header_almost_done;
519 break;
520 case LF:
521 r->header_end = p;
522 goto header_done;
523 default:
524 state = sw_name;
525 r->header_name_start = p;
527 c = (u_char) (ch | 0x20);
528 if (c >= 'a' && c <= 'z') {
529 break;
532 if (ch == '-' || ch == '_' || ch == '~' || ch == '.') {
533 break;
536 if (ch >= '0' && ch <= '9') {
537 break;
540 return NGX_HTTP_PARSE_INVALID_HEADER;
543 break;
545 /* header name */
546 case sw_name:
547 c = (u_char) (ch | 0x20);
548 if (c >= 'a' && c <= 'z') {
549 break;
552 if (ch == ':') {
553 r->header_name_end = p;
554 state = sw_space_before_value;
555 break;
558 if (ch == '-' || ch == '_' || ch == '~' || ch == '.') {
559 break;
562 if (ch >= '0' && ch <= '9') {
563 break;
566 /* IIS may send the duplicate "HTTP/1.1 ..." lines */
567 if (ch == '/'
568 && r->proxy
569 && p - r->header_start == 4
570 && ngx_strncmp(r->header_start, "HTTP", 4) == 0)
572 state = sw_ignore_line;
573 break;
576 return NGX_HTTP_PARSE_INVALID_HEADER;
578 /* space* before header value */
579 case sw_space_before_value:
580 switch (ch) {
581 case ' ':
582 break;
583 case CR:
584 r->header_start = r->header_end = p;
585 state = sw_almost_done;
586 break;
587 case LF:
588 r->header_start = r->header_end = p;
589 goto done;
590 default:
591 r->header_start = p;
592 state = sw_value;
593 break;
595 break;
597 /* header value */
598 case sw_value:
599 switch (ch) {
600 case ' ':
601 r->header_end = p;
602 state = sw_space_after_value;
603 break;
604 case CR:
605 r->header_end = p;
606 state = sw_almost_done;
607 break;
608 case LF:
609 r->header_end = p;
610 goto done;
612 break;
614 /* space* before end of header line */
615 case sw_space_after_value:
616 switch (ch) {
617 case ' ':
618 break;
619 case CR:
620 state = sw_almost_done;
621 break;
622 case LF:
623 goto done;
624 default:
625 state = sw_value;
626 break;
628 break;
630 /* ignore header line */
631 case sw_ignore_line:
632 switch (ch) {
633 case LF:
634 state = sw_start;
635 break;
636 default:
637 break;
639 break;
641 /* end of header line */
642 case sw_almost_done:
643 switch (ch) {
644 case LF:
645 goto done;
646 default:
647 return NGX_HTTP_PARSE_INVALID_HEADER;
649 break;
651 /* end of header */
652 case sw_header_almost_done:
653 switch (ch) {
654 case LF:
655 goto header_done;
656 default:
657 return NGX_HTTP_PARSE_INVALID_HEADER;
659 break;
663 b->pos = p;
664 r->state = state;
666 return NGX_AGAIN;
668 done:
670 b->pos = p + 1;
671 r->state = sw_start;
673 return NGX_OK;
675 header_done:
677 b->pos = p + 1;
678 r->state = sw_start;
680 return NGX_HTTP_PARSE_HEADER_DONE;
684 ngx_int_t ngx_http_parse_complex_uri(ngx_http_request_t *r)
686 u_char c, ch, decoded, *p, *u;
687 enum {
688 sw_usual = 0,
689 sw_colon,
690 sw_colon_slash,
691 sw_slash,
692 sw_dot,
693 sw_dot_dot,
694 #if (NGX_WIN32)
695 sw_dot_dot_dot,
696 #endif
697 sw_quoted,
698 sw_quoted_second
699 } state, quoted_state;
701 #if (NGX_SUPPRESS_WARN)
702 decoded = '\0';
703 quoted_state = sw_usual;
704 #endif
706 state = sw_usual;
707 p = r->uri_start;
708 u = r->uri.data;
709 r->uri_ext = NULL;
710 r->args_start = NULL;
712 ch = *p++;
714 while (p <= r->uri_end) {
717 * we use "ch = *p++" inside the cycle, but this operation is safe,
718 * because after the URI there is always at least one charcter:
719 * the line feed
722 ngx_log_debug4(NGX_LOG_DEBUG_HTTP, r->connection->log, 0,
723 "s:%d in:'%Xd:%c', out:'%c'", state, ch, ch, *u);
725 switch (state) {
726 case sw_usual:
727 switch(ch) {
728 #if (NGX_WIN32)
729 case '\\':
730 r->uri_ext = NULL;
732 if (p == r->uri_start + r->uri.len) {
735 * we omit the last "\" to cause redirect because
736 * the browsers do not treat "\" as "/" in relative URL path
739 break;
742 state = sw_slash;
743 *u++ = '/';
744 break;
745 #endif
746 case '/':
747 r->uri_ext = NULL;
748 state = sw_slash;
749 *u++ = ch;
750 break;
751 case '%':
752 quoted_state = state;
753 state = sw_quoted;
754 break;
755 case '?':
756 r->args_start = p;
757 goto done;
758 case ':':
759 state = sw_colon;
760 *u++ = ch;
761 break;
762 case '.':
763 r->uri_ext = u + 1;
764 *u++ = ch;
765 break;
766 default:
767 *u++ = ch;
768 break;
770 ch = *p++;
771 break;
773 case sw_colon:
774 switch(ch) {
775 #if (NGX_WIN32)
776 case '\\':
777 state = sw_colon_slash;
778 *u++ = '/';
779 break;
780 #endif
781 case '/':
782 state = sw_colon_slash;
783 *u++ = ch;
784 break;
785 case ':':
786 *u++ = ch;
787 break;
788 case '%':
789 quoted_state = state;
790 state = sw_quoted;
791 break;
792 case '?':
793 r->args_start = p;
794 goto done;
795 default:
796 state = sw_usual;
797 *u++ = ch;
798 break;
800 ch = *p++;
801 break;
803 case sw_colon_slash:
804 switch(ch) {
805 #if (NGX_WIN32)
806 case '\\':
807 state = sw_slash;
808 *u++ = '/';
809 break;
810 #endif
811 case '/':
812 state = sw_slash;
813 *u++ = ch;
814 break;
815 case '.':
816 state = sw_dot;
817 *u++ = ch;
818 break;
819 case '%':
820 quoted_state = state;
821 state = sw_quoted;
822 break;
823 case '?':
824 r->args_start = p;
825 goto done;
826 default:
827 state = sw_usual;
828 *u++ = ch;
829 break;
831 ch = *p++;
832 break;
834 case sw_slash:
835 switch(ch) {
836 #if (NGX_WIN32)
837 case '\\':
838 break;
839 #endif
840 case '/':
841 break;
842 case '.':
843 state = sw_dot;
844 *u++ = ch;
845 break;
846 case '%':
847 quoted_state = state;
848 state = sw_quoted;
849 break;
850 case '?':
851 r->args_start = p;
852 goto done;
853 default:
854 state = sw_usual;
855 *u++ = ch;
856 break;
858 ch = *p++;
859 break;
861 case sw_dot:
862 switch(ch) {
863 #if (NGX_WIN32)
864 case '\\':
865 /* fall through */
866 #endif
867 case '/':
868 state = sw_slash;
869 u--;
870 break;
871 case '.':
872 state = sw_dot_dot;
873 *u++ = ch;
874 break;
875 case '%':
876 quoted_state = state;
877 state = sw_quoted;
878 break;
879 case '?':
880 r->args_start = p;
881 goto done;
882 default:
883 state = sw_usual;
884 *u++ = ch;
885 break;
887 ch = *p++;
888 break;
890 case sw_dot_dot:
891 switch(ch) {
892 #if (NGX_WIN32)
893 case '\\':
894 /* fall through */
895 #endif
896 case '/':
897 state = sw_slash;
898 u -= 4;
899 if (u < r->uri.data) {
900 return NGX_HTTP_PARSE_INVALID_REQUEST;
902 while (*(u - 1) != '/') {
903 u--;
905 break;
906 case '%':
907 quoted_state = state;
908 state = sw_quoted;
909 break;
910 case '?':
911 r->args_start = p;
912 goto done;
913 #if (NGX_WIN32)
914 case '.':
915 state = sw_dot_dot_dot;
916 *u++ = ch;
917 break;
918 #endif
919 default:
920 state = sw_usual;
921 *u++ = ch;
922 break;
924 ch = *p++;
925 break;
927 #if (NGX_WIN32)
928 case sw_dot_dot_dot:
929 switch(ch) {
930 case '\\':
931 case '/':
932 state = sw_slash;
933 u -= 5;
934 if (u < r->uri.data) {
935 return NGX_HTTP_PARSE_INVALID_REQUEST;
937 while (*u != '/') {
938 u--;
940 if (u < r->uri.data) {
941 return NGX_HTTP_PARSE_INVALID_REQUEST;
943 while (*(u - 1) != '/') {
944 u--;
946 break;
947 case '%':
948 quoted_state = state;
949 state = sw_quoted;
950 break;
951 default:
952 state = sw_usual;
953 *u++ = ch;
954 break;
956 ch = *p++;
957 break;
958 #endif
960 case sw_quoted:
961 if (ch >= '0' && ch <= '9') {
962 decoded = (u_char) (ch - '0');
963 state = sw_quoted_second;
964 ch = *p++;
965 break;
968 c = (u_char) (ch | 0x20);
969 if (c >= 'a' && c <= 'f') {
970 decoded = (u_char) (c - 'a' + 10);
971 state = sw_quoted_second;
972 ch = *p++;
973 break;
976 return NGX_HTTP_PARSE_INVALID_REQUEST;
978 case sw_quoted_second:
979 if (ch >= '0' && ch <= '9') {
980 ch = (u_char) ((decoded << 4) + ch - '0');
982 if (ch == '%') {
983 state = sw_usual;
984 *u++ = ch;
985 ch = *p++;
986 break;
989 if (ch == '\0') {
990 r->zero_in_uri = 1;
991 *u++ = ch;
992 ch = *p++;
995 state = quoted_state;
996 break;
999 c = (u_char) (ch | 0x20);
1000 if (c >= 'a' && c <= 'f') {
1001 ch = (u_char) ((decoded << 4) + c - 'a' + 10);
1002 if (ch == '?') {
1003 *u++ = ch;
1004 ch = *p++;
1006 state = quoted_state;
1007 break;
1010 return NGX_HTTP_PARSE_INVALID_REQUEST;
1014 done:
1016 r->uri.len = u - r->uri.data;
1017 r->uri.data[r->uri.len] = '\0';
1019 if (r->uri_ext) {
1020 r->exten.len = u - r->uri_ext;
1021 r->exten.data = r->uri_ext;
1024 r->uri_ext = NULL;
1026 return NGX_OK;