3 * Copyright (c) 2010, Zed A. Shaw and Mongrel2 Project Contributors.
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions are
10 * * Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
17 * * Neither the name of the Mongrel2 Project, Zed A. Shaw, nor the names
18 * of its contributors may be used to endorse or promote products
19 * derived from this software without specific prior written
22 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
23 * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
26 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
27 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
28 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
29 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
30 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
31 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
32 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
35 #include "http11_parser.h"
/*
 * Offset helpers used by the parser actions. Each one expects a `buffer`
 * pointer and a `parser` pointer to be in scope where it is expanded;
 * positions are stored in the parser as offsets from `buffer`.
 */

/* Distance from the offset stored in parser->AT to the position FPC.
 * FPC is parenthesized (as in MARK) so a compound argument groups correctly. */
#define LEN(AT, FPC) ((FPC) - buffer - parser->AT)
/* Store the position FPC in parser->M as an offset from buffer. */
#define MARK(M,FPC) (parser->M = (FPC) - buffer)
/* Turn the offset stored in parser->F back into a pointer into buffer. */
#define PTR_TO(F) (buffer + parser->F)
52 action mark {MARK(mark, fpc); } # save the current position in parser->mark as an offset from buffer
55 action start_field { MARK(field_start, fpc); } # save where a header field name begins
/* Field name length: distance from the saved field_start to the current position. */
57 parser->field_len = LEN(field_start, fpc);
60 action start_value { MARK(mark, fpc); } # save where a header field value begins (reuses parser->mark)
/* The http_field callback is optional; the header is only reported when it is set. */
63 if(parser->http_field != NULL) {
/* Arguments: user data, field name pointer and length, field value pointer and length. */
64 parser->http_field(parser->data, PTR_TO(field_start), parser->field_len, PTR_TO(mark), LEN(mark, fpc));
/*
 * The apply_element calls below all follow one pattern: pass the bytes from
 * a saved offset up to the current position, tagged with an element type,
 * together with the maximum length accepted for that type.
 */
/* CONTENT_TYPE element, starting at parser->mark, at most 10*1024 bytes. */
69 if(!apply_element(parser, CONTENT_TYPE, PTR_TO(mark), fpc, 10*1024))
/* FRAGMENT element, starting at parser->mark, at most 10*1024 bytes. */
74 if(!apply_element(parser, FRAGMENT, PTR_TO(mark), fpc, 10*1024))
/* HTTP_VERSION element, starting at parser->mark, at most 10 bytes. */
79 if(!apply_element(parser, HTTP_VERSION, PTR_TO(mark), fpc, 10))
/* REQUEST_PATH element, starting at parser->mark, at most 1024 bytes. */
84 if(!apply_element(parser, REQUEST_PATH, PTR_TO(mark), fpc, 1024))
88 action request_method {
/* REQUEST_METHOD element, starting at parser->mark, at most 1024 bytes. */
89 if(!apply_element(parser, REQUEST_METHOD, PTR_TO(mark), fpc, 1024))
/* REQUEST_URI element, starting at parser->mark, at most 12*1024 bytes. */
94 if(!apply_element(parser, REQUEST_URI, PTR_TO(mark), fpc, 12*1024))
98 action start_query {MARK(query_start, fpc); } # save where the query string begins
100 action query_string {
/* QUERY_STRING element, starting at parser->query_start (not mark), at most 10*1024 bytes. */
101 if(!apply_element(parser, QUERY_STRING, PTR_TO(query_start), fpc, 10*1024))
/* The body starts one byte past the current position, stored as an offset from buffer. */
106 parser->body_start = fpc - buffer + 1;
110 #### HTTP PROTOCOL GRAMMAR
# Line terminator: either CR LF or a bare LF is accepted.
112 CRLF = ("\r\n" | "\n");
# Character classes used to build URI characters.
116 safe = ("$" | "-" | "_" | ".");
117 extra = ("!" | "*" | "'" | "(" | ")" | ",");
118 reserved = (";" | "/" | "?" | ":" | "@" | "&" | "=" | "+");
119 unsafe = (CTL | " " | "\"" | "#" | "%" | "<" | ">");
# national: any character that belongs to none of the classes listed here.
120 national = any -- (alpha | digit | reserved | extra | safe | unsafe);
121 unreserved = (alpha | digit | safe | extra | national);
# escape: a percent sign followed by two hexadecimal digits.
122 escape = ("%" xdigit xdigit);
# uchar: one unreserved character or one percent escape.
123 uchar = (unreserved | escape);
# pchar: a uchar or one of the listed punctuation characters.
124 pchar = (uchar | ":" | "@" | "&" | "=" | "+");
# Separator characters that may not appear in a token.
125 tspecials = ("(" | ")" | "<" | ">" | "@" | "," | ";" | ":" | "\\" | "\"" | "/" | "[" | "]" | "?" | "=" | "{" | "}" | " " | "\t");
# token: a single ascii character that is neither a control character nor a tspecial.
128 token = (ascii -- (CTL | tspecials));
130 # URI schemes and absolute paths
# absolute_uri: a scheme, a colon, then any run of uchar or reserved characters.
132 absolute_uri = (scheme ":" (uchar | reserved )*);
# path: one or more pchar, then any number of "/"-introduced segments (which may be empty).
134 path = ( pchar+ ( "/" pchar* )* ) ;
# query: any run of uchar or reserved characters; query_string runs when the run is left.
135 query = ( uchar | reserved )* %query_string ;
136 param = ( pchar | "/" )* ;
# params: one or more param separated by ";".
137 params = ( param ( ";" param )* ) ;
# rel_path: optional path (request_path runs on leaving it), optional ";" params,
# then an optional "?" query; start_query runs on leaving the "?".
138 rel_path = ( path? %request_path (";" params)? ) ("?" %start_query query)?;
# absolute_path: one or more leading slashes followed by a rel_path.
139 absolute_path = ( "/"+ rel_path );
# Request_URI: "*", an absolute URI, or an absolute path; mark runs on entry, request_uri on leaving.
141 Request_URI = ( "*" | absolute_uri | absolute_path ) >mark %request_uri;
# Fragment: any run of uchar or reserved characters; mark runs on entry, fragment on leaving.
142 Fragment = ( uchar | reserved )* >mark %fragment;
# Method: 1 to 20 characters drawn from upper-case letters, digits and safe.
143 Method = ( upper | digit | safe ){1,20} >mark %request_method;
# http_number: only "1.0" and "1.1" match.
145 http_number = ( "1." ("0" | "1") ) ;
146 HTTP_Version = ( "HTTP/" http_number ) >mark %http_version ;
# Request_Line: method, space, URI, optional "#" fragment, space, version, line terminator.
147 Request_Line = ( Method " " Request_URI ("#" Fragment){0,1} " " HTTP_Version CRLF ) ;
# field_name: one or more token characters other than ":"; start_field on entry, write_field on leaving.
149 field_name = ( token -- ":" )+ >start_field %write_field;
# field_value: any characters at all; start_value on entry, write_value on leaving.
151 field_value = any* >start_value %write_value;
# message_header: name, colon, optional spaces, value. The :> operator gives entering CRLF
# priority, so field_value (any*) stops at the line terminator instead of consuming it.
153 message_header = field_name ":" " "* field_value :> CRLF;
# Request: the request line, any number of headers, then a terminating empty line.
155 Request = Request_Line ( message_header )* ( CRLF );
# Entry point: done runs on the transitions that take the machine into a final state.
157 main := (Request ) @done;
/*
 * Reports one parsed element to the parser's on_element callback: the bytes
 * from begin up to (but not including) end, tagged with type.
 * max_length is the largest element length accepted for this call.
 * NOTE(review): callers test the result with `!apply_element(...)`, so a
 * non-zero return presumably means the element was accepted -- confirm.
 */
164 static int apply_element(http_parser *parser, int type, const char *begin, const char *end, int max_length)
/* Element length in bytes; the pointer difference is narrowed to int. */
166 int len = (int)(end-begin);
/* Over-long elements take this branch. */
167 if(len > max_length) {
/* The callback is optional; when set it receives the user data pointer, type, start and length. */
170 if(parser->on_element)
171 parser->on_element(parser->data, type, begin, len);
/* Resets the parser's position bookkeeping so a new request can be parsed. */
177 int http_parser_init(http_parser *parser) {
/* Offsets and lengths recorded by the parser all start from zero. */
181 parser->body_start = 0;
182 parser->content_len = 0;
185 parser->field_len = 0;
186 parser->field_start = 0;
/*
 * Parses the bytes of buffer from offset off up to len.
 * Returns parser->nread, which is accumulated across calls (see the +=
 * below), or 0 straight away when len is 0.
 */
192 size_t http_parser_execute(http_parser *parser, const char *buffer, size_t len, size_t off)
/* Nothing to parse in an empty buffer. */
194 if(len == 0) return 0;
/* The starting offset must lie inside the buffer. */
199 assert(off <= len && "offset past end of buffer");
/* NOTE(review): both sizes are narrowed to int here; this presumably assumes
 * buffers no larger than INT_MAX -- confirm. */
204 assert(pe - p == (int)len - (int)off && "pointers aren't same distance");
/* The scan position must not have moved past the end of the input. */
208 assert(p <= pe && "Buffer overflow after parsing.");
210 if (!http_parser_has_error(parser)) {
/* Add the number of bytes consumed by this call to the running total. */
214 parser->nread += p - (buffer + off);
/* Sanity checks: every recorded offset and length must fit within the buffer. */
216 assert(parser->nread <= len && "nread longer than length");
217 assert(parser->body_start <= len && "body starts after buffer end");
218 assert(parser->mark < len && "mark is after buffer end");
219 assert(parser->field_len <= len && "field has length longer than whole buffer");
220 assert(parser->field_start < len && "field starts after buffer end");
222 return(parser->nread);
/* Summarises the parser state: the error check is made first, then the finished check. */
225 int http_parser_finish(http_parser *parser)
227 if (http_parser_has_error(parser) ) {
229 } else if (http_parser_is_finished(parser) ) {
/* Returns non-zero when the parser's current state (cs) equals http_parser_error. */
236 int http_parser_has_error(http_parser *parser) {
237 return parser->cs == http_parser_error;
/* Returns non-zero when the parser's current state (cs) is at or beyond http_parser_first_final. */
240 int http_parser_is_finished(http_parser *parser) {
241 return parser->cs >= http_parser_first_final;