Disable forced gzip by default
[opentracker.git] / scan_urlencoded_query.c
blob38d544a879efe1c61b7b433f5a14fd3e21aa9cee
1 /* This software was written by Dirk Engling <erdgeist@erdgeist.org>
2 It is considered beerware. Prost. Skol. Cheers or whatever.
4 $id$ */
/* Opentracker */
#include "scan_urlencoded_query.h"

/* Libwofat */
#include "scan.h"

/* System */
#include <limits.h>
#include <string.h>
15 /* Idea is to do an in-place replacement or guarantee at least
16 strlen( string ) bytes in deststring
17 watch http://www.ietf.org/rfc/rfc2396.txt
18 unreserved = alphanum | mark
19 mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
20 we add '%' to the matrix to not stop at encoded chars.
21 After losing too many requests to being too strict, add the following characters to reserved matrix
22 relax = "+" | "," | "/" | ";" | "<" | ">" | ":"
25 /* This matrix holds for each ascii character the information,
26 whether it is a non-terminating character for one of the three
27 scan states we are in, that is 'path', 'param' and 'value' from
28 /path?param=value&param=value, it is encoded in bit 0, 1 and 2
29 respectively
31 The top bit of lower nibble indicates, whether this character is
32 a hard terminator, ie. \0, \n or \s, where the whole scanning
33 process should terminate
/* Per-byte scanner attributes, indexed by the unsigned byte value.
   Bits 0, 1 and 2 flag the byte as non-terminating while scanning a
   'path', a 'param' or a 'value' respectively; bit 3 (value 8) flags a
   hard terminator. Each row holds 16 entries, the trailing comment
   names the index range the row covers. */
static const unsigned char is_unreserved[256] = {
  8,0,0,0,0,0,0,0,0,0,8,0,0,8,0,0, /* 0x00 - 0x0f */
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x10 - 0x1f */
  8,7,8,8,8,7,0,7,7,7,7,7,7,7,7,7, /* 0x20 - 0x2f */
  7,7,7,7,7,7,7,7,7,7,7,7,7,4,7,6, /* 0x30 - 0x3f */
  4,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, /* 0x40 - 0x4f */
  7,7,7,7,7,7,7,7,7,7,7,8,8,8,8,7, /* 0x50 - 0x5f */
  8,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, /* 0x60 - 0x6f */
  7,7,7,7,7,7,7,7,7,7,7,8,8,8,7,0, /* 0x70 - 0x7f */
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x80 - 0x8f */
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x90 - 0x9f */
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xa0 - 0xaf */
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xb0 - 0xbf */
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xc0 - 0xcf */
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xd0 - 0xdf */
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xe0 - 0xef */
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0  /* 0xf0 - 0xff */
};
/* Convert one hexadecimal digit character ('0'-'9', 'A'-'F', 'a'-'f')
   to its numeric value 0..15. Any other byte yields 0xff. */
static unsigned char fromhex(unsigned char x) {
  unsigned char lowered;

  if (x >= '0' && x <= '9')
    return (unsigned char)(x - '0');

  /* Setting bit 5 maps 'A'..'F' onto 'a'..'f' so one range check suffices */
  lowered = (unsigned char)(x | 0x20);
  if (lowered >= 'a' && lowered <= 'f')
    return (unsigned char)(lowered - 'a' + 10);

  return 0xff;
}
58 /* Skip the value of a param=value pair */
59 void scan_urlencoded_skipvalue(char **string) {
60 const unsigned char *s = *(const unsigned char **)string;
61 unsigned char f;
63 /* Since we are asked to skip the 'value', we assume to stop at
64 terminators for a 'value' string position */
65 while ((f = is_unreserved[*s++]) & SCAN_SEARCHPATH_VALUE)
68 /* If we stopped at a hard terminator like \0 or \n, make the
69 next scan_urlencoded_query encounter it again */
70 if (f & SCAN_SEARCHPATH_TERMINATOR)
71 --s;
73 *string = (char *)s;
76 int scan_find_keywords(const ot_keywords *keywords, char **string, SCAN_SEARCHPATH_FLAG flags) {
77 char *deststring = *string;
78 ssize_t match_length = scan_urlencoded_query(string, deststring, flags);
80 if (match_length < 0)
81 return match_length;
82 if (match_length == 0)
83 return -3;
85 while (keywords->key) {
86 if (!strncmp(keywords->key, deststring, match_length) && !keywords->key[match_length])
87 return keywords->value;
88 keywords++;
91 return -3;
94 ssize_t scan_urlencoded_query(char **string, char *deststring, SCAN_SEARCHPATH_FLAG flags) {
95 const unsigned char *s = *(const unsigned char **)string;
96 unsigned char *d = (unsigned char *)deststring;
97 unsigned char b, c;
99 /* This is the main decoding loop.
100 'flag' determines, which characters are non-terminating in current context
101 (ie. stop at '=' and '&' if scanning for a 'param'; stop at '?' if scanning for the path )
103 while (is_unreserved[c = *s++] & flags) {
105 /* When encountering an url escaped character, try to decode */
106 if (c == '%') {
107 if ((b = fromhex(*s++)) == 0xff)
108 return -1;
109 if ((c = fromhex(*s++)) == 0xff)
110 return -1;
111 c |= (b << 4);
114 /* Write (possibly decoded) character to output */
115 *d++ = c;
118 switch (c) {
119 case 0:
120 case '\r':
121 case '\n':
122 case ' ':
123 /* If we started scanning on a hard terminator, indicate we've finished */
124 if (d == (unsigned char *)deststring)
125 return -2;
127 /* Else make the next call to scan_urlencoded_param encounter it again */
128 --s;
129 break;
130 case '?':
131 if (flags != SCAN_PATH)
132 return -1;
133 break;
134 case '=':
135 if (flags != SCAN_SEARCHPATH_PARAM)
136 return -1;
137 break;
138 case '&':
139 if (flags == SCAN_PATH)
140 return -1;
141 if (flags == SCAN_SEARCHPATH_PARAM)
142 --s;
143 break;
144 default:
145 return -1;
148 *string = (char *)s;
149 return d - (unsigned char *)deststring;
152 ssize_t scan_fixed_int(char *data, size_t len, int *tmp) {
153 int minus = 0;
154 *tmp = 0;
155 if (*data == '-')
156 --len, ++data, ++minus;
157 while ((len > 0) && (*data >= '0') && (*data <= '9')) {
158 --len;
159 *tmp = 10 * *tmp + *data++ - '0';
161 if (minus)
162 *tmp = -*tmp;
163 return len;