Cache control file
[tor/appveyor.git] / src / or / parsecommon.c
blob6c3dd3100ec76770b529298ef8dd2291c4405bf0
1 /* Copyright (c) 2016-2017, The Tor Project, Inc. */
2 /* See LICENSE for licensing information */
4 /**
5 * \file parsecommon.c
6 * \brief Common code to parse and validate various type of descriptors.
7 **/
9 #include "parsecommon.h"
10 #include "torlog.h"
11 #include "util_format.h"
/** Smallest and largest token types that fall in the annotation range;
 * tokenize_string() compares each token's tp against these bounds. */
#define MIN_ANNOTATION A_PURPOSE
#define MAX_ANNOTATION A_UNKNOWN_

/* Allocation shorthands.  Each one expects a variable named <b>area</b> to
 * be in scope at the point of use. */
#define ALLOC_ZERO(sz) memarea_alloc_zero(area,(sz))
#define ALLOC(sz) memarea_alloc(area,(sz))
#define STRDUP(str) memarea_strdup(area,(str))
#define STRNDUP(str,n) memarea_strndup(area,(str),(n))

/** Abandon the token being built and report <b>msg</b> instead: clear the
 * current <b>tok</b> (if any), replace it with a fresh zeroed token whose
 * tp is ERR_ and whose error is a copy of <b>msg</b>, then jump to the
 * <b>done_tokenizing</b> label.  The expansion site must provide both
 * <b>tok</b> and that label. */
#define RET_ERR(msg)                                               \
  STMT_BEGIN                                                       \
    if (tok) {                                                     \
      token_clear(tok);                                            \
    }                                                              \
    tok = ALLOC_ZERO(sizeof(directory_token_t));                   \
    tok->tp = ERR_;                                                \
    tok->error = STRDUP(msg);                                      \
    goto done_tokenizing;                                          \
  STMT_END
30 /** Free all resources allocated for <b>tok</b> */
31 void
32 token_clear(directory_token_t *tok)
34 if (tok->key)
35 crypto_pk_free(tok->key);
38 /** Read all tokens from a string between <b>start</b> and <b>end</b>, and add
39 * them to <b>out</b>. Parse according to the token rules in <b>table</b>.
40 * Caller must free tokens in <b>out</b>. If <b>end</b> is NULL, use the
41 * entire string.
43 int
44 tokenize_string(memarea_t *area,
45 const char *start, const char *end, smartlist_t *out,
46 token_rule_t *table, int flags)
48 const char **s;
49 directory_token_t *tok = NULL;
50 int counts[NIL_];
51 int i;
52 int first_nonannotation;
53 int prev_len = smartlist_len(out);
54 tor_assert(area);
56 s = &start;
57 if (!end) {
58 end = start+strlen(start);
59 } else {
60 /* it's only meaningful to check for nuls if we got an end-of-string ptr */
61 if (memchr(start, '\0', end-start)) {
62 log_warn(LD_DIR, "parse error: internal NUL character.");
63 return -1;
66 for (i = 0; i < NIL_; ++i)
67 counts[i] = 0;
69 SMARTLIST_FOREACH(out, const directory_token_t *, t, ++counts[t->tp]);
71 while (*s < end && (!tok || tok->tp != EOF_)) {
72 tok = get_next_token(area, s, end, table);
73 if (tok->tp == ERR_) {
74 log_warn(LD_DIR, "parse error: %s", tok->error);
75 token_clear(tok);
76 return -1;
78 ++counts[tok->tp];
79 smartlist_add(out, tok);
80 *s = eat_whitespace_eos(*s, end);
83 if (flags & TS_NOCHECK)
84 return 0;
86 if ((flags & TS_ANNOTATIONS_OK)) {
87 first_nonannotation = -1;
88 for (i = 0; i < smartlist_len(out); ++i) {
89 tok = smartlist_get(out, i);
90 if (tok->tp < MIN_ANNOTATION || tok->tp > MAX_ANNOTATION) {
91 first_nonannotation = i;
92 break;
95 if (first_nonannotation < 0) {
96 log_warn(LD_DIR, "parse error: item contains only annotations");
97 return -1;
99 for (i=first_nonannotation; i < smartlist_len(out); ++i) {
100 tok = smartlist_get(out, i);
101 if (tok->tp >= MIN_ANNOTATION && tok->tp <= MAX_ANNOTATION) {
102 log_warn(LD_DIR, "parse error: Annotations mixed with keywords");
103 return -1;
106 if ((flags & TS_NO_NEW_ANNOTATIONS)) {
107 if (first_nonannotation != prev_len) {
108 log_warn(LD_DIR, "parse error: Unexpected annotations.");
109 return -1;
112 } else {
113 for (i=0; i < smartlist_len(out); ++i) {
114 tok = smartlist_get(out, i);
115 if (tok->tp >= MIN_ANNOTATION && tok->tp <= MAX_ANNOTATION) {
116 log_warn(LD_DIR, "parse error: no annotations allowed.");
117 return -1;
120 first_nonannotation = 0;
122 for (i = 0; table[i].t; ++i) {
123 if (counts[table[i].v] < table[i].min_cnt) {
124 log_warn(LD_DIR, "Parse error: missing %s element.", table[i].t);
125 return -1;
127 if (counts[table[i].v] > table[i].max_cnt) {
128 log_warn(LD_DIR, "Parse error: too many %s elements.", table[i].t);
129 return -1;
131 if (table[i].pos & AT_START) {
132 if (smartlist_len(out) < 1 ||
133 (tok = smartlist_get(out, first_nonannotation))->tp != table[i].v) {
134 log_warn(LD_DIR, "Parse error: first item is not %s.", table[i].t);
135 return -1;
138 if (table[i].pos & AT_END) {
139 if (smartlist_len(out) < 1 ||
140 (tok = smartlist_get(out, smartlist_len(out)-1))->tp != table[i].v) {
141 log_warn(LD_DIR, "Parse error: last item is not %s.", table[i].t);
142 return -1;
146 return 0;
149 /** Helper: parse space-separated arguments from the string <b>s</b> ending at
150 * <b>eol</b>, and store them in the args field of <b>tok</b>. Store the
151 * number of parsed elements into the n_args field of <b>tok</b>. Allocate
152 * all storage in <b>area</b>. Return the number of arguments parsed, or
153 * return -1 if there was an insanely high number of arguments. */
154 static inline int
155 get_token_arguments(memarea_t *area, directory_token_t *tok,
156 const char *s, const char *eol)
158 /** Largest number of arguments we'll accept to any token, ever. */
159 #define MAX_ARGS 512
160 char *mem = memarea_strndup(area, s, eol-s);
161 char *cp = mem;
162 int j = 0;
163 char *args[MAX_ARGS];
164 memset(args, 0, sizeof(args));
165 while (*cp) {
166 if (j == MAX_ARGS)
167 return -1;
168 args[j++] = cp;
169 cp = (char*)find_whitespace(cp);
170 if (!cp || !*cp)
171 break; /* End of the line. */
172 *cp++ = '\0';
173 cp = (char*)eat_whitespace(cp);
175 tok->n_args = j;
176 tok->args = memarea_memdup(area, args, j*sizeof(char*));
177 return j;
178 #undef MAX_ARGS
181 /** Helper: make sure that the token <b>tok</b> with keyword <b>kwd</b> obeys
182 * the object syntax of <b>o_syn</b>. Allocate all storage in <b>area</b>.
183 * Return <b>tok</b> on success, or a new ERR_ token if the token didn't
184 * conform to the syntax we wanted.
186 static inline directory_token_t *
187 token_check_object(memarea_t *area, const char *kwd,
188 directory_token_t *tok, obj_syntax o_syn)
190 char ebuf[128];
191 switch (o_syn) {
192 case NO_OBJ:
193 /* No object is allowed for this token. */
194 if (tok->object_body) {
195 tor_snprintf(ebuf, sizeof(ebuf), "Unexpected object for %s", kwd);
196 RET_ERR(ebuf);
198 if (tok->key) {
199 tor_snprintf(ebuf, sizeof(ebuf), "Unexpected public key for %s", kwd);
200 RET_ERR(ebuf);
202 break;
203 case NEED_OBJ:
204 /* There must be a (non-key) object. */
205 if (!tok->object_body) {
206 tor_snprintf(ebuf, sizeof(ebuf), "Missing object for %s", kwd);
207 RET_ERR(ebuf);
209 break;
210 case NEED_KEY_1024: /* There must be a 1024-bit public key. */
211 case NEED_SKEY_1024: /* There must be a 1024-bit private key. */
212 if (tok->key && crypto_pk_num_bits(tok->key) != PK_BYTES*8) {
213 tor_snprintf(ebuf, sizeof(ebuf), "Wrong size on key for %s: %d bits",
214 kwd, crypto_pk_num_bits(tok->key));
215 RET_ERR(ebuf);
217 /* fall through */
218 case NEED_KEY: /* There must be some kind of key. */
219 if (!tok->key) {
220 tor_snprintf(ebuf, sizeof(ebuf), "Missing public key for %s", kwd);
221 RET_ERR(ebuf);
223 if (o_syn != NEED_SKEY_1024) {
224 if (crypto_pk_key_is_private(tok->key)) {
225 tor_snprintf(ebuf, sizeof(ebuf),
226 "Private key given for %s, which wants a public key", kwd);
227 RET_ERR(ebuf);
229 } else { /* o_syn == NEED_SKEY_1024 */
230 if (!crypto_pk_key_is_private(tok->key)) {
231 tor_snprintf(ebuf, sizeof(ebuf),
232 "Public key given for %s, which wants a private key", kwd);
233 RET_ERR(ebuf);
236 break;
237 case OBJ_OK:
238 /* Anything goes with this token. */
239 break;
242 done_tokenizing:
243 return tok;
246 /** Helper function: read the next token from *s, advance *s to the end of the
247 * token, and return the parsed token. Parse *<b>s</b> according to the list
248 * of tokens in <b>table</b>.
250 directory_token_t *
251 get_next_token(memarea_t *area,
252 const char **s, const char *eos, token_rule_t *table)
254 /** Reject any object at least this big; it is probably an overflow, an
255 * attack, a bug, or some other nonsense. */
256 #define MAX_UNPARSED_OBJECT_SIZE (128*1024)
257 /** Reject any line at least this big; it is probably an overflow, an
258 * attack, a bug, or some other nonsense. */
259 #define MAX_LINE_LENGTH (128*1024)
261 const char *next, *eol, *obstart;
262 size_t obname_len;
263 int i;
264 directory_token_t *tok;
265 obj_syntax o_syn = NO_OBJ;
266 char ebuf[128];
267 const char *kwd = "";
269 tor_assert(area);
270 tok = ALLOC_ZERO(sizeof(directory_token_t));
271 tok->tp = ERR_;
273 /* Set *s to first token, eol to end-of-line, next to after first token */
274 *s = eat_whitespace_eos(*s, eos); /* eat multi-line whitespace */
275 tor_assert(eos >= *s);
276 eol = memchr(*s, '\n', eos-*s);
277 if (!eol)
278 eol = eos;
279 if (eol - *s > MAX_LINE_LENGTH) {
280 RET_ERR("Line far too long");
283 next = find_whitespace_eos(*s, eol);
285 if (!strcmp_len(*s, "opt", next-*s)) {
286 /* Skip past an "opt" at the start of the line. */
287 *s = eat_whitespace_eos_no_nl(next, eol);
288 next = find_whitespace_eos(*s, eol);
289 } else if (*s == eos) { /* If no "opt", and end-of-line, line is invalid */
290 RET_ERR("Unexpected EOF");
293 /* Search the table for the appropriate entry. (I tried a binary search
294 * instead, but it wasn't any faster.) */
295 for (i = 0; table[i].t ; ++i) {
296 if (!strcmp_len(*s, table[i].t, next-*s)) {
297 /* We've found the keyword. */
298 kwd = table[i].t;
299 tok->tp = table[i].v;
300 o_syn = table[i].os;
301 *s = eat_whitespace_eos_no_nl(next, eol);
302 /* We go ahead whether there are arguments or not, so that tok->args is
303 * always set if we want arguments. */
304 if (table[i].concat_args) {
305 /* The keyword takes the line as a single argument */
306 tok->args = ALLOC(sizeof(char*));
307 tok->args[0] = STRNDUP(*s,eol-*s); /* Grab everything on line */
308 tok->n_args = 1;
309 } else {
310 /* This keyword takes multiple arguments. */
311 if (get_token_arguments(area, tok, *s, eol)<0) {
312 tor_snprintf(ebuf, sizeof(ebuf),"Far too many arguments to %s", kwd);
313 RET_ERR(ebuf);
315 *s = eol;
317 if (tok->n_args < table[i].min_args) {
318 tor_snprintf(ebuf, sizeof(ebuf), "Too few arguments to %s", kwd);
319 RET_ERR(ebuf);
320 } else if (tok->n_args > table[i].max_args) {
321 tor_snprintf(ebuf, sizeof(ebuf), "Too many arguments to %s", kwd);
322 RET_ERR(ebuf);
324 break;
328 if (tok->tp == ERR_) {
329 /* No keyword matched; call it an "K_opt" or "A_unrecognized" */
330 if (*s < eol && **s == '@')
331 tok->tp = A_UNKNOWN_;
332 else
333 tok->tp = K_OPT;
334 tok->args = ALLOC(sizeof(char*));
335 tok->args[0] = STRNDUP(*s, eol-*s);
336 tok->n_args = 1;
337 o_syn = OBJ_OK;
340 /* Check whether there's an object present */
341 *s = eat_whitespace_eos(eol, eos); /* Scan from end of first line */
342 tor_assert(eos >= *s);
343 eol = memchr(*s, '\n', eos-*s);
344 if (!eol || eol-*s<11 || strcmpstart(*s, "-----BEGIN ")) /* No object. */
345 goto check_object;
347 obstart = *s; /* Set obstart to start of object spec */
348 if (*s+16 >= eol || memchr(*s+11,'\0',eol-*s-16) || /* no short lines, */
349 strcmp_len(eol-5, "-----", 5) || /* nuls or invalid endings */
350 (eol-*s) > MAX_UNPARSED_OBJECT_SIZE) { /* name too long */
351 RET_ERR("Malformed object: bad begin line");
353 tok->object_type = STRNDUP(*s+11, eol-*s-16);
354 obname_len = eol-*s-16; /* store objname length here to avoid a strlen() */
355 *s = eol+1; /* Set *s to possible start of object data (could be eos) */
357 /* Go to the end of the object */
358 next = tor_memstr(*s, eos-*s, "-----END ");
359 if (!next) {
360 RET_ERR("Malformed object: missing object end line");
362 tor_assert(eos >= next);
363 eol = memchr(next, '\n', eos-next);
364 if (!eol) /* end-of-line marker, or eos if there's no '\n' */
365 eol = eos;
366 /* Validate the ending tag, which should be 9 + NAME + 5 + eol */
367 if ((size_t)(eol-next) != 9+obname_len+5 ||
368 strcmp_len(next+9, tok->object_type, obname_len) ||
369 strcmp_len(eol-5, "-----", 5)) {
370 tor_snprintf(ebuf, sizeof(ebuf), "Malformed object: mismatched end tag %s",
371 tok->object_type);
372 ebuf[sizeof(ebuf)-1] = '\0';
373 RET_ERR(ebuf);
375 if (next - *s > MAX_UNPARSED_OBJECT_SIZE)
376 RET_ERR("Couldn't parse object: missing footer or object much too big.");
378 if (!strcmp(tok->object_type, "RSA PUBLIC KEY")) { /* If it's a public key */
379 tok->key = crypto_pk_new();
380 if (crypto_pk_read_public_key_from_string(tok->key, obstart, eol-obstart))
381 RET_ERR("Couldn't parse public key.");
382 } else if (!strcmp(tok->object_type, "RSA PRIVATE KEY")) { /* private key */
383 tok->key = crypto_pk_new();
384 if (crypto_pk_read_private_key_from_string(tok->key, obstart, eol-obstart))
385 RET_ERR("Couldn't parse private key.");
386 } else { /* If it's something else, try to base64-decode it */
387 int r;
388 tok->object_body = ALLOC(next-*s); /* really, this is too much RAM. */
389 r = base64_decode(tok->object_body, next-*s, *s, next-*s);
390 if (r<0)
391 RET_ERR("Malformed object: bad base64-encoded data");
392 tok->object_size = r;
394 *s = eol;
396 check_object:
397 tok = token_check_object(area, kwd, tok, o_syn);
399 done_tokenizing:
400 return tok;
402 #undef RET_ERR
403 #undef ALLOC
404 #undef ALLOC_ZERO
405 #undef STRDUP
406 #undef STRNDUP
409 /** Find the first token in <b>s</b> whose keyword is <b>keyword</b>; fail
410 * with an assert if no such keyword is found.
412 directory_token_t *
413 find_by_keyword_(smartlist_t *s, directory_keyword keyword,
414 const char *keyword_as_string)
416 directory_token_t *tok = find_opt_by_keyword(s, keyword);
417 if (PREDICT_UNLIKELY(!tok)) {
418 log_err(LD_BUG, "Missing %s [%d] in directory object that should have "
419 "been validated. Internal error.", keyword_as_string, (int)keyword);
420 tor_assert(tok);
422 return tok;
425 /** Find the first token in <b>s</b> whose keyword is <b>keyword</b>; return
426 * NULL if no such keyword is found.
428 directory_token_t *
429 find_opt_by_keyword(smartlist_t *s, directory_keyword keyword)
431 SMARTLIST_FOREACH(s, directory_token_t *, t, if (t->tp == keyword) return t);
432 return NULL;
435 /** If there are any directory_token_t entries in <b>s</b> whose keyword is
436 * <b>k</b>, return a newly allocated smartlist_t containing all such entries,
437 * in the same order in which they occur in <b>s</b>. Otherwise return
438 * NULL. */
439 smartlist_t *
440 find_all_by_keyword(const smartlist_t *s, directory_keyword k)
442 smartlist_t *out = NULL;
443 SMARTLIST_FOREACH(s, directory_token_t *, t,
444 if (t->tp == k) {
445 if (!out)
446 out = smartlist_new();
447 smartlist_add(out, t);
449 return out;