src/or/parsecommon.c

   1 /* Copyright (c) 2016-2017, The Tor Project, Inc. */
   2 /* See LICENSE for licensing information */
   3
   4 /**
   5  * \file parsecommon.c
   6  * \brief Common code to parse and validate various types of descriptors.
   7  **/
   8
   9 #include "parsecommon.h"
  10 #include "torlog.h"
  11 #include "util_format.h"
  12
  13 #define MIN_ANNOTATION A_PURPOSE
  14 #define MAX_ANNOTATION A_UNKNOWN_
  15
  16 #define ALLOC_ZERO(sz) memarea_alloc_zero(area,sz)
  17 #define ALLOC(sz) memarea_alloc(area,sz)
  18 #define STRDUP(str) memarea_strdup(area,str)
  19 #define STRNDUP(str,n) memarea_strndup(area,(str),(n))
  20
  21 #define RET_ERR(msg)                                               \
  22   STMT_BEGIN                                                       \
  23     if (tok) token_clear(tok);                                      \
  24     tok = ALLOC_ZERO(sizeof(directory_token_t));                   \
  25     tok->tp = ERR_;                                                \
  26     tok->error = STRDUP(msg);                                      \
  27     goto done_tokenizing;                                          \
  28   STMT_END
  29
  30 /** Free all resources allocated for <b>tok</b> */
  31 void
  32 token_clear(directory_token_t *tok)
  33 {
  34   if (tok->key)
  35     crypto_pk_free(tok->key);
  36 }
  37
  38 /** Read all tokens from a string between <b>start</b> and <b>end</b>, and add
  39  * them to <b>out</b>.  Parse according to the token rules in <b>table</b>.
  40  * Caller must free tokens in <b>out</b>.  If <b>end</b> is NULL, use the
  41  * entire string.
  42  */
  43 int
  44 tokenize_string(memarea_t *area,
  45                 const char *start, const char *end, smartlist_t *out,
  46                 token_rule_t *table, int flags)
  47 {
  48   const char **s;
  49   directory_token_t *tok = NULL;
  50   int counts[NIL_];
  51   int i;
  52   int first_nonannotation;
  53   int prev_len = smartlist_len(out);
  54   tor_assert(area);
  55
  56   s = &start;
  57   if (!end) {
  58     end = start+strlen(start);
  59   } else {
  60     /* it's only meaningful to check for nuls if we got an end-of-string ptr */
  61     if (memchr(start, '\0', end-start)) {
  62       log_warn(LD_DIR, "parse error: internal NUL character.");
  63       return -1;
  64     }
  65   }
  66   for (i = 0; i < NIL_; ++i)
  67     counts[i] = 0;
  68
  69   SMARTLIST_FOREACH(out, const directory_token_t *, t, ++counts[t->tp]);
  70
  71   while (*s < end && (!tok || tok->tp != EOF_)) {
  72     tok = get_next_token(area, s, end, table);
  73     if (tok->tp == ERR_) {
  74       log_warn(LD_DIR, "parse error: %s", tok->error);
  75       token_clear(tok);
  76       return -1;
  77     }
  78     ++counts[tok->tp];
  79     smartlist_add(out, tok);
  80     *s = eat_whitespace_eos(*s, end);
  81   }
  82
  83   if (flags & TS_NOCHECK)
  84     return 0;
  85
  86   if ((flags & TS_ANNOTATIONS_OK)) {
  87     first_nonannotation = -1;
  88     for (i = 0; i < smartlist_len(out); ++i) {
  89       tok = smartlist_get(out, i);
  90       if (tok->tp < MIN_ANNOTATION || tok->tp > MAX_ANNOTATION) {
  91         first_nonannotation = i;
  92         break;
  93       }
  94     }
  95     if (first_nonannotation < 0) {
  96       log_warn(LD_DIR, "parse error: item contains only annotations");
  97       return -1;
  98     }
  99     for (i=first_nonannotation;  i < smartlist_len(out); ++i) {
 100       tok = smartlist_get(out, i);
 101       if (tok->tp >= MIN_ANNOTATION && tok->tp <= MAX_ANNOTATION) {
 102         log_warn(LD_DIR, "parse error: Annotations mixed with keywords");
 103         return -1;
 104       }
 105     }
 106     if ((flags & TS_NO_NEW_ANNOTATIONS)) {
 107       if (first_nonannotation != prev_len) {
 108         log_warn(LD_DIR, "parse error: Unexpected annotations.");
 109         return -1;
 110       }
 111     }
 112   } else {
 113     for (i=0;  i < smartlist_len(out); ++i) {
 114       tok = smartlist_get(out, i);
 115       if (tok->tp >= MIN_ANNOTATION && tok->tp <= MAX_ANNOTATION) {
 116         log_warn(LD_DIR, "parse error: no annotations allowed.");
 117         return -1;
 118       }
 119     }
 120     first_nonannotation = 0;
 121   }
 122   for (i = 0; table[i].t; ++i) {
 123     if (counts[table[i].v] < table[i].min_cnt) {
 124       log_warn(LD_DIR, "Parse error: missing %s element.", table[i].t);
 125       return -1;
 126     }
 127     if (counts[table[i].v] > table[i].max_cnt) {
 128       log_warn(LD_DIR, "Parse error: too many %s elements.", table[i].t);
 129       return -1;
 130     }
 131     if (table[i].pos & AT_START) {
 132       if (smartlist_len(out) < 1 ||
 133           (tok = smartlist_get(out, first_nonannotation))->tp != table[i].v) {
 134         log_warn(LD_DIR, "Parse error: first item is not %s.", table[i].t);
 135         return -1;
 136       }
 137     }
 138     if (table[i].pos & AT_END) {
 139       if (smartlist_len(out) < 1 ||
 140           (tok = smartlist_get(out, smartlist_len(out)-1))->tp != table[i].v) {
 141         log_warn(LD_DIR, "Parse error: last item is not %s.", table[i].t);
 142         return -1;
 143       }
 144     }
 145   }
 146   return 0;
 147 }
 148
 149 /** Helper: parse space-separated arguments from the string <b>s</b> ending at
 150  * <b>eol</b>, and store them in the args field of <b>tok</b>.  Store the
 151  * number of parsed elements into the n_args field of <b>tok</b>.  Allocate
 152  * all storage in <b>area</b>.  Return the number of arguments parsed, or
 153  * return -1 if there was an insanely high number of arguments. */
 154 static inline int
 155 get_token_arguments(memarea_t *area, directory_token_t *tok,
 156                     const char *s, const char *eol)
 157 {
 158 /** Largest number of arguments we'll accept to any token, ever. */
 159 #define MAX_ARGS 512
 160   char *mem = memarea_strndup(area, s, eol-s);
 161   char *cp = mem;
 162   int j = 0;
 163   char *args[MAX_ARGS];
 164   memset(args, 0, sizeof(args));
 165   while (*cp) {
 166     if (j == MAX_ARGS)
 167       return -1;
 168     args[j++] = cp;
 169     cp = (char*)find_whitespace(cp);
 170     if (!cp || !*cp)
 171       break; /* End of the line. */
 172     *cp++ = '\0';
 173     cp = (char*)eat_whitespace(cp);
 174   }
 175   tok->n_args = j;
 176   tok->args = memarea_memdup(area, args, j*sizeof(char*));
 177   return j;
 178 #undef MAX_ARGS
 179 }
 180
 181 /** Helper: make sure that the token <b>tok</b> with keyword <b>kwd</b> obeys
 182  * the object syntax of <b>o_syn</b>.  Allocate all storage in <b>area</b>.
 183  * Return <b>tok</b> on success, or a new ERR_ token if the token didn't
 184  * conform to the syntax we wanted.
 185  **/
 186 static inline directory_token_t *
 187 token_check_object(memarea_t *area, const char *kwd,
 188                    directory_token_t *tok, obj_syntax o_syn)
 189 {
 190   char ebuf[128];
 191   switch (o_syn) {
 192     case NO_OBJ:
 193       /* No object is allowed for this token. */
 194       if (tok->object_body) {
 195         tor_snprintf(ebuf, sizeof(ebuf), "Unexpected object for %s", kwd);
 196         RET_ERR(ebuf);
 197       }
 198       if (tok->key) {
 199         tor_snprintf(ebuf, sizeof(ebuf), "Unexpected public key for %s", kwd);
 200         RET_ERR(ebuf);
 201       }
 202       break;
 203     case NEED_OBJ:
 204       /* There must be a (non-key) object. */
 205       if (!tok->object_body) {
 206         tor_snprintf(ebuf, sizeof(ebuf), "Missing object for %s", kwd);
 207         RET_ERR(ebuf);
 208       }
 209       break;
 210     case NEED_KEY_1024: /* There must be a 1024-bit public key. */
 211     case NEED_SKEY_1024: /* There must be a 1024-bit private key. */
 212       if (tok->key && crypto_pk_num_bits(tok->key) != PK_BYTES*8) {
 213         tor_snprintf(ebuf, sizeof(ebuf), "Wrong size on key for %s: %d bits",
 214                      kwd, crypto_pk_num_bits(tok->key));
 215         RET_ERR(ebuf);
 216       }
 217       /* fall through */
 218     case NEED_KEY: /* There must be some kind of key. */
 219       if (!tok->key) {
 220         tor_snprintf(ebuf, sizeof(ebuf), "Missing public key for %s", kwd);
 221         RET_ERR(ebuf);
 222       }
 223       if (o_syn != NEED_SKEY_1024) {
 224         if (crypto_pk_key_is_private(tok->key)) {
 225           tor_snprintf(ebuf, sizeof(ebuf),
 226                "Private key given for %s, which wants a public key", kwd);
 227           RET_ERR(ebuf);
 228         }
 229       } else { /* o_syn == NEED_SKEY_1024 */
 230         if (!crypto_pk_key_is_private(tok->key)) {
 231           tor_snprintf(ebuf, sizeof(ebuf),
 232                "Public key given for %s, which wants a private key", kwd);
 233           RET_ERR(ebuf);
 234         }
 235       }
 236       break;
 237     case OBJ_OK:
 238       /* Anything goes with this token. */
 239       break;
 240   }
 241
 242  done_tokenizing:
 243   return tok;
 244 }
 245
 246 /** Helper function: read the next token from *s, advance *s to the end of the
 247  * token, and return the parsed token.  Parse *<b>s</b> according to the list
 248  * of tokens in <b>table</b>.
 249  */
 250 directory_token_t *
 251 get_next_token(memarea_t *area,
 252                const char **s, const char *eos, token_rule_t *table)
 253 {
 254   /** Reject any object at least this big; it is probably an overflow, an
 255    * attack, a bug, or some other nonsense. */
 256 #define MAX_UNPARSED_OBJECT_SIZE (128*1024)
 257   /** Reject any line at least this big; it is probably an overflow, an
 258    * attack, a bug, or some other nonsense. */
 259 #define MAX_LINE_LENGTH (128*1024)
 260
 261   const char *next, *eol, *obstart;
 262   size_t obname_len;
 263   int i;
 264   directory_token_t *tok;
 265   obj_syntax o_syn = NO_OBJ;
 266   char ebuf[128];
 267   const char *kwd = "";
 268
 269   tor_assert(area);
 270   tok = ALLOC_ZERO(sizeof(directory_token_t));
 271   tok->tp = ERR_;
 272
 273   /* Set *s to first token, eol to end-of-line, next to after first token */
 274   *s = eat_whitespace_eos(*s, eos); /* eat multi-line whitespace */
 275   tor_assert(eos >= *s);
 276   eol = memchr(*s, '\n', eos-*s);
 277   if (!eol)
 278     eol = eos;
 279   if (eol - *s > MAX_LINE_LENGTH) {
 280     RET_ERR("Line far too long");
 281   }
 282
 283   next = find_whitespace_eos(*s, eol);
 284
 285   if (!strcmp_len(*s, "opt", next-*s)) {
 286     /* Skip past an "opt" at the start of the line. */
 287     *s = eat_whitespace_eos_no_nl(next, eol);
 288     next = find_whitespace_eos(*s, eol);
 289   } else if (*s == eos) {  /* If no "opt", and end-of-line, line is invalid */
 290     RET_ERR("Unexpected EOF");
 291   }
 292
 293   /* Search the table for the appropriate entry.  (I tried a binary search
 294    * instead, but it wasn't any faster.) */
 295   for (i = 0; table[i].t ; ++i) {
 296     if (!strcmp_len(*s, table[i].t, next-*s)) {
 297       /* We've found the keyword. */
 298       kwd = table[i].t;
 299       tok->tp = table[i].v;
 300       o_syn = table[i].os;
 301       *s = eat_whitespace_eos_no_nl(next, eol);
 302       /* We go ahead whether there are arguments or not, so that tok->args is
 303        * always set if we want arguments. */
 304       if (table[i].concat_args) {
 305         /* The keyword takes the line as a single argument */
 306         tok->args = ALLOC(sizeof(char*));
 307         tok->args[0] = STRNDUP(*s,eol-*s); /* Grab everything on line */
 308         tok->n_args = 1;
 309       } else {
 310         /* This keyword takes multiple arguments. */
 311         if (get_token_arguments(area, tok, *s, eol)<0) {
 312           tor_snprintf(ebuf, sizeof(ebuf),"Far too many arguments to %s", kwd);
 313           RET_ERR(ebuf);
 314         }
 315         *s = eol;
 316       }
 317       if (tok->n_args < table[i].min_args) {
 318         tor_snprintf(ebuf, sizeof(ebuf), "Too few arguments to %s", kwd);
 319         RET_ERR(ebuf);
 320       } else if (tok->n_args > table[i].max_args) {
 321         tor_snprintf(ebuf, sizeof(ebuf), "Too many arguments to %s", kwd);
 322         RET_ERR(ebuf);
 323       }
 324       break;
 325     }
 326   }
 327
 328   if (tok->tp == ERR_) {
 329     /* No keyword matched; call it an "K_opt" or "A_unrecognized" */
 330     if (*s < eol && **s == '@')
 331       tok->tp = A_UNKNOWN_;
 332     else
 333       tok->tp = K_OPT;
 334     tok->args = ALLOC(sizeof(char*));
 335     tok->args[0] = STRNDUP(*s, eol-*s);
 336     tok->n_args = 1;
 337     o_syn = OBJ_OK;
 338   }
 339
 340   /* Check whether there's an object present */
 341   *s = eat_whitespace_eos(eol, eos);  /* Scan from end of first line */
 342   tor_assert(eos >= *s);
 343   eol = memchr(*s, '\n', eos-*s);
 344   if (!eol || eol-*s<11 || strcmpstart(*s, "-----BEGIN ")) /* No object. */
 345     goto check_object;
 346
 347   obstart = *s; /* Set obstart to start of object spec */
 348   if (*s+16 >= eol || memchr(*s+11,'\0',eol-*s-16) || /* no short lines, */
 349       strcmp_len(eol-5, "-----", 5) ||           /* nuls or invalid endings */
 350       (eol-*s) > MAX_UNPARSED_OBJECT_SIZE) {     /* name too long */
 351     RET_ERR("Malformed object: bad begin line");
 352   }
 353   tok->object_type = STRNDUP(*s+11, eol-*s-16);
 354   obname_len = eol-*s-16; /* store objname length here to avoid a strlen() */
 355   *s = eol+1;    /* Set *s to possible start of object data (could be eos) */
 356
 357   /* Go to the end of the object */
 358   next = tor_memstr(*s, eos-*s, "-----END ");
 359   if (!next) {
 360     RET_ERR("Malformed object: missing object end line");
 361   }
 362   tor_assert(eos >= next);
 363   eol = memchr(next, '\n', eos-next);
 364   if (!eol)  /* end-of-line marker, or eos if there's no '\n' */
 365     eol = eos;
 366   /* Validate the ending tag, which should be 9 + NAME + 5 + eol */
 367   if ((size_t)(eol-next) != 9+obname_len+5 ||
 368       strcmp_len(next+9, tok->object_type, obname_len) ||
 369       strcmp_len(eol-5, "-----", 5)) {
 370     tor_snprintf(ebuf, sizeof(ebuf), "Malformed object: mismatched end tag %s",
 371              tok->object_type);
 372     ebuf[sizeof(ebuf)-1] = '\0';
 373     RET_ERR(ebuf);
 374   }
 375   if (next - *s > MAX_UNPARSED_OBJECT_SIZE)
 376     RET_ERR("Couldn't parse object: missing footer or object much too big.");
 377
 378   if (!strcmp(tok->object_type, "RSA PUBLIC KEY")) { /* If it's a public key */
 379     tok->key = crypto_pk_new();
 380     if (crypto_pk_read_public_key_from_string(tok->key, obstart, eol-obstart))
 381       RET_ERR("Couldn't parse public key.");
 382   } else if (!strcmp(tok->object_type, "RSA PRIVATE KEY")) { /* private key */
 383     tok->key = crypto_pk_new();
 384     if (crypto_pk_read_private_key_from_string(tok->key, obstart, eol-obstart))
 385       RET_ERR("Couldn't parse private key.");
 386   } else { /* If it's something else, try to base64-decode it */
 387     int r;
 388     tok->object_body = ALLOC(next-*s); /* really, this is too much RAM. */
 389     r = base64_decode(tok->object_body, next-*s, *s, next-*s);
 390     if (r<0)
 391       RET_ERR("Malformed object: bad base64-encoded data");
 392     tok->object_size = r;
 393   }
 394   *s = eol;
 395
 396  check_object:
 397   tok = token_check_object(area, kwd, tok, o_syn);
 398
 399  done_tokenizing:
 400   return tok;
 401
 402 #undef RET_ERR
 403 #undef ALLOC
 404 #undef ALLOC_ZERO
 405 #undef STRDUP
 406 #undef STRNDUP
 407 }
 408
 409 /** Find the first token in <b>s</b> whose keyword is <b>keyword</b>; fail
 410  * with an assert if no such keyword is found.
 411  */
 412 directory_token_t *
 413 find_by_keyword_(smartlist_t *s, directory_keyword keyword,
 414                  const char *keyword_as_string)
 415 {
 416   directory_token_t *tok = find_opt_by_keyword(s, keyword);
 417   if (PREDICT_UNLIKELY(!tok)) {
 418     log_err(LD_BUG, "Missing %s [%d] in directory object that should have "
 419          "been validated. Internal error.", keyword_as_string, (int)keyword);
 420     tor_assert(tok);
 421   }
 422   return tok;
 423 }
 424
 425 /** Find the first token in <b>s</b> whose keyword is <b>keyword</b>; return
 426  * NULL if no such keyword is found.
 427  */
 428 directory_token_t *
 429 find_opt_by_keyword(smartlist_t *s, directory_keyword keyword)
 430 {
 431   SMARTLIST_FOREACH(s, directory_token_t *, t, if (t->tp == keyword) return t);
 432   return NULL;
 433 }
 434
 435 /** If there are any directory_token_t entries in <b>s</b> whose keyword is
 436  * <b>k</b>, return a newly allocated smartlist_t containing all such entries,
 437  * in the same order in which they occur in <b>s</b>.  Otherwise return
 438  * NULL. */
 439 smartlist_t *
 440 find_all_by_keyword(const smartlist_t *s, directory_keyword k)
 441 {
 442   smartlist_t *out = NULL;
 443   SMARTLIST_FOREACH(s, directory_token_t *, t,
 444                     if (t->tp == k) {
 445                     if (!out)
 446                     out = smartlist_new();
 447                     smartlist_add(out, t);
 448                     });
 449   return out;
 450 }
 451