src/feature/dircommon/consdiff.c

   1 /* Copyright (c) 2014, Daniel Martí
   2  * Copyright (c) 2014-2021, The Tor Project, Inc. */
   3 /* See LICENSE for licensing information */
   4
   5 /**
   6  * \file consdiff.c
   7  * \brief Consensus diff implementation, including both the generation and the
   8  * application of diffs in a minimal ed format.
   9  *
  10  * The consensus diff application is done in consdiff_apply_diff, which relies
  11  * on apply_ed_diff for the main ed diff part and on some digest helper
  12  * functions to check the digest hashes found in the consensus diff header.
  13  *
  14  * The consensus diff generation is more complex. consdiff_gen_diff generates
  15  * it, relying on gen_ed_diff to generate the ed diff and some digest helper
  16  * functions to generate the digest hashes.
  17  *
  18  * gen_ed_diff is the tricky bit. In it simplest form, it will take quadratic
  19  * time and linear space to generate an ed diff given two smartlists. As shown
  20  * in its comment section, calling calc_changes on the entire two consensuses
  21  * will calculate what is to be added and what is to be deleted in the diff.
  22  * Its comment section briefly explains how it works.
  23  *
  24  * In our case specific to consensuses, we take advantage of the fact that
  25  * consensuses list routers sorted by their identities. We use that
  26  * information to avoid running calc_changes on the whole smartlists.
  27  * gen_ed_diff will navigate through the two consensuses identity by identity
  28  * and will send small couples of slices to calc_changes, keeping the running
  29  * time near-linear. This is explained in more detail in the gen_ed_diff
  30  * comments.
  31  *
  32  * The allocation strategy tries to save time and memory by avoiding needless
  33  * copies.  Instead of actually splitting the inputs into separate strings, we
  34  * allocate cdline_t objects, each of which represents a line in the original
  35  * object or in the output.  We use memarea_t allocators to manage the
  36  * temporary memory we use when generating or applying diffs.
  37  **/
  38
  39 #define CONSDIFF_PRIVATE
  40
  41 #include "core/or/or.h"
  42 #include "feature/dircommon/consdiff.h"
  43 #include "lib/memarea/memarea.h"
  44 #include "feature/dirparse/ns_parse.h"
  45
  46 static const char* ns_diff_version = "network-status-diff-version 1";
  47 static const char* hash_token = "hash";
  48
  49 static char *consensus_join_lines(const smartlist_t *inp);
  50
  51 /** Return true iff a and b have the same contents. */
  52 STATIC int
  53 lines_eq(const cdline_t *a, const cdline_t *b)
  54 {
  55   return a->len == b->len && fast_memeq(a->s, b->s, a->len);
  56 }
  57
  58 /** Return true iff a has the same contents as the nul-terminated string b. */
  59 STATIC int
  60 line_str_eq(const cdline_t *a, const char *b)
  61 {
  62   const size_t len = strlen(b);
  63   tor_assert(len <= UINT32_MAX);
  64   cdline_t bline = { b, (uint32_t)len };
  65   return lines_eq(a, &bline);
  66 }
  67
  68 /** Return true iff a begins with the same contents as the nul-terminated
  69  * string b. */
  70 static int
  71 line_starts_with_str(const cdline_t *a, const char *b)
  72 {
  73   const size_t len = strlen(b);
  74   tor_assert(len <= UINT32_MAX);
  75   return a->len >= len && fast_memeq(a->s, b, len);
  76 }
  77
  78 /** Return a new cdline_t holding as its contents the nul-terminated
  79  * string s. Use the provided memory area for storage. */
  80 static cdline_t *
  81 cdline_linecpy(memarea_t *area, const char *s)
  82 {
  83   size_t len = strlen(s);
  84   const char *ss = memarea_memdup(area, s, len);
  85   cdline_t *line = memarea_alloc(area, sizeof(cdline_t));
  86   line->s = ss;
  87   line->len = (uint32_t)len;
  88   return line;
  89 }
  90
  91 /** Add a cdline_t to <b>lst</b> holding as its contents the nul-terminated
  92  * string s.  Use the provided memory area for storage. */
  93 STATIC void
  94 smartlist_add_linecpy(smartlist_t *lst, memarea_t *area, const char *s)
  95 {
  96   smartlist_add(lst, cdline_linecpy(area, s));
  97 }
  98
  99 /** Compute the digest of <b>cons</b>, and store the result in
 100  * <b>digest_out</b>. Return 0 on success, -1 on failure. */
 101 /* This is a separate, mockable function so that we can override it when
 102  * fuzzing. */
 103 MOCK_IMPL(STATIC int,
 104 consensus_compute_digest,(const char *cons, size_t len,
 105                           consensus_digest_t *digest_out))
 106 {
 107   int r = crypto_digest256((char*)digest_out->sha3_256,
 108                            cons, len, DIGEST_SHA3_256);
 109   return r;
 110 }
 111
 112 /** Compute the digest-as-signed of <b>cons</b>, and store the result in
 113  * <b>digest_out</b>. Return 0 on success, -1 on failure. */
 114 /* This is a separate, mockable function so that we can override it when
 115  * fuzzing. */
 116 MOCK_IMPL(STATIC int,
 117 consensus_compute_digest_as_signed,(const char *cons, size_t len,
 118                                     consensus_digest_t *digest_out))
 119 {
 120   return router_get_networkstatus_v3_sha3_as_signed(digest_out->sha3_256,
 121                                                     cons, len);
 122 }
 123
 124 /** Return true iff <b>d1</b> and <b>d2</b> contain the same digest */
 125 /* This is a separate, mockable function so that we can override it when
 126  * fuzzing. */
 127 MOCK_IMPL(STATIC int,
 128 consensus_digest_eq,(const uint8_t *d1,
 129                      const uint8_t *d2))
 130 {
 131   return fast_memeq(d1, d2, DIGEST256_LEN);
 132 }
 133
 134 /** Create (allocate) a new slice from a smartlist. Assumes that the start
 135  * and the end indexes are within the bounds of the initial smartlist. The end
 136  * element is not part of the resulting slice. If end is -1, the slice is to
 137  * reach the end of the smartlist.
 138  */
 139 STATIC smartlist_slice_t *
 140 smartlist_slice(const smartlist_t *list, int start, int end)
 141 {
 142   int list_len = smartlist_len(list);
 143   tor_assert(start >= 0);
 144   tor_assert(start <= list_len);
 145   if (end == -1) {
 146     end = list_len;
 147   }
 148   tor_assert(start <= end);
 149
 150   smartlist_slice_t *slice = tor_malloc(sizeof(smartlist_slice_t));
 151   slice->list = list;
 152   slice->offset = start;
 153   slice->len = end - start;
 154   return slice;
 155 }
 156
 157 /** Helper: Compute the longest common subsequence lengths for the two slices.
 158  * Used as part of the diff generation to find the column at which to split
 159  * slice2 while still having the optimal solution.
 160  * If direction is -1, the navigation is reversed. Otherwise it must be 1.
 161  * The length of the resulting integer array is that of the second slice plus
 162  * one.
 163  */
 164 STATIC int *
 165 lcs_lengths(const smartlist_slice_t *slice1, const smartlist_slice_t *slice2,
 166             int direction)
 167 {
 168   size_t a_size = sizeof(int) * (slice2->len+1);
 169
 170   /* Resulting lcs lengths. */
 171   int *result = tor_malloc_zero(a_size);
 172   /* Copy of the lcs lengths from the last iteration. */
 173   int *prev = tor_malloc(a_size);
 174
 175   tor_assert(direction == 1 || direction == -1);
 176
 177   int si = slice1->offset;
 178   if (direction == -1) {
 179     si += (slice1->len-1);
 180   }
 181
 182   for (int i = 0; i < slice1->len; ++i, si+=direction) {
 183
 184     const cdline_t *line1 = smartlist_get(slice1->list, si);
 185     /* Store the last results. */
 186     memcpy(prev, result, a_size);
 187
 188     int sj = slice2->offset;
 189     if (direction == -1) {
 190       sj += (slice2->len-1);
 191     }
 192
 193     for (int j = 0; j < slice2->len; ++j, sj+=direction) {
 194
 195       const cdline_t *line2 = smartlist_get(slice2->list, sj);
 196       if (lines_eq(line1, line2)) {
 197         /* If the lines are equal, the lcs is one line longer. */
 198         result[j + 1] = prev[j] + 1;
 199       } else {
 200         /* If not, see what lcs parent path is longer. */
 201         result[j + 1] = MAX(result[j], prev[j + 1]);
 202       }
 203     }
 204   }
 205   tor_free(prev);
 206   return result;
 207 }
 208
 209 /** Helper: Trim any number of lines that are equally at the start or the end
 210  * of both slices.
 211  */
 212 STATIC void
 213 trim_slices(smartlist_slice_t *slice1, smartlist_slice_t *slice2)
 214 {
 215   while (slice1->len>0 && slice2->len>0) {
 216     const cdline_t *line1 = smartlist_get(slice1->list, slice1->offset);
 217     const cdline_t *line2 = smartlist_get(slice2->list, slice2->offset);
 218     if (!lines_eq(line1, line2)) {
 219       break;
 220     }
 221     slice1->offset++; slice1->len--;
 222     slice2->offset++; slice2->len--;
 223   }
 224
 225   int i1 = (slice1->offset+slice1->len)-1;
 226   int i2 = (slice2->offset+slice2->len)-1;
 227
 228   while (slice1->len>0 && slice2->len>0) {
 229     const cdline_t *line1 = smartlist_get(slice1->list, i1);
 230     const cdline_t *line2 = smartlist_get(slice2->list, i2);
 231     if (!lines_eq(line1, line2)) {
 232       break;
 233     }
 234     i1--;
 235     slice1->len--;
 236     i2--;
 237     slice2->len--;
 238   }
 239 }
 240
 241 /** Like smartlist_string_pos, but uses a cdline_t, and is restricted to the
 242  * bounds of the slice.
 243  */
 244 STATIC int
 245 smartlist_slice_string_pos(const smartlist_slice_t *slice,
 246                            const cdline_t *string)
 247 {
 248   int end = slice->offset + slice->len;
 249   for (int i = slice->offset; i < end; ++i) {
 250     const cdline_t *el = smartlist_get(slice->list, i);
 251     if (lines_eq(el, string)) {
 252       return i;
 253     }
 254   }
 255   return -1;
 256 }
 257
 258 /** Helper: Set all the appropriate changed booleans to true. The first slice
 259  * must be of length 0 or 1. All the lines of slice1 and slice2 which are not
 260  * present in the other slice will be set to changed in their bool array.
 261  * The two changed bool arrays are passed in the same order as the slices.
 262  */
 263 STATIC void
 264 set_changed(bitarray_t *changed1, bitarray_t *changed2,
 265             const smartlist_slice_t *slice1, const smartlist_slice_t *slice2)
 266 {
 267   int toskip = -1;
 268   tor_assert(slice1->len == 0 || slice1->len == 1);
 269
 270   if (slice1->len == 1) {
 271     const cdline_t *line_common = smartlist_get(slice1->list, slice1->offset);
 272     toskip = smartlist_slice_string_pos(slice2, line_common);
 273     if (toskip == -1) {
 274       bitarray_set(changed1, slice1->offset);
 275     }
 276   }
 277   int end = slice2->offset + slice2->len;
 278   for (int i = slice2->offset; i < end; ++i) {
 279     if (i != toskip) {
 280       bitarray_set(changed2, i);
 281     }
 282   }
 283 }
 284
 285 /*
 286  * Helper: Given that slice1 has been split by half into top and bot, we want
 287  * to fetch the column at which to split slice2 so that we are still on track
 288  * to the optimal diff solution, i.e. the shortest one. We use lcs_lengths
 289  * since the shortest diff is just another way to say the longest common
 290  * subsequence.
 291  */
 292 static int
 293 optimal_column_to_split(const smartlist_slice_t *top,
 294                         const smartlist_slice_t *bot,
 295                         const smartlist_slice_t *slice2)
 296 {
 297   int *lens_top = lcs_lengths(top, slice2, 1);
 298   int *lens_bot = lcs_lengths(bot, slice2, -1);
 299   int column=0, max_sum=-1;
 300
 301   for (int i = 0; i < slice2->len+1; ++i) {
 302     int sum = lens_top[i] + lens_bot[slice2->len-i];
 303     if (sum > max_sum) {
 304       column = i;
 305       max_sum = sum;
 306     }
 307   }
 308   tor_free(lens_top);
 309   tor_free(lens_bot);
 310
 311   return column;
 312 }
 313
 314 /**
 315  * Helper: Figure out what elements are new or gone on the second smartlist
 316  * relative to the first smartlist, and store the booleans in the bitarrays.
 317  * True on the first bitarray means the element is gone, true on the second
 318  * bitarray means it's new.
 319  *
 320  * In its base case, either of the smartlists is of length <= 1 and we can
 321  * quickly see what elements are new or are gone. In the other case, we will
 322  * split one smartlist by half and we'll use optimal_column_to_split to find
 323  * the optimal column at which to split the second smartlist so that we are
 324  * finding the smallest diff possible.
 325  */
 326 STATIC void
 327 calc_changes(smartlist_slice_t *slice1,
 328              smartlist_slice_t *slice2,
 329              bitarray_t *changed1, bitarray_t *changed2)
 330 {
 331   trim_slices(slice1, slice2);
 332
 333   if (slice1->len <= 1) {
 334     set_changed(changed1, changed2, slice1, slice2);
 335
 336   } else if (slice2->len <= 1) {
 337     set_changed(changed2, changed1, slice2, slice1);
 338
 339   /* Keep on splitting the slices in two. */
 340   } else {
 341     smartlist_slice_t *top, *bot, *left, *right;
 342
 343     /* Split the first slice in half. */
 344     int mid = slice1->len/2;
 345     top = smartlist_slice(slice1->list, slice1->offset, slice1->offset+mid);
 346     bot = smartlist_slice(slice1->list, slice1->offset+mid,
 347         slice1->offset+slice1->len);
 348
 349     /* Split the second slice by the optimal column. */
 350     int mid2 = optimal_column_to_split(top, bot, slice2);
 351     left = smartlist_slice(slice2->list, slice2->offset, slice2->offset+mid2);
 352     right = smartlist_slice(slice2->list, slice2->offset+mid2,
 353         slice2->offset+slice2->len);
 354
 355     calc_changes(top, left, changed1, changed2);
 356     calc_changes(bot, right, changed1, changed2);
 357     tor_free(top);
 358     tor_free(bot);
 359     tor_free(left);
 360     tor_free(right);
 361   }
 362 }
 363
 364 /* This table is from crypto.c. The SP and PAD defines are different. */
 365 #define NOT_VALID_BASE64 255
 366 #define X NOT_VALID_BASE64
 367 #define SP NOT_VALID_BASE64
 368 #define PAD NOT_VALID_BASE64
 369 static const uint8_t base64_compare_table[256] = {
 370   X, X, X, X, X, X, X, X, X, SP, SP, SP, X, SP, X, X,
 371   X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
 372   SP, X, X, X, X, X, X, X, X, X, X, 62, X, X, X, 63,
 373   52, 53, 54, 55, 56, 57, 58, 59, 60, 61, X, X, X, PAD, X, X,
 374   X, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
 375   15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, X, X, X, X, X,
 376   X, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
 377   41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, X, X, X, X, X,
 378   X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
 379   X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
 380   X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
 381   X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
 382   X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
 383   X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
 384   X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
 385   X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
 386 };
 387
 388 /** Helper: Get the identity hash from a router line, assuming that the line
 389  * at least appears to be a router line and thus starts with "r ".
 390  *
 391  * If an identity hash is found, store it (without decoding it) in
 392  * <b>hash_out</b>, and return 0.  On failure, return -1.
 393  */
 394 STATIC int
 395 get_id_hash(const cdline_t *line, cdline_t *hash_out)
 396 {
 397   if (line->len < 2)
 398     return -1;
 399
 400   /* Skip the router name. */
 401   const char *hash = memchr(line->s + 2, ' ', line->len - 2);
 402   if (!hash) {
 403     return -1;
 404   }
 405
 406   hash++;
 407   const char *hash_end = hash;
 408   /* Stop when the first non-base64 character is found. Use unsigned chars to
 409    * avoid negative indexes causing crashes.
 410    */
 411   while (base64_compare_table[*((unsigned char*)hash_end)]
 412            != NOT_VALID_BASE64 &&
 413          hash_end < line->s + line->len) {
 414     hash_end++;
 415   }
 416
 417   /* Empty hash. */
 418   if (hash_end == hash) {
 419     return -1;
 420   }
 421
 422   hash_out->s = hash;
 423   /* Always true because lines are limited to this length */
 424   tor_assert(hash_end >= hash);
 425   tor_assert((size_t)(hash_end - hash) <= UINT32_MAX);
 426   hash_out->len = (uint32_t)(hash_end - hash);
 427
 428   return 0;
 429 }
 430
 431 /** Helper: Check that a line is a valid router entry. We must at least be
 432  * able to fetch a proper identity hash from it for it to be valid.
 433  */
 434 STATIC int
 435 is_valid_router_entry(const cdline_t *line)
 436 {
 437   if (line->len < 2 || fast_memneq(line->s, "r ", 2))
 438     return 0;
 439   cdline_t tmp;
 440   return (get_id_hash(line, &tmp) == 0);
 441 }
 442
 443 /** Helper: Find the next router line starting at the current position.
 444  * Assumes that cur is lower than the length of the smartlist, i.e. it is a
 445  * line within the bounds of the consensus. The only exception is when we
 446  * don't want to skip the first line, in which case cur will be -1.
 447  */
 448 STATIC int
 449 next_router(const smartlist_t *cons, int cur)
 450 {
 451   int len = smartlist_len(cons);
 452   tor_assert(cur >= -1 && cur < len);
 453
 454   if (++cur >= len) {
 455     return len;
 456   }
 457
 458   const cdline_t *line = smartlist_get(cons, cur);
 459   while (!is_valid_router_entry(line)) {
 460     if (++cur >= len) {
 461       return len;
 462     }
 463     line = smartlist_get(cons, cur);
 464   }
 465   return cur;
 466 }
 467
 468 /** Helper: compare two base64-encoded identity hashes, which may be of
 469  * different lengths. Comparison ends when the first non-base64 char is found.
 470  */
 471 STATIC int
 472 base64cmp(const cdline_t *hash1, const cdline_t *hash2)
 473 {
 474   /* NULL is always lower, useful for last_hash which starts at NULL. */
 475   if (!hash1->s && !hash2->s) {
 476     return 0;
 477   }
 478   if (!hash1->s) {
 479     return -1;
 480   }
 481   if (!hash2->s) {
 482     return 1;
 483   }
 484
 485   /* Don't index with a char; char may be signed. */
 486   const unsigned char *a = (unsigned char*)hash1->s;
 487   const unsigned char *b = (unsigned char*)hash2->s;
 488   const unsigned char *a_end = a + hash1->len;
 489   const unsigned char *b_end = b + hash2->len;
 490   while (1) {
 491     uint8_t av = base64_compare_table[*a];
 492     uint8_t bv = base64_compare_table[*b];
 493     if (av == NOT_VALID_BASE64) {
 494       if (bv == NOT_VALID_BASE64) {
 495         /* Both ended with exactly the same characters. */
 496         return 0;
 497       } else {
 498         /* hash2 goes on longer than hash1 and thus hash1 is lower. */
 499         return -1;
 500       }
 501     } else if (bv == NOT_VALID_BASE64) {
 502       /* hash1 goes on longer than hash2 and thus hash1 is greater. */
 503       return 1;
 504     } else if (av < bv) {
 505       /* The first difference shows that hash1 is lower. */
 506       return -1;
 507     } else if (av > bv) {
 508       /* The first difference shows that hash1 is greater. */
 509       return 1;
 510     } else {
 511       a++;
 512       b++;
 513       if (a == a_end) {
 514         if (b == b_end) {
 515           return 0;
 516         } else {
 517           return -1;
 518         }
 519       } else if (b == b_end) {
 520         return 1;
 521       }
 522     }
 523   }
 524 }
 525
 526 /** Structure used to remember the previous and current identity hash of
 527  * the "r " lines in a consensus, to enforce well-ordering. */
 528 typedef struct router_id_iterator_t {
 529   cdline_t last_hash;
 530   cdline_t hash;
 531 } router_id_iterator_t;
 532
 533 #ifndef COCCI
 534 /**
 535  * Initializer for a router_id_iterator_t.
 536  */
 537 #define ROUTER_ID_ITERATOR_INIT { { NULL, 0 }, { NULL, 0 } }
 538 #endif /* !defined(COCCI) */
 539
 540 /** Given an index *<b>idxp</b> into the consensus at <b>cons</b>, advance
 541  * the index to the next router line ("r ...") in the consensus, or to
 542  * an index one after the end of the list if there is no such line.
 543  *
 544  * Use <b>iter</b> to record the hash of the found router line, if any,
 545  * and to enforce ordering on the hashes.  If the hashes are mis-ordered,
 546  * return -1.  Else, return 0.
 547  **/
 548 static int
 549 find_next_router_line(const smartlist_t *cons,
 550                       const char *consname,
 551                       int *idxp,
 552                       router_id_iterator_t *iter)
 553 {
 554   *idxp = next_router(cons, *idxp);
 555   if (*idxp < smartlist_len(cons)) {
 556     memcpy(&iter->last_hash, &iter->hash, sizeof(cdline_t));
 557     if (get_id_hash(smartlist_get(cons, *idxp), &iter->hash) < 0 ||
 558         base64cmp(&iter->hash, &iter->last_hash) <= 0) {
 559       log_warn(LD_CONSDIFF, "Refusing to generate consensus diff because "
 560                "the %s consensus doesn't have its router entries sorted "
 561                "properly.", consname);
 562       return -1;
 563     }
 564   }
 565   return 0;
 566 }
 567
 568 /** Line-prefix indicating the beginning of the signatures section that we
 569  * intend to delete. */
 570 #define START_OF_SIGNATURES_SECTION "directory-signature "
 571
 572 /** Pre-process a consensus in <b>cons</b> (represented as a list of cdline_t)
 573  * to remove the signatures from it.  If the footer is removed, return a
 574  * cdline_t containing a delete command to delete the footer, allocated in
 575  * <b>area</b>.  If no footer is removed, return NULL.
 576  *
 577  * We remove the signatures here because they are not themselves signed, and
 578  * as such there might be different encodings for them.
 579  */
 580 static cdline_t *
 581 preprocess_consensus(memarea_t *area,
 582                      smartlist_t *cons)
 583 {
 584   int idx;
 585   int dirsig_idx = -1;
 586   for (idx = 0; idx < smartlist_len(cons); ++idx) {
 587     cdline_t *line = smartlist_get(cons, idx);
 588     if (line_starts_with_str(line, START_OF_SIGNATURES_SECTION)) {
 589       dirsig_idx = idx;
 590       break;
 591     }
 592   }
 593   if (dirsig_idx >= 0) {
 594     char buf[64];
 595     while (smartlist_len(cons) > dirsig_idx)
 596       smartlist_del(cons, dirsig_idx);
 597     tor_snprintf(buf, sizeof(buf), "%d,$d", dirsig_idx+1);
 598     return cdline_linecpy(area, buf);
 599   } else {
 600     return NULL;
 601   }
 602 }
 603
 604 /** Generate an ed diff as a smartlist from two consensuses, also given as
 605  * smartlists. Will return NULL if the diff could not be generated, which can
 606  * happen if any lines the script had to add matched "." or if the routers
 607  * were not properly ordered.
 608  *
 609  * All cdline_t objects in the resulting object are either references to lines
 610  * in one of the inputs, or are newly allocated lines in the provided memarea.
 611  *
 612  * This implementation is consensus-specific. To generate an ed diff for any
 613  * given input in quadratic time, you can replace all the code until the
 614  * navigation in reverse order with the following:
 615  *
 616  *   int len1 = smartlist_len(cons1);
 617  *   int len2 = smartlist_len(cons2);
 618  *   bitarray_t *changed1 = bitarray_init_zero(len1);
 619  *   bitarray_t *changed2 = bitarray_init_zero(len2);
 620  *   cons1_sl = smartlist_slice(cons1, 0, -1);
 621  *   cons2_sl = smartlist_slice(cons2, 0, -1);
 622  *   calc_changes(cons1_sl, cons2_sl, changed1, changed2);
 623  */
 624 STATIC smartlist_t *
 625 gen_ed_diff(const smartlist_t *cons1_orig, const smartlist_t *cons2,
 626             memarea_t *area)
 627 {
 628   smartlist_t *cons1 = smartlist_new();
 629   smartlist_add_all(cons1, cons1_orig);
 630   cdline_t *remove_trailer = preprocess_consensus(area, cons1);
 631
 632   int len1 = smartlist_len(cons1);
 633   int len2 = smartlist_len(cons2);
 634   smartlist_t *result = smartlist_new();
 635
 636   if (remove_trailer) {
 637     /* There's a delete-the-trailer line at the end, so add it here. */
 638     smartlist_add(result, remove_trailer);
 639   }
 640
 641   /* Initialize the changed bitarrays to zero, so that calc_changes only needs
 642    * to set the ones that matter and leave the rest untouched.
 643    */
 644   bitarray_t *changed1 = bitarray_init_zero(len1);
 645   bitarray_t *changed2 = bitarray_init_zero(len2);
 646   int i1=-1, i2=-1;
 647   int start1=0, start2=0;
 648
 649   /* To check that hashes are ordered properly */
 650   router_id_iterator_t iter1 = ROUTER_ID_ITERATOR_INIT;
 651   router_id_iterator_t iter2 = ROUTER_ID_ITERATOR_INIT;
 652
 653   /* i1 and i2 are initialized at the first line of each consensus. They never
 654    * reach past len1 and len2 respectively, since next_router doesn't let that
 655    * happen. i1 and i2 are advanced by at least one line at each iteration as
 656    * long as they have not yet reached len1 and len2, so the loop is
 657    * guaranteed to end, and each pair of (i1,i2) will be inspected at most
 658    * once.
 659    */
 660   while (i1 < len1 || i2 < len2) {
 661
 662     /* Advance each of the two navigation positions by one router entry if not
 663      * yet at the end.
 664      */
 665     if (i1 < len1) {
 666       if (find_next_router_line(cons1, "base", &i1, &iter1) < 0) {
 667         goto error_cleanup;
 668       }
 669     }
 670
 671     if (i2 < len2) {
 672       if (find_next_router_line(cons2, "target", &i2, &iter2) < 0) {
 673         goto error_cleanup;
 674       }
 675     }
 676
 677     /* If we have reached the end of both consensuses, there is no need to
 678      * compare hashes anymore, since this is the last iteration.
 679      */
 680     if (i1 < len1 || i2 < len2) {
 681
 682       /* Keep on advancing the lower (by identity hash sorting) position until
 683        * we have two matching positions. The only other possible outcome is
 684        * that a lower position reaches the end of the consensus before it can
 685        * reach a hash that is no longer the lower one. Since there will always
 686        * be a lower hash for as long as the loop runs, one of the two indexes
 687        * will always be incremented, thus assuring that the loop must end
 688        * after a finite number of iterations. If that cannot be because said
 689        * consensus has already reached the end, both are extended to their
 690        * respecting ends since we are done.
 691        */
 692       int cmp = base64cmp(&iter1.hash, &iter2.hash);
 693       while (cmp != 0) {
 694         if (i1 < len1 && cmp < 0) {
 695           if (find_next_router_line(cons1, "base", &i1, &iter1) < 0) {
 696             goto error_cleanup;
 697           }
 698           if (i1 == len1) {
 699             /* We finished the first consensus, so grab all the remaining
 700              * lines of the second consensus and finish up.
 701              */
 702             i2 = len2;
 703             break;
 704           }
 705         } else if (i2 < len2 && cmp > 0) {
 706           if (find_next_router_line(cons2, "target", &i2, &iter2) < 0) {
 707             goto error_cleanup;
 708           }
 709           if (i2 == len2) {
 710             /* We finished the second consensus, so grab all the remaining
 711              * lines of the first consensus and finish up.
 712              */
 713             i1 = len1;
 714             break;
 715           }
 716         } else {
 717           i1 = len1;
 718           i2 = len2;
 719           break;
 720         }
 721         cmp = base64cmp(&iter1.hash, &iter2.hash);
 722       }
 723     }
 724
 725     /* Make slices out of these chunks (up to the common router entry) and
 726      * calculate the changes for them.
 727      * Error if any of the two slices are longer than 10K lines. That should
 728      * never happen with any pair of real consensuses. Feeding more than 10K
 729      * lines to calc_changes would be very slow anyway.
 730      */
 731 #define MAX_LINE_COUNT (10000)
 732     if (i1-start1 > MAX_LINE_COUNT || i2-start2 > MAX_LINE_COUNT) {
 733       log_warn(LD_CONSDIFF, "Refusing to generate consensus diff because "
 734           "we found too few common router ids.");
 735       goto error_cleanup;
 736     }
 737
 738     smartlist_slice_t *cons1_sl = smartlist_slice(cons1, start1, i1);
 739     smartlist_slice_t *cons2_sl = smartlist_slice(cons2, start2, i2);
 740     calc_changes(cons1_sl, cons2_sl, changed1, changed2);
 741     tor_free(cons1_sl);
 742     tor_free(cons2_sl);
 743     start1 = i1, start2 = i2;
 744   }
 745
 746   /* Navigate the changes in reverse order and generate one ed command for
 747    * each chunk of changes.
 748    */
 749   i1=len1-1, i2=len2-1;
 750   char buf[128];
 751   while (i1 >= 0 || i2 >= 0) {
 752
 753     int start1x, start2x, end1, end2, added, deleted;
 754
 755     /* We are at a point were no changed bools are true, so just keep going. */
 756     if (!(i1 >= 0 && bitarray_is_set(changed1, i1)) &&
 757         !(i2 >= 0 && bitarray_is_set(changed2, i2))) {
 758       if (i1 >= 0) {
 759         i1--;
 760       }
 761       if (i2 >= 0) {
 762         i2--;
 763       }
 764       continue;
 765     }
 766
 767     end1 = i1, end2 = i2;
 768
 769     /* Grab all contiguous changed lines */
 770     while (i1 >= 0 && bitarray_is_set(changed1, i1)) {
 771       i1--;
 772     }
 773     while (i2 >= 0 && bitarray_is_set(changed2, i2)) {
 774       i2--;
 775     }
 776
 777     start1x = i1+1, start2x = i2+1;
 778     added = end2-i2, deleted = end1-i1;
 779
 780     if (added == 0) {
 781       if (deleted == 1) {
 782         tor_snprintf(buf, sizeof(buf), "%id", start1x+1);
 783         smartlist_add_linecpy(result, area, buf);
 784       } else {
 785         tor_snprintf(buf, sizeof(buf), "%i,%id", start1x+1, start1x+deleted);
 786         smartlist_add_linecpy(result, area, buf);
 787       }
 788     } else {
 789       int i;
 790       if (deleted == 0) {
 791         tor_snprintf(buf, sizeof(buf), "%ia", start1x);
 792         smartlist_add_linecpy(result, area, buf);
 793       } else if (deleted == 1) {
 794         tor_snprintf(buf, sizeof(buf), "%ic", start1x+1);
 795         smartlist_add_linecpy(result, area, buf);
 796       } else {
 797         tor_snprintf(buf, sizeof(buf), "%i,%ic", start1x+1, start1x+deleted);
 798         smartlist_add_linecpy(result, area, buf);
 799       }
 800
 801       for (i = start2x; i <= end2; ++i) {
 802         cdline_t *line = smartlist_get(cons2, i);
 803         if (line_str_eq(line, ".")) {
 804           log_warn(LD_CONSDIFF, "Cannot generate consensus diff because "
 805               "one of the lines to be added is \".\".");
 806           goto error_cleanup;
 807         }
 808         smartlist_add(result, line);
 809       }
 810       smartlist_add_linecpy(result, area, ".");
 811     }
 812   }
 813
 814   smartlist_free(cons1);
 815   bitarray_free(changed1);
 816   bitarray_free(changed2);
 817
 818   return result;
 819
 820  error_cleanup:
 821
 822   smartlist_free(cons1);
 823   bitarray_free(changed1);
 824   bitarray_free(changed2);
 825
 826   smartlist_free(result);
 827
 828   return NULL;
 829 }
 830
 831 /* Helper: Read a base-10 number between 0 and INT32_MAX from <b>s</b> and
 832  * store it in <b>num_out</b>.  Advance <b>s</b> to the character immediately
 833  * after the number.  Return 0 on success, -1 on failure. */
 834 static int
 835 get_linenum(const char **s, int *num_out)
 836 {
 837   int ok;
 838   char *next;
 839   if (!TOR_ISDIGIT(**s)) {
 840     return -1;
 841   }
 842   *num_out = (int) tor_parse_long(*s, 10, 0, INT32_MAX, &ok, &next);
 843   if (ok && next) {
 844     *s = next;
 845     return 0;
 846   } else {
 847     return -1;
 848   }
 849 }
 850
 851 /** Apply the ed diff, starting at <b>diff_starting_line</b>, to the consensus
 852  * and return a new consensus, also as a line-based smartlist. Will return
 853  * NULL if the ed diff is not properly formatted.
 854  *
 855  * All cdline_t objects in the resulting object are references to lines
 856  * in one of the inputs; nothing is copied.
 857  */
 858 STATIC smartlist_t *
 859 apply_ed_diff(const smartlist_t *cons1, const smartlist_t *diff,
 860               int diff_starting_line)
 861 {
 862   int diff_len = smartlist_len(diff);
 863   int j = smartlist_len(cons1);
 864   smartlist_t *cons2 = smartlist_new();
 865
 866   for (int i=diff_starting_line; i<diff_len; ++i) {
 867     const cdline_t *diff_cdline = smartlist_get(diff, i);
 868     char diff_line[128];
 869
 870     if (diff_cdline->len > sizeof(diff_line) - 1) {
 871       log_warn(LD_CONSDIFF, "Could not apply consensus diff because "
 872                "an ed command was far too long");
 873       goto error_cleanup;
 874     }
 875     /* Copy the line to make it nul-terminated. */
 876     memcpy(diff_line, diff_cdline->s, diff_cdline->len);
 877     diff_line[diff_cdline->len] = 0;
 878     const char *ptr = diff_line;
 879     int start = 0, end = 0;
 880     int had_range = 0;
 881     int end_was_eof = 0;
 882     if (get_linenum(&ptr, &start) < 0) {
 883       log_warn(LD_CONSDIFF, "Could not apply consensus diff because "
 884                "an ed command was missing a line number.");
 885       goto error_cleanup;
 886     }
 887     if (*ptr == ',') {
 888       /* Two-item range */
 889       had_range = 1;
 890       ++ptr;
 891       if (*ptr == '$') {
 892         end_was_eof = 1;
 893         end = smartlist_len(cons1);
 894         ++ptr;
 895       } else if (get_linenum(&ptr, &end) < 0) {
 896         log_warn(LD_CONSDIFF, "Could not apply consensus diff because "
 897                  "an ed command was missing a range end line number.");
 898         goto error_cleanup;
 899       }
 900       /* Incoherent range. */
 901       if (end <= start) {
 902         log_warn(LD_CONSDIFF, "Could not apply consensus diff because "
 903                  "an invalid range was found in an ed command.");
 904         goto error_cleanup;
 905       }
 906     } else {
 907       /* We'll take <n1> as <n1>,<n1> for simplicity. */
 908       end = start;
 909     }
 910
 911     if (end > j) {
 912       log_warn(LD_CONSDIFF, "Could not apply consensus diff because "
 913           "its commands are not properly sorted in reverse order.");
 914       goto error_cleanup;
 915     }
 916
 917     if (*ptr == '\0') {
 918       log_warn(LD_CONSDIFF, "Could not apply consensus diff because "
 919                "a line with no ed command was found");
 920       goto error_cleanup;
 921     }
 922
 923     if (*(ptr+1) != '\0') {
 924       log_warn(LD_CONSDIFF, "Could not apply consensus diff because "
 925           "an ed command longer than one char was found.");
 926       goto error_cleanup;
 927     }
 928
 929     char action = *ptr;
 930
 931     switch (action) {
 932       case 'a':
 933       case 'c':
 934       case 'd':
 935         break;
 936       default:
 937         log_warn(LD_CONSDIFF, "Could not apply consensus diff because "
 938             "an unrecognised ed command was found.");
 939         goto error_cleanup;
 940     }
 941
 942     /** $ is not allowed with non-d actions. */
 943     if (end_was_eof && action != 'd') {
 944       log_warn(LD_CONSDIFF, "Could not apply consensus diff because "
 945                "it wanted to use $ with a command other than delete");
 946       goto error_cleanup;
 947     }
 948
 949     /* 'a' commands are not allowed to have ranges. */
 950     if (had_range && action == 'a') {
 951       log_warn(LD_CONSDIFF, "Could not apply consensus diff because "
 952           "it wanted to add lines after a range.");
 953       goto error_cleanup;
 954     }
 955
 956     /* Add unchanged lines. */
 957     for (; j && j > end; --j) {
 958       cdline_t *cons_line = smartlist_get(cons1, j-1);
 959       smartlist_add(cons2, cons_line);
 960     }
 961
 962     /* Ignore removed lines. */
 963     if (action == 'c' || action == 'd') {
 964       while (--j >= start) {
 965         /* Skip line */
 966       }
 967     }
 968
 969     /* Add new lines in reverse order, since it will all be reversed at the
 970      * end.
 971      */
 972     if (action == 'a' || action == 'c') {
 973       int added_end = i;
 974
 975       i++; /* Skip the line with the range and command. */
 976       while (i < diff_len) {
 977         if (line_str_eq(smartlist_get(diff, i), ".")) {
 978           break;
 979         }
 980         if (++i == diff_len) {
 981           log_warn(LD_CONSDIFF, "Could not apply consensus diff because "
 982               "it has lines to be inserted that don't end with a \".\".");
 983           goto error_cleanup;
 984         }
 985       }
 986
 987       int added_i = i-1;
 988
 989       /* It would make no sense to add zero new lines. */
 990       if (added_i == added_end) {
 991         log_warn(LD_CONSDIFF, "Could not apply consensus diff because "
 992             "it has an ed command that tries to insert zero lines.");
 993         goto error_cleanup;
 994       }
 995
 996       while (added_i > added_end) {
 997         cdline_t *added_line = smartlist_get(diff, added_i--);
 998         smartlist_add(cons2, added_line);
 999       }
1000     }
1001   }
1002
1003   /* Add remaining unchanged lines. */
1004   for (; j > 0; --j) {
1005     cdline_t *cons_line = smartlist_get(cons1, j-1);
1006     smartlist_add(cons2, cons_line);
1007   }
1008
1009   /* Reverse the whole thing since we did it from the end. */
1010   smartlist_reverse(cons2);
1011   return cons2;
1012
1013  error_cleanup:
1014
1015   smartlist_free(cons2);
1016
1017   return NULL;
1018 }
1019
1020 /** Generate a consensus diff as a smartlist from two given consensuses, also
1021  * as smartlists. Will return NULL if the consensus diff could not be
1022  * generated. Neither of the two consensuses are modified in any way, so it's
1023  * up to the caller to free their resources.
1024  */
1025 smartlist_t *
1026 consdiff_gen_diff(const smartlist_t *cons1,
1027                   const smartlist_t *cons2,
1028                   const consensus_digest_t *digests1,
1029                   const consensus_digest_t *digests2,
1030                   memarea_t *area)
1031 {
1032   smartlist_t *ed_diff = gen_ed_diff(cons1, cons2, area);
1033   /* ed diff could not be generated - reason already logged by gen_ed_diff. */
1034   if (!ed_diff) {
1035     goto error_cleanup;
1036   }
1037
1038   /* See that the script actually produces what we want. */
1039   smartlist_t *ed_cons2 = apply_ed_diff(cons1, ed_diff, 0);
1040   if (!ed_cons2) {
1041     /* LCOV_EXCL_START -- impossible if diff generation is correct */
1042     log_warn(LD_BUG|LD_CONSDIFF, "Refusing to generate consensus diff because "
1043         "the generated ed diff could not be tested to successfully generate "
1044         "the target consensus.");
1045     goto error_cleanup;
1046     /* LCOV_EXCL_STOP */
1047   }
1048
1049   int cons2_eq = 1;
1050   if (smartlist_len(cons2) == smartlist_len(ed_cons2)) {
1051     SMARTLIST_FOREACH_BEGIN(cons2, const cdline_t *, line1) {
1052       const cdline_t *line2 = smartlist_get(ed_cons2, line1_sl_idx);
1053       if (!lines_eq(line1, line2)) {
1054         cons2_eq = 0;
1055         break;
1056       }
1057     } SMARTLIST_FOREACH_END(line1);
1058   } else {
1059     cons2_eq = 0;
1060   }
1061   smartlist_free(ed_cons2);
1062   if (!cons2_eq) {
1063     /* LCOV_EXCL_START -- impossible if diff generation is correct. */
1064     log_warn(LD_BUG|LD_CONSDIFF, "Refusing to generate consensus diff because "
1065         "the generated ed diff did not generate the target consensus "
1066         "successfully when tested.");
1067     goto error_cleanup;
1068     /* LCOV_EXCL_STOP */
1069   }
1070
1071   char cons1_hash_hex[HEX_DIGEST256_LEN+1];
1072   char cons2_hash_hex[HEX_DIGEST256_LEN+1];
1073   base16_encode(cons1_hash_hex, HEX_DIGEST256_LEN+1,
1074                 (const char*)digests1->sha3_256, DIGEST256_LEN);
1075   base16_encode(cons2_hash_hex, HEX_DIGEST256_LEN+1,
1076                 (const char*)digests2->sha3_256, DIGEST256_LEN);
1077
1078   /* Create the resulting consensus diff. */
1079   char buf[160];
1080   smartlist_t *result = smartlist_new();
1081   tor_snprintf(buf, sizeof(buf), "%s", ns_diff_version);
1082   smartlist_add_linecpy(result, area, buf);
1083   tor_snprintf(buf, sizeof(buf), "%s %s %s", hash_token,
1084       cons1_hash_hex, cons2_hash_hex);
1085   smartlist_add_linecpy(result, area, buf);
1086   smartlist_add_all(result, ed_diff);
1087   smartlist_free(ed_diff);
1088   return result;
1089
1090  error_cleanup:
1091
1092   if (ed_diff) {
1093     /* LCOV_EXCL_START -- ed_diff is NULL except in unreachable cases above */
1094     smartlist_free(ed_diff);
1095     /* LCOV_EXCL_STOP */
1096   }
1097
1098   return NULL;
1099 }
1100
1101 /** Fetch the digest of the base consensus in the consensus diff, encoded in
1102  * base16 as found in the diff itself. digest1_out and digest2_out must be of
1103  * length DIGEST256_LEN or larger if not NULL.
1104  */
1105 int
1106 consdiff_get_digests(const smartlist_t *diff,
1107                      char *digest1_out,
1108                      char *digest2_out)
1109 {
1110   smartlist_t *hash_words = NULL;
1111   const cdline_t *format;
1112   char cons1_hash[DIGEST256_LEN], cons2_hash[DIGEST256_LEN];
1113   char *cons1_hash_hex, *cons2_hash_hex;
1114   if (smartlist_len(diff) < 2) {
1115     log_info(LD_CONSDIFF, "The provided consensus diff is too short.");
1116     goto error_cleanup;
1117   }
1118
1119   /* Check that it's the format and version we know. */
1120   format = smartlist_get(diff, 0);
1121   if (!line_str_eq(format, ns_diff_version)) {
1122     log_warn(LD_CONSDIFF, "The provided consensus diff format is not known.");
1123     goto error_cleanup;
1124   }
1125
1126   /* Grab the base16 digests. */
1127   hash_words = smartlist_new();
1128   {
1129     const cdline_t *line2 = smartlist_get(diff, 1);
1130     char *h = tor_memdup_nulterm(line2->s, line2->len);
1131     smartlist_split_string(hash_words, h, " ", 0, 4);
1132     tor_free(h);
1133   }
1134
1135   /* There have to be three words, the first of which must be hash_token. */
1136   if (smartlist_len(hash_words) != 3 ||
1137       strcmp(smartlist_get(hash_words, 0), hash_token)) {
1138     log_info(LD_CONSDIFF, "The provided consensus diff does not include "
1139         "the necessary digests.");
1140     goto error_cleanup;
1141   }
1142
1143   /* Expected hashes as found in the consensus diff header. They must be of
1144    * length HEX_DIGEST256_LEN, normally 64 hexadecimal characters.
1145    * If any of the decodings fail, error to make sure that the hashes are
1146    * proper base16-encoded digests.
1147    */
1148   cons1_hash_hex = smartlist_get(hash_words, 1);
1149   cons2_hash_hex = smartlist_get(hash_words, 2);
1150   if (strlen(cons1_hash_hex) != HEX_DIGEST256_LEN ||
1151       strlen(cons2_hash_hex) != HEX_DIGEST256_LEN) {
1152     log_info(LD_CONSDIFF, "The provided consensus diff includes "
1153         "base16-encoded digests of incorrect size.");
1154     goto error_cleanup;
1155   }
1156
1157   if (base16_decode(cons1_hash, DIGEST256_LEN,
1158           cons1_hash_hex, HEX_DIGEST256_LEN) != DIGEST256_LEN ||
1159       base16_decode(cons2_hash, DIGEST256_LEN,
1160           cons2_hash_hex, HEX_DIGEST256_LEN) != DIGEST256_LEN) {
1161     log_info(LD_CONSDIFF, "The provided consensus diff includes "
1162         "malformed digests.");
1163     goto error_cleanup;
1164   }
1165
1166   if (digest1_out) {
1167     memcpy(digest1_out, cons1_hash, DIGEST256_LEN);
1168   }
1169   if (digest2_out) {
1170     memcpy(digest2_out, cons2_hash, DIGEST256_LEN);
1171   }
1172
1173   SMARTLIST_FOREACH(hash_words, char *, cp, tor_free(cp));
1174   smartlist_free(hash_words);
1175   return 0;
1176
1177  error_cleanup:
1178
1179   if (hash_words) {
1180     SMARTLIST_FOREACH(hash_words, char *, cp, tor_free(cp));
1181     smartlist_free(hash_words);
1182   }
1183   return 1;
1184 }
1185
1186 /** Apply the consensus diff to the given consensus and return a new
1187  * consensus, also as a line-based smartlist. Will return NULL if the diff
1188  * could not be applied. Neither the consensus nor the diff are modified in
1189  * any way, so it's up to the caller to free their resources.
1190  */
1191 char *
1192 consdiff_apply_diff(const smartlist_t *cons1,
1193                     const smartlist_t *diff,
1194                     const consensus_digest_t *digests1)
1195 {
1196   smartlist_t *cons2 = NULL;
1197   char *cons2_str = NULL;
1198   char e_cons1_hash[DIGEST256_LEN];
1199   char e_cons2_hash[DIGEST256_LEN];
1200
1201   if (consdiff_get_digests(diff, e_cons1_hash, e_cons2_hash) != 0) {
1202     goto error_cleanup;
1203   }
1204
1205   /* See that the consensus that was given to us matches its hash. */
1206   if (!consensus_digest_eq(digests1->sha3_256,
1207                            (const uint8_t*)e_cons1_hash)) {
1208     char hex_digest1[HEX_DIGEST256_LEN+1];
1209     char e_hex_digest1[HEX_DIGEST256_LEN+1];
1210     log_warn(LD_CONSDIFF, "Refusing to apply consensus diff because "
1211         "the base consensus doesn't match the digest as found in "
1212         "the consensus diff header.");
1213     base16_encode(hex_digest1, HEX_DIGEST256_LEN+1,
1214                   (const char *)digests1->sha3_256, DIGEST256_LEN);
1215     base16_encode(e_hex_digest1, HEX_DIGEST256_LEN+1,
1216                   e_cons1_hash, DIGEST256_LEN);
1217     log_warn(LD_CONSDIFF, "Expected: %s; found: %s",
1218              hex_digest1, e_hex_digest1);
1219     goto error_cleanup;
1220   }
1221
1222   /* Grab the ed diff and calculate the resulting consensus. */
1223   /* Skip the first two lines. */
1224   cons2 = apply_ed_diff(cons1, diff, 2);
1225
1226   /* ed diff could not be applied - reason already logged by apply_ed_diff. */
1227   if (!cons2) {
1228     goto error_cleanup;
1229   }
1230
1231   cons2_str = consensus_join_lines(cons2);
1232
1233   consensus_digest_t cons2_digests;
1234   if (consensus_compute_digest(cons2_str, strlen(cons2_str),
1235                                &cons2_digests) < 0) {
1236     /* LCOV_EXCL_START -- digest can't fail */
1237     log_warn(LD_CONSDIFF, "Could not compute digests of the consensus "
1238         "resulting from applying a consensus diff.");
1239     goto error_cleanup;
1240     /* LCOV_EXCL_STOP */
1241   }
1242
1243   /* See that the resulting consensus matches its hash. */
1244   if (!consensus_digest_eq(cons2_digests.sha3_256,
1245                            (const uint8_t*)e_cons2_hash)) {
1246     log_warn(LD_CONSDIFF, "Refusing to apply consensus diff because "
1247         "the resulting consensus doesn't match the digest as found in "
1248         "the consensus diff header.");
1249     char hex_digest2[HEX_DIGEST256_LEN+1];
1250     char e_hex_digest2[HEX_DIGEST256_LEN+1];
1251     base16_encode(hex_digest2, HEX_DIGEST256_LEN+1,
1252         (const char *)cons2_digests.sha3_256, DIGEST256_LEN);
1253     base16_encode(e_hex_digest2, HEX_DIGEST256_LEN+1,
1254         e_cons2_hash, DIGEST256_LEN);
1255     log_warn(LD_CONSDIFF, "Expected: %s; found: %s",
1256              hex_digest2, e_hex_digest2);
1257     goto error_cleanup;
1258   }
1259
1260   goto done;
1261
1262  error_cleanup:
1263   tor_free(cons2_str); /* Sets it to NULL */
1264
1265  done:
1266   if (cons2) {
1267     smartlist_free(cons2);
1268   }
1269
1270   return cons2_str;
1271 }
1272
1273 /** Any consensus line longer than this means that the input is invalid. */
1274 #define CONSENSUS_LINE_MAX_LEN (1<<20)
1275
1276 /**
1277  * Helper: For every NL-terminated line in <b>s</b>, add a cdline referring to
1278  * that line (without trailing newline) to <b>out</b>.  Return -1 if there are
1279  * any non-NL terminated lines; 0 otherwise.
1280  *
1281  * Unlike tor_split_lines, this function avoids ambiguity on its
1282  * handling of a final line that isn't NL-terminated.
1283  *
1284  * All cdline_t objects are allocated in the provided memarea.  Strings
1285  * are not copied: if <b>s</b> changes or becomes invalid, then all
1286  * generated cdlines will become invalid.
1287  */
1288 STATIC int
1289 consensus_split_lines(smartlist_t *out,
1290                       const char *s, size_t len,
1291                       memarea_t *area)
1292 {
1293   const char *end_of_str = s + len;
1294
1295   while (s < end_of_str) {
1296     const char *eol = memchr(s, '\n', end_of_str - s);
1297     if (!eol) {
1298       /* File doesn't end with newline. */
1299       return -1;
1300     }
1301     if (eol - s > CONSENSUS_LINE_MAX_LEN) {
1302       /* Line is far too long. */
1303       return -1;
1304     }
1305     cdline_t *line = memarea_alloc(area, sizeof(cdline_t));
1306     line->s = s;
1307     line->len = (uint32_t)(eol - s);
1308     smartlist_add(out, line);
1309     s = eol+1;
1310   }
1311   return 0;
1312 }
1313
1314 /** Given a list of cdline_t, return a newly allocated string containing
1315  * all of the lines, terminated with NL, concatenated.
1316  *
1317  * Unlike smartlist_join_strings(), avoids lossy operations on empty
1318  * lists.  */
1319 static char *
1320 consensus_join_lines(const smartlist_t *inp)
1321 {
1322   size_t n = 0;
1323   SMARTLIST_FOREACH(inp, const cdline_t *, cdline, n += cdline->len + 1);
1324   n += 1;
1325   char *result = tor_malloc(n);
1326   char *out = result;
1327   SMARTLIST_FOREACH_BEGIN(inp, const cdline_t *, cdline) {
1328     memcpy(out, cdline->s, cdline->len);
1329     out += cdline->len;
1330     *out++ = '\n';
1331   } SMARTLIST_FOREACH_END(cdline);
1332   *out++ = '\0';
1333   tor_assert(out == result+n);
1334   return result;
1335 }
1336
1337 /** Given two consensus documents, try to compute a diff between them.  On
1338  * success, return a newly allocated string containing that diff.  On failure,
1339  * return NULL. */
1340 char *
1341 consensus_diff_generate(const char *cons1, size_t cons1len,
1342                         const char *cons2, size_t cons2len)
1343 {
1344   consensus_digest_t d1, d2;
1345   smartlist_t *lines1 = NULL, *lines2 = NULL, *result_lines = NULL;
1346   int r1, r2;
1347   char *result = NULL;
1348
1349   r1 = consensus_compute_digest_as_signed(cons1, cons1len, &d1);
1350   r2 = consensus_compute_digest(cons2, cons2len, &d2);
1351   if (BUG(r1 < 0 || r2 < 0))
1352     return NULL; // LCOV_EXCL_LINE
1353
1354   memarea_t *area = memarea_new();
1355   lines1 = smartlist_new();
1356   lines2 = smartlist_new();
1357   if (consensus_split_lines(lines1, cons1, cons1len, area) < 0)
1358     goto done;
1359   if (consensus_split_lines(lines2, cons2, cons2len, area) < 0)
1360     goto done;
1361
1362   result_lines = consdiff_gen_diff(lines1, lines2, &d1, &d2, area);
1363
1364  done:
1365   if (result_lines) {
1366     result = consensus_join_lines(result_lines);
1367     smartlist_free(result_lines);
1368   }
1369
1370   memarea_drop_all(area);
1371   smartlist_free(lines1);
1372   smartlist_free(lines2);
1373
1374   return result;
1375 }
1376
1377 /** Given a consensus document and a diff, try to apply the diff to the
1378  * consensus.  On success return a newly allocated string containing the new
1379  * consensus.  On failure, return NULL. */
1380 char *
1381 consensus_diff_apply(const char *consensus,
1382                      size_t consensus_len,
1383                      const char *diff,
1384                      size_t diff_len)
1385 {
1386   consensus_digest_t d1;
1387   smartlist_t *lines1 = NULL, *lines2 = NULL;
1388   int r1;
1389   char *result = NULL;
1390   memarea_t *area = memarea_new();
1391
1392   r1 = consensus_compute_digest_as_signed(consensus, consensus_len, &d1);
1393   if (BUG(r1 < 0))
1394     goto done;
1395
1396   lines1 = smartlist_new();
1397   lines2 = smartlist_new();
1398   if (consensus_split_lines(lines1, consensus, consensus_len, area) < 0)
1399     goto done;
1400   if (consensus_split_lines(lines2, diff, diff_len, area) < 0)
1401     goto done;
1402
1403   result = consdiff_apply_diff(lines1, lines2, &d1);
1404
1405  done:
1406   smartlist_free(lines1);
1407   smartlist_free(lines2);
1408   memarea_drop_all(area);
1409
1410   return result;
1411 }
1412
1413 /** Return true iff, based on its header, <b>document</b> is likely
1414  * to be a consensus diff. */
1415 int
1416 looks_like_a_consensus_diff(const char *document, size_t len)
1417 {
1418   return (len >= strlen(ns_diff_version) &&
1419           fast_memeq(document, ns_diff_version, strlen(ns_diff_version)));
1420 }