Merge branch 'maint-0.4.8'
[tor.git] / src / feature / dircommon / consdiff.c
blob323f2bd5761c03bef864fb6a883c749e049c0598
1 /* Copyright (c) 2014, Daniel Martí
2 * Copyright (c) 2014-2021, The Tor Project, Inc. */
3 /* See LICENSE for licensing information */
5 /**
6 * \file consdiff.c
7 * \brief Consensus diff implementation, including both the generation and the
8 * application of diffs in a minimal ed format.
10 * The consensus diff application is done in consdiff_apply_diff, which relies
11 * on apply_ed_diff for the main ed diff part and on some digest helper
12 * functions to check the digest hashes found in the consensus diff header.
14 * The consensus diff generation is more complex. consdiff_gen_diff generates
15 * it, relying on gen_ed_diff to generate the ed diff and some digest helper
16 * functions to generate the digest hashes.
18 * gen_ed_diff is the tricky bit. In it simplest form, it will take quadratic
19 * time and linear space to generate an ed diff given two smartlists. As shown
20 * in its comment section, calling calc_changes on the entire two consensuses
21 * will calculate what is to be added and what is to be deleted in the diff.
22 * Its comment section briefly explains how it works.
24 * In our case specific to consensuses, we take advantage of the fact that
25 * consensuses list routers sorted by their identities. We use that
26 * information to avoid running calc_changes on the whole smartlists.
27 * gen_ed_diff will navigate through the two consensuses identity by identity
28 * and will send small couples of slices to calc_changes, keeping the running
29 * time near-linear. This is explained in more detail in the gen_ed_diff
30 * comments.
32 * The allocation strategy tries to save time and memory by avoiding needless
33 * copies. Instead of actually splitting the inputs into separate strings, we
34 * allocate cdline_t objects, each of which represents a line in the original
35 * object or in the output. We use memarea_t allocators to manage the
36 * temporary memory we use when generating or applying diffs.
37 **/
39 #define CONSDIFF_PRIVATE
41 #include "core/or/or.h"
42 #include "feature/dircommon/consdiff.h"
43 #include "lib/memarea/memarea.h"
44 #include "feature/dirparse/ns_parse.h"
46 static const char* ns_diff_version = "network-status-diff-version 1";
47 static const char* hash_token = "hash";
49 static char *consensus_join_lines(const smartlist_t *inp);
51 /** Return true iff a and b have the same contents. */
52 STATIC int
53 lines_eq(const cdline_t *a, const cdline_t *b)
55 return a->len == b->len && fast_memeq(a->s, b->s, a->len);
58 /** Return true iff a has the same contents as the nul-terminated string b. */
59 STATIC int
60 line_str_eq(const cdline_t *a, const char *b)
62 const size_t len = strlen(b);
63 tor_assert(len <= UINT32_MAX);
64 cdline_t bline = { b, (uint32_t)len };
65 return lines_eq(a, &bline);
68 /** Return true iff a begins with the same contents as the nul-terminated
69 * string b. */
70 static int
71 line_starts_with_str(const cdline_t *a, const char *b)
73 const size_t len = strlen(b);
74 tor_assert(len <= UINT32_MAX);
75 return a->len >= len && fast_memeq(a->s, b, len);
78 /** Return a new cdline_t holding as its contents the nul-terminated
79 * string s. Use the provided memory area for storage. */
80 static cdline_t *
81 cdline_linecpy(memarea_t *area, const char *s)
83 size_t len = strlen(s);
84 const char *ss = memarea_memdup(area, s, len);
85 cdline_t *line = memarea_alloc(area, sizeof(cdline_t));
86 line->s = ss;
87 line->len = (uint32_t)len;
88 return line;
91 /** Add a cdline_t to <b>lst</b> holding as its contents the nul-terminated
92 * string s. Use the provided memory area for storage. */
93 STATIC void
94 smartlist_add_linecpy(smartlist_t *lst, memarea_t *area, const char *s)
96 smartlist_add(lst, cdline_linecpy(area, s));
99 /** Compute the digest of <b>cons</b>, and store the result in
100 * <b>digest_out</b>. Return 0 on success, -1 on failure. */
101 /* This is a separate, mockable function so that we can override it when
102 * fuzzing. */
103 MOCK_IMPL(STATIC int,
104 consensus_compute_digest,(const char *cons, size_t len,
105 consensus_digest_t *digest_out))
107 int r = crypto_digest256((char*)digest_out->sha3_256,
108 cons, len, DIGEST_SHA3_256);
109 return r;
112 /** Compute the digest-as-signed of <b>cons</b>, and store the result in
113 * <b>digest_out</b>. Return 0 on success, -1 on failure. */
114 /* This is a separate, mockable function so that we can override it when
115 * fuzzing. */
116 MOCK_IMPL(STATIC int,
117 consensus_compute_digest_as_signed,(const char *cons, size_t len,
118 consensus_digest_t *digest_out))
120 return router_get_networkstatus_v3_sha3_as_signed(digest_out->sha3_256,
121 cons, len);
124 /** Return true iff <b>d1</b> and <b>d2</b> contain the same digest */
125 /* This is a separate, mockable function so that we can override it when
126 * fuzzing. */
127 MOCK_IMPL(STATIC int,
128 consensus_digest_eq,(const uint8_t *d1,
129 const uint8_t *d2))
131 return fast_memeq(d1, d2, DIGEST256_LEN);
134 /** Create (allocate) a new slice from a smartlist. Assumes that the start
135 * and the end indexes are within the bounds of the initial smartlist. The end
136 * element is not part of the resulting slice. If end is -1, the slice is to
137 * reach the end of the smartlist.
139 STATIC smartlist_slice_t *
140 smartlist_slice(const smartlist_t *list, int start, int end)
142 int list_len = smartlist_len(list);
143 tor_assert(start >= 0);
144 tor_assert(start <= list_len);
145 if (end == -1) {
146 end = list_len;
148 tor_assert(start <= end);
150 smartlist_slice_t *slice = tor_malloc(sizeof(smartlist_slice_t));
151 slice->list = list;
152 slice->offset = start;
153 slice->len = end - start;
154 return slice;
157 /** Helper: Compute the longest common subsequence lengths for the two slices.
158 * Used as part of the diff generation to find the column at which to split
159 * slice2 while still having the optimal solution.
160 * If direction is -1, the navigation is reversed. Otherwise it must be 1.
161 * The length of the resulting integer array is that of the second slice plus
162 * one.
164 STATIC int *
165 lcs_lengths(const smartlist_slice_t *slice1, const smartlist_slice_t *slice2,
166 int direction)
168 size_t a_size = sizeof(int) * (slice2->len+1);
170 /* Resulting lcs lengths. */
171 int *result = tor_malloc_zero(a_size);
172 /* Copy of the lcs lengths from the last iteration. */
173 int *prev = tor_malloc(a_size);
175 tor_assert(direction == 1 || direction == -1);
177 int si = slice1->offset;
178 if (direction == -1) {
179 si += (slice1->len-1);
182 for (int i = 0; i < slice1->len; ++i, si+=direction) {
184 const cdline_t *line1 = smartlist_get(slice1->list, si);
185 /* Store the last results. */
186 memcpy(prev, result, a_size);
188 int sj = slice2->offset;
189 if (direction == -1) {
190 sj += (slice2->len-1);
193 for (int j = 0; j < slice2->len; ++j, sj+=direction) {
195 const cdline_t *line2 = smartlist_get(slice2->list, sj);
196 if (lines_eq(line1, line2)) {
197 /* If the lines are equal, the lcs is one line longer. */
198 result[j + 1] = prev[j] + 1;
199 } else {
200 /* If not, see what lcs parent path is longer. */
201 result[j + 1] = MAX(result[j], prev[j + 1]);
205 tor_free(prev);
206 return result;
209 /** Helper: Trim any number of lines that are equally at the start or the end
210 * of both slices.
212 STATIC void
213 trim_slices(smartlist_slice_t *slice1, smartlist_slice_t *slice2)
215 while (slice1->len>0 && slice2->len>0) {
216 const cdline_t *line1 = smartlist_get(slice1->list, slice1->offset);
217 const cdline_t *line2 = smartlist_get(slice2->list, slice2->offset);
218 if (!lines_eq(line1, line2)) {
219 break;
221 slice1->offset++; slice1->len--;
222 slice2->offset++; slice2->len--;
225 int i1 = (slice1->offset+slice1->len)-1;
226 int i2 = (slice2->offset+slice2->len)-1;
228 while (slice1->len>0 && slice2->len>0) {
229 const cdline_t *line1 = smartlist_get(slice1->list, i1);
230 const cdline_t *line2 = smartlist_get(slice2->list, i2);
231 if (!lines_eq(line1, line2)) {
232 break;
234 i1--;
235 slice1->len--;
236 i2--;
237 slice2->len--;
241 /** Like smartlist_string_pos, but uses a cdline_t, and is restricted to the
242 * bounds of the slice.
244 STATIC int
245 smartlist_slice_string_pos(const smartlist_slice_t *slice,
246 const cdline_t *string)
248 int end = slice->offset + slice->len;
249 for (int i = slice->offset; i < end; ++i) {
250 const cdline_t *el = smartlist_get(slice->list, i);
251 if (lines_eq(el, string)) {
252 return i;
255 return -1;
258 /** Helper: Set all the appropriate changed booleans to true. The first slice
259 * must be of length 0 or 1. All the lines of slice1 and slice2 which are not
260 * present in the other slice will be set to changed in their bool array.
261 * The two changed bool arrays are passed in the same order as the slices.
263 STATIC void
264 set_changed(bitarray_t *changed1, bitarray_t *changed2,
265 const smartlist_slice_t *slice1, const smartlist_slice_t *slice2)
267 int toskip = -1;
268 tor_assert(slice1->len == 0 || slice1->len == 1);
270 if (slice1->len == 1) {
271 const cdline_t *line_common = smartlist_get(slice1->list, slice1->offset);
272 toskip = smartlist_slice_string_pos(slice2, line_common);
273 if (toskip == -1) {
274 bitarray_set(changed1, slice1->offset);
277 int end = slice2->offset + slice2->len;
278 for (int i = slice2->offset; i < end; ++i) {
279 if (i != toskip) {
280 bitarray_set(changed2, i);
286 * Helper: Given that slice1 has been split by half into top and bot, we want
287 * to fetch the column at which to split slice2 so that we are still on track
288 * to the optimal diff solution, i.e. the shortest one. We use lcs_lengths
289 * since the shortest diff is just another way to say the longest common
290 * subsequence.
292 static int
293 optimal_column_to_split(const smartlist_slice_t *top,
294 const smartlist_slice_t *bot,
295 const smartlist_slice_t *slice2)
297 int *lens_top = lcs_lengths(top, slice2, 1);
298 int *lens_bot = lcs_lengths(bot, slice2, -1);
299 int column=0, max_sum=-1;
301 for (int i = 0; i < slice2->len+1; ++i) {
302 int sum = lens_top[i] + lens_bot[slice2->len-i];
303 if (sum > max_sum) {
304 column = i;
305 max_sum = sum;
308 tor_free(lens_top);
309 tor_free(lens_bot);
311 return column;
315 * Helper: Figure out what elements are new or gone on the second smartlist
316 * relative to the first smartlist, and store the booleans in the bitarrays.
317 * True on the first bitarray means the element is gone, true on the second
318 * bitarray means it's new.
320 * In its base case, either of the smartlists is of length <= 1 and we can
321 * quickly see what elements are new or are gone. In the other case, we will
322 * split one smartlist by half and we'll use optimal_column_to_split to find
323 * the optimal column at which to split the second smartlist so that we are
324 * finding the smallest diff possible.
326 STATIC void
327 calc_changes(smartlist_slice_t *slice1,
328 smartlist_slice_t *slice2,
329 bitarray_t *changed1, bitarray_t *changed2)
331 trim_slices(slice1, slice2);
333 if (slice1->len <= 1) {
334 set_changed(changed1, changed2, slice1, slice2);
336 } else if (slice2->len <= 1) {
337 set_changed(changed2, changed1, slice2, slice1);
339 /* Keep on splitting the slices in two. */
340 } else {
341 smartlist_slice_t *top, *bot, *left, *right;
343 /* Split the first slice in half. */
344 int mid = slice1->len/2;
345 top = smartlist_slice(slice1->list, slice1->offset, slice1->offset+mid);
346 bot = smartlist_slice(slice1->list, slice1->offset+mid,
347 slice1->offset+slice1->len);
349 /* Split the second slice by the optimal column. */
350 int mid2 = optimal_column_to_split(top, bot, slice2);
351 left = smartlist_slice(slice2->list, slice2->offset, slice2->offset+mid2);
352 right = smartlist_slice(slice2->list, slice2->offset+mid2,
353 slice2->offset+slice2->len);
355 calc_changes(top, left, changed1, changed2);
356 calc_changes(bot, right, changed1, changed2);
357 tor_free(top);
358 tor_free(bot);
359 tor_free(left);
360 tor_free(right);
364 /* This table is from crypto.c. The SP and PAD defines are different. */
365 #define NOT_VALID_BASE64 255
366 #define X NOT_VALID_BASE64
367 #define SP NOT_VALID_BASE64
368 #define PAD NOT_VALID_BASE64
369 static const uint8_t base64_compare_table[256] = {
370 X, X, X, X, X, X, X, X, X, SP, SP, SP, X, SP, X, X,
371 X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
372 SP, X, X, X, X, X, X, X, X, X, X, 62, X, X, X, 63,
373 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, X, X, X, PAD, X, X,
374 X, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
375 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, X, X, X, X, X,
376 X, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
377 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, X, X, X, X, X,
378 X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
379 X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
380 X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
381 X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
382 X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
383 X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
384 X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
385 X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
388 /** Helper: Get the identity hash from a router line, assuming that the line
389 * at least appears to be a router line and thus starts with "r ".
391 * If an identity hash is found, store it (without decoding it) in
392 * <b>hash_out</b>, and return 0. On failure, return -1.
394 STATIC int
395 get_id_hash(const cdline_t *line, cdline_t *hash_out)
397 if (line->len < 2)
398 return -1;
400 /* Skip the router name. */
401 const char *hash = memchr(line->s + 2, ' ', line->len - 2);
402 if (!hash) {
403 return -1;
406 hash++;
407 const char *hash_end = hash;
408 /* Stop when the first non-base64 character is found. Use unsigned chars to
409 * avoid negative indexes causing crashes.
411 while (base64_compare_table[*((unsigned char*)hash_end)]
412 != NOT_VALID_BASE64 &&
413 hash_end < line->s + line->len) {
414 hash_end++;
417 /* Empty hash. */
418 if (hash_end == hash) {
419 return -1;
422 hash_out->s = hash;
423 /* Always true because lines are limited to this length */
424 tor_assert(hash_end >= hash);
425 tor_assert((size_t)(hash_end - hash) <= UINT32_MAX);
426 hash_out->len = (uint32_t)(hash_end - hash);
428 return 0;
431 /** Helper: Check that a line is a valid router entry. We must at least be
432 * able to fetch a proper identity hash from it for it to be valid.
434 STATIC int
435 is_valid_router_entry(const cdline_t *line)
437 if (line->len < 2 || fast_memneq(line->s, "r ", 2))
438 return 0;
439 cdline_t tmp;
440 return (get_id_hash(line, &tmp) == 0);
443 /** Helper: Find the next router line starting at the current position.
444 * Assumes that cur is lower than the length of the smartlist, i.e. it is a
445 * line within the bounds of the consensus. The only exception is when we
446 * don't want to skip the first line, in which case cur will be -1.
448 STATIC int
449 next_router(const smartlist_t *cons, int cur)
451 int len = smartlist_len(cons);
452 tor_assert(cur >= -1 && cur < len);
454 if (++cur >= len) {
455 return len;
458 const cdline_t *line = smartlist_get(cons, cur);
459 while (!is_valid_router_entry(line)) {
460 if (++cur >= len) {
461 return len;
463 line = smartlist_get(cons, cur);
465 return cur;
468 /** Helper: compare two base64-encoded identity hashes, which may be of
469 * different lengths. Comparison ends when the first non-base64 char is found.
471 STATIC int
472 base64cmp(const cdline_t *hash1, const cdline_t *hash2)
474 /* NULL is always lower, useful for last_hash which starts at NULL. */
475 if (!hash1->s && !hash2->s) {
476 return 0;
478 if (!hash1->s) {
479 return -1;
481 if (!hash2->s) {
482 return 1;
485 /* Don't index with a char; char may be signed. */
486 const unsigned char *a = (unsigned char*)hash1->s;
487 const unsigned char *b = (unsigned char*)hash2->s;
488 const unsigned char *a_end = a + hash1->len;
489 const unsigned char *b_end = b + hash2->len;
490 while (1) {
491 uint8_t av = base64_compare_table[*a];
492 uint8_t bv = base64_compare_table[*b];
493 if (av == NOT_VALID_BASE64) {
494 if (bv == NOT_VALID_BASE64) {
495 /* Both ended with exactly the same characters. */
496 return 0;
497 } else {
498 /* hash2 goes on longer than hash1 and thus hash1 is lower. */
499 return -1;
501 } else if (bv == NOT_VALID_BASE64) {
502 /* hash1 goes on longer than hash2 and thus hash1 is greater. */
503 return 1;
504 } else if (av < bv) {
505 /* The first difference shows that hash1 is lower. */
506 return -1;
507 } else if (av > bv) {
508 /* The first difference shows that hash1 is greater. */
509 return 1;
510 } else {
511 a++;
512 b++;
513 if (a == a_end) {
514 if (b == b_end) {
515 return 0;
516 } else {
517 return -1;
519 } else if (b == b_end) {
520 return 1;
526 /** Structure used to remember the previous and current identity hash of
527 * the "r " lines in a consensus, to enforce well-ordering. */
528 typedef struct router_id_iterator_t {
529 cdline_t last_hash;
530 cdline_t hash;
531 } router_id_iterator_t;
533 #ifndef COCCI
535 * Initializer for a router_id_iterator_t.
537 #define ROUTER_ID_ITERATOR_INIT { { NULL, 0 }, { NULL, 0 } }
538 #endif /* !defined(COCCI) */
540 /** Given an index *<b>idxp</b> into the consensus at <b>cons</b>, advance
541 * the index to the next router line ("r ...") in the consensus, or to
542 * an index one after the end of the list if there is no such line.
544 * Use <b>iter</b> to record the hash of the found router line, if any,
545 * and to enforce ordering on the hashes. If the hashes are mis-ordered,
546 * return -1. Else, return 0.
548 static int
549 find_next_router_line(const smartlist_t *cons,
550 const char *consname,
551 int *idxp,
552 router_id_iterator_t *iter)
554 *idxp = next_router(cons, *idxp);
555 if (*idxp < smartlist_len(cons)) {
556 memcpy(&iter->last_hash, &iter->hash, sizeof(cdline_t));
557 if (get_id_hash(smartlist_get(cons, *idxp), &iter->hash) < 0 ||
558 base64cmp(&iter->hash, &iter->last_hash) <= 0) {
559 log_warn(LD_CONSDIFF, "Refusing to generate consensus diff because "
560 "the %s consensus doesn't have its router entries sorted "
561 "properly.", consname);
562 return -1;
565 return 0;
568 /** Line-prefix indicating the beginning of the signatures section that we
569 * intend to delete. */
570 #define START_OF_SIGNATURES_SECTION "directory-signature "
572 /** Pre-process a consensus in <b>cons</b> (represented as a list of cdline_t)
573 * to remove the signatures from it. If the footer is removed, return a
574 * cdline_t containing a delete command to delete the footer, allocated in
575 * <b>area</b>. If no footer is removed, return NULL.
577 * We remove the signatures here because they are not themselves signed, and
578 * as such there might be different encodings for them.
580 static cdline_t *
581 preprocess_consensus(memarea_t *area,
582 smartlist_t *cons)
584 int idx;
585 int dirsig_idx = -1;
586 for (idx = 0; idx < smartlist_len(cons); ++idx) {
587 cdline_t *line = smartlist_get(cons, idx);
588 if (line_starts_with_str(line, START_OF_SIGNATURES_SECTION)) {
589 dirsig_idx = idx;
590 break;
593 if (dirsig_idx >= 0) {
594 char buf[64];
595 while (smartlist_len(cons) > dirsig_idx)
596 smartlist_del(cons, dirsig_idx);
597 tor_snprintf(buf, sizeof(buf), "%d,$d", dirsig_idx+1);
598 return cdline_linecpy(area, buf);
599 } else {
600 return NULL;
604 /** Generate an ed diff as a smartlist from two consensuses, also given as
605 * smartlists. Will return NULL if the diff could not be generated, which can
606 * happen if any lines the script had to add matched "." or if the routers
607 * were not properly ordered.
609 * All cdline_t objects in the resulting object are either references to lines
610 * in one of the inputs, or are newly allocated lines in the provided memarea.
612 * This implementation is consensus-specific. To generate an ed diff for any
613 * given input in quadratic time, you can replace all the code until the
614 * navigation in reverse order with the following:
616 * int len1 = smartlist_len(cons1);
617 * int len2 = smartlist_len(cons2);
618 * bitarray_t *changed1 = bitarray_init_zero(len1);
619 * bitarray_t *changed2 = bitarray_init_zero(len2);
620 * cons1_sl = smartlist_slice(cons1, 0, -1);
621 * cons2_sl = smartlist_slice(cons2, 0, -1);
622 * calc_changes(cons1_sl, cons2_sl, changed1, changed2);
624 STATIC smartlist_t *
625 gen_ed_diff(const smartlist_t *cons1_orig, const smartlist_t *cons2,
626 memarea_t *area)
628 smartlist_t *cons1 = smartlist_new();
629 smartlist_add_all(cons1, cons1_orig);
630 cdline_t *remove_trailer = preprocess_consensus(area, cons1);
632 int len1 = smartlist_len(cons1);
633 int len2 = smartlist_len(cons2);
634 smartlist_t *result = smartlist_new();
636 if (remove_trailer) {
637 /* There's a delete-the-trailer line at the end, so add it here. */
638 smartlist_add(result, remove_trailer);
641 /* Initialize the changed bitarrays to zero, so that calc_changes only needs
642 * to set the ones that matter and leave the rest untouched.
644 bitarray_t *changed1 = bitarray_init_zero(len1);
645 bitarray_t *changed2 = bitarray_init_zero(len2);
646 int i1=-1, i2=-1;
647 int start1=0, start2=0;
649 /* To check that hashes are ordered properly */
650 router_id_iterator_t iter1 = ROUTER_ID_ITERATOR_INIT;
651 router_id_iterator_t iter2 = ROUTER_ID_ITERATOR_INIT;
653 /* i1 and i2 are initialized at the first line of each consensus. They never
654 * reach past len1 and len2 respectively, since next_router doesn't let that
655 * happen. i1 and i2 are advanced by at least one line at each iteration as
656 * long as they have not yet reached len1 and len2, so the loop is
657 * guaranteed to end, and each pair of (i1,i2) will be inspected at most
658 * once.
660 while (i1 < len1 || i2 < len2) {
662 /* Advance each of the two navigation positions by one router entry if not
663 * yet at the end.
665 if (i1 < len1) {
666 if (find_next_router_line(cons1, "base", &i1, &iter1) < 0) {
667 goto error_cleanup;
671 if (i2 < len2) {
672 if (find_next_router_line(cons2, "target", &i2, &iter2) < 0) {
673 goto error_cleanup;
677 /* If we have reached the end of both consensuses, there is no need to
678 * compare hashes anymore, since this is the last iteration.
680 if (i1 < len1 || i2 < len2) {
682 /* Keep on advancing the lower (by identity hash sorting) position until
683 * we have two matching positions. The only other possible outcome is
684 * that a lower position reaches the end of the consensus before it can
685 * reach a hash that is no longer the lower one. Since there will always
686 * be a lower hash for as long as the loop runs, one of the two indexes
687 * will always be incremented, thus assuring that the loop must end
688 * after a finite number of iterations. If that cannot be because said
689 * consensus has already reached the end, both are extended to their
690 * respecting ends since we are done.
692 int cmp = base64cmp(&iter1.hash, &iter2.hash);
693 while (cmp != 0) {
694 if (i1 < len1 && cmp < 0) {
695 if (find_next_router_line(cons1, "base", &i1, &iter1) < 0) {
696 goto error_cleanup;
698 if (i1 == len1) {
699 /* We finished the first consensus, so grab all the remaining
700 * lines of the second consensus and finish up.
702 i2 = len2;
703 break;
705 } else if (i2 < len2 && cmp > 0) {
706 if (find_next_router_line(cons2, "target", &i2, &iter2) < 0) {
707 goto error_cleanup;
709 if (i2 == len2) {
710 /* We finished the second consensus, so grab all the remaining
711 * lines of the first consensus and finish up.
713 i1 = len1;
714 break;
716 } else {
717 i1 = len1;
718 i2 = len2;
719 break;
721 cmp = base64cmp(&iter1.hash, &iter2.hash);
725 /* Make slices out of these chunks (up to the common router entry) and
726 * calculate the changes for them.
727 * Error if any of the two slices are longer than 10K lines. That should
728 * never happen with any pair of real consensuses. Feeding more than 10K
729 * lines to calc_changes would be very slow anyway.
731 #define MAX_LINE_COUNT (10000)
732 if (i1-start1 > MAX_LINE_COUNT || i2-start2 > MAX_LINE_COUNT) {
733 log_warn(LD_CONSDIFF, "Refusing to generate consensus diff because "
734 "we found too few common router ids.");
735 goto error_cleanup;
738 smartlist_slice_t *cons1_sl = smartlist_slice(cons1, start1, i1);
739 smartlist_slice_t *cons2_sl = smartlist_slice(cons2, start2, i2);
740 calc_changes(cons1_sl, cons2_sl, changed1, changed2);
741 tor_free(cons1_sl);
742 tor_free(cons2_sl);
743 start1 = i1, start2 = i2;
746 /* Navigate the changes in reverse order and generate one ed command for
747 * each chunk of changes.
749 i1=len1-1, i2=len2-1;
750 char buf[128];
751 while (i1 >= 0 || i2 >= 0) {
753 int start1x, start2x, end1, end2, added, deleted;
755 /* We are at a point were no changed bools are true, so just keep going. */
756 if (!(i1 >= 0 && bitarray_is_set(changed1, i1)) &&
757 !(i2 >= 0 && bitarray_is_set(changed2, i2))) {
758 if (i1 >= 0) {
759 i1--;
761 if (i2 >= 0) {
762 i2--;
764 continue;
767 end1 = i1, end2 = i2;
769 /* Grab all contiguous changed lines */
770 while (i1 >= 0 && bitarray_is_set(changed1, i1)) {
771 i1--;
773 while (i2 >= 0 && bitarray_is_set(changed2, i2)) {
774 i2--;
777 start1x = i1+1, start2x = i2+1;
778 added = end2-i2, deleted = end1-i1;
780 if (added == 0) {
781 if (deleted == 1) {
782 tor_snprintf(buf, sizeof(buf), "%id", start1x+1);
783 smartlist_add_linecpy(result, area, buf);
784 } else {
785 tor_snprintf(buf, sizeof(buf), "%i,%id", start1x+1, start1x+deleted);
786 smartlist_add_linecpy(result, area, buf);
788 } else {
789 int i;
790 if (deleted == 0) {
791 tor_snprintf(buf, sizeof(buf), "%ia", start1x);
792 smartlist_add_linecpy(result, area, buf);
793 } else if (deleted == 1) {
794 tor_snprintf(buf, sizeof(buf), "%ic", start1x+1);
795 smartlist_add_linecpy(result, area, buf);
796 } else {
797 tor_snprintf(buf, sizeof(buf), "%i,%ic", start1x+1, start1x+deleted);
798 smartlist_add_linecpy(result, area, buf);
801 for (i = start2x; i <= end2; ++i) {
802 cdline_t *line = smartlist_get(cons2, i);
803 if (line_str_eq(line, ".")) {
804 log_warn(LD_CONSDIFF, "Cannot generate consensus diff because "
805 "one of the lines to be added is \".\".");
806 goto error_cleanup;
808 smartlist_add(result, line);
810 smartlist_add_linecpy(result, area, ".");
814 smartlist_free(cons1);
815 bitarray_free(changed1);
816 bitarray_free(changed2);
818 return result;
820 error_cleanup:
822 smartlist_free(cons1);
823 bitarray_free(changed1);
824 bitarray_free(changed2);
826 smartlist_free(result);
828 return NULL;
831 /* Helper: Read a base-10 number between 0 and INT32_MAX from <b>s</b> and
832 * store it in <b>num_out</b>. Advance <b>s</b> to the character immediately
833 * after the number. Return 0 on success, -1 on failure. */
834 static int
835 get_linenum(const char **s, int *num_out)
837 int ok;
838 char *next;
839 if (!TOR_ISDIGIT(**s)) {
840 return -1;
842 *num_out = (int) tor_parse_long(*s, 10, 0, INT32_MAX, &ok, &next);
843 if (ok && next) {
844 *s = next;
845 return 0;
846 } else {
847 return -1;
851 /** Apply the ed diff, starting at <b>diff_starting_line</b>, to the consensus
852 * and return a new consensus, also as a line-based smartlist. Will return
853 * NULL if the ed diff is not properly formatted.
855 * All cdline_t objects in the resulting object are references to lines
856 * in one of the inputs; nothing is copied.
858 STATIC smartlist_t *
859 apply_ed_diff(const smartlist_t *cons1, const smartlist_t *diff,
860 int diff_starting_line)
862 int diff_len = smartlist_len(diff);
863 int j = smartlist_len(cons1);
864 smartlist_t *cons2 = smartlist_new();
866 for (int i=diff_starting_line; i<diff_len; ++i) {
867 const cdline_t *diff_cdline = smartlist_get(diff, i);
868 char diff_line[128];
870 if (diff_cdline->len > sizeof(diff_line) - 1) {
871 log_warn(LD_CONSDIFF, "Could not apply consensus diff because "
872 "an ed command was far too long");
873 goto error_cleanup;
875 /* Copy the line to make it nul-terminated. */
876 memcpy(diff_line, diff_cdline->s, diff_cdline->len);
877 diff_line[diff_cdline->len] = 0;
878 const char *ptr = diff_line;
879 int start = 0, end = 0;
880 int had_range = 0;
881 int end_was_eof = 0;
882 if (get_linenum(&ptr, &start) < 0) {
883 log_warn(LD_CONSDIFF, "Could not apply consensus diff because "
884 "an ed command was missing a line number.");
885 goto error_cleanup;
887 if (*ptr == ',') {
888 /* Two-item range */
889 had_range = 1;
890 ++ptr;
891 if (*ptr == '$') {
892 end_was_eof = 1;
893 end = smartlist_len(cons1);
894 ++ptr;
895 } else if (get_linenum(&ptr, &end) < 0) {
896 log_warn(LD_CONSDIFF, "Could not apply consensus diff because "
897 "an ed command was missing a range end line number.");
898 goto error_cleanup;
900 /* Incoherent range. */
901 if (end <= start) {
902 log_warn(LD_CONSDIFF, "Could not apply consensus diff because "
903 "an invalid range was found in an ed command.");
904 goto error_cleanup;
906 } else {
907 /* We'll take <n1> as <n1>,<n1> for simplicity. */
908 end = start;
911 if (end > j) {
912 log_warn(LD_CONSDIFF, "Could not apply consensus diff because "
913 "its commands are not properly sorted in reverse order.");
914 goto error_cleanup;
917 if (*ptr == '\0') {
918 log_warn(LD_CONSDIFF, "Could not apply consensus diff because "
919 "a line with no ed command was found");
920 goto error_cleanup;
923 if (*(ptr+1) != '\0') {
924 log_warn(LD_CONSDIFF, "Could not apply consensus diff because "
925 "an ed command longer than one char was found.");
926 goto error_cleanup;
929 char action = *ptr;
931 switch (action) {
932 case 'a':
933 case 'c':
934 case 'd':
935 break;
936 default:
937 log_warn(LD_CONSDIFF, "Could not apply consensus diff because "
938 "an unrecognised ed command was found.");
939 goto error_cleanup;
942 /** $ is not allowed with non-d actions. */
943 if (end_was_eof && action != 'd') {
944 log_warn(LD_CONSDIFF, "Could not apply consensus diff because "
945 "it wanted to use $ with a command other than delete");
946 goto error_cleanup;
949 /* 'a' commands are not allowed to have ranges. */
950 if (had_range && action == 'a') {
951 log_warn(LD_CONSDIFF, "Could not apply consensus diff because "
952 "it wanted to add lines after a range.");
953 goto error_cleanup;
956 /* Add unchanged lines. */
957 for (; j && j > end; --j) {
958 cdline_t *cons_line = smartlist_get(cons1, j-1);
959 smartlist_add(cons2, cons_line);
962 /* Ignore removed lines. */
963 if (action == 'c' || action == 'd') {
964 while (--j >= start) {
965 /* Skip line */
969 /* Add new lines in reverse order, since it will all be reversed at the
970 * end.
972 if (action == 'a' || action == 'c') {
973 int added_end = i;
975 i++; /* Skip the line with the range and command. */
976 while (i < diff_len) {
977 if (line_str_eq(smartlist_get(diff, i), ".")) {
978 break;
980 if (++i == diff_len) {
981 log_warn(LD_CONSDIFF, "Could not apply consensus diff because "
982 "it has lines to be inserted that don't end with a \".\".");
983 goto error_cleanup;
987 int added_i = i-1;
989 /* It would make no sense to add zero new lines. */
990 if (added_i == added_end) {
991 log_warn(LD_CONSDIFF, "Could not apply consensus diff because "
992 "it has an ed command that tries to insert zero lines.");
993 goto error_cleanup;
996 while (added_i > added_end) {
997 cdline_t *added_line = smartlist_get(diff, added_i--);
998 smartlist_add(cons2, added_line);
1003 /* Add remaining unchanged lines. */
1004 for (; j > 0; --j) {
1005 cdline_t *cons_line = smartlist_get(cons1, j-1);
1006 smartlist_add(cons2, cons_line);
1009 /* Reverse the whole thing since we did it from the end. */
1010 smartlist_reverse(cons2);
1011 return cons2;
1013 error_cleanup:
1015 smartlist_free(cons2);
1017 return NULL;
1020 /** Generate a consensus diff as a smartlist from two given consensuses, also
1021 * as smartlists. Will return NULL if the consensus diff could not be
1022 * generated. Neither of the two consensuses are modified in any way, so it's
1023 * up to the caller to free their resources.
1025 smartlist_t *
1026 consdiff_gen_diff(const smartlist_t *cons1,
1027 const smartlist_t *cons2,
1028 const consensus_digest_t *digests1,
1029 const consensus_digest_t *digests2,
1030 memarea_t *area)
1032 smartlist_t *ed_diff = gen_ed_diff(cons1, cons2, area);
1033 /* ed diff could not be generated - reason already logged by gen_ed_diff. */
1034 if (!ed_diff) {
1035 goto error_cleanup;
1038 /* See that the script actually produces what we want. */
1039 smartlist_t *ed_cons2 = apply_ed_diff(cons1, ed_diff, 0);
1040 if (!ed_cons2) {
1041 /* LCOV_EXCL_START -- impossible if diff generation is correct */
1042 log_warn(LD_BUG|LD_CONSDIFF, "Refusing to generate consensus diff because "
1043 "the generated ed diff could not be tested to successfully generate "
1044 "the target consensus.");
1045 goto error_cleanup;
1046 /* LCOV_EXCL_STOP */
1049 int cons2_eq = 1;
1050 if (smartlist_len(cons2) == smartlist_len(ed_cons2)) {
1051 SMARTLIST_FOREACH_BEGIN(cons2, const cdline_t *, line1) {
1052 const cdline_t *line2 = smartlist_get(ed_cons2, line1_sl_idx);
1053 if (!lines_eq(line1, line2)) {
1054 cons2_eq = 0;
1055 break;
1057 } SMARTLIST_FOREACH_END(line1);
1058 } else {
1059 cons2_eq = 0;
1061 smartlist_free(ed_cons2);
1062 if (!cons2_eq) {
1063 /* LCOV_EXCL_START -- impossible if diff generation is correct. */
1064 log_warn(LD_BUG|LD_CONSDIFF, "Refusing to generate consensus diff because "
1065 "the generated ed diff did not generate the target consensus "
1066 "successfully when tested.");
1067 goto error_cleanup;
1068 /* LCOV_EXCL_STOP */
1071 char cons1_hash_hex[HEX_DIGEST256_LEN+1];
1072 char cons2_hash_hex[HEX_DIGEST256_LEN+1];
1073 base16_encode(cons1_hash_hex, HEX_DIGEST256_LEN+1,
1074 (const char*)digests1->sha3_256, DIGEST256_LEN);
1075 base16_encode(cons2_hash_hex, HEX_DIGEST256_LEN+1,
1076 (const char*)digests2->sha3_256, DIGEST256_LEN);
1078 /* Create the resulting consensus diff. */
1079 char buf[160];
1080 smartlist_t *result = smartlist_new();
1081 tor_snprintf(buf, sizeof(buf), "%s", ns_diff_version);
1082 smartlist_add_linecpy(result, area, buf);
1083 tor_snprintf(buf, sizeof(buf), "%s %s %s", hash_token,
1084 cons1_hash_hex, cons2_hash_hex);
1085 smartlist_add_linecpy(result, area, buf);
1086 smartlist_add_all(result, ed_diff);
1087 smartlist_free(ed_diff);
1088 return result;
1090 error_cleanup:
1092 if (ed_diff) {
1093 /* LCOV_EXCL_START -- ed_diff is NULL except in unreachable cases above */
1094 smartlist_free(ed_diff);
1095 /* LCOV_EXCL_STOP */
1098 return NULL;
1101 /** Fetch the digest of the base consensus in the consensus diff, encoded in
1102 * base16 as found in the diff itself. digest1_out and digest2_out must be of
1103 * length DIGEST256_LEN or larger if not NULL.
1106 consdiff_get_digests(const smartlist_t *diff,
1107 char *digest1_out,
1108 char *digest2_out)
1110 smartlist_t *hash_words = NULL;
1111 const cdline_t *format;
1112 char cons1_hash[DIGEST256_LEN], cons2_hash[DIGEST256_LEN];
1113 char *cons1_hash_hex, *cons2_hash_hex;
1114 if (smartlist_len(diff) < 2) {
1115 log_info(LD_CONSDIFF, "The provided consensus diff is too short.");
1116 goto error_cleanup;
1119 /* Check that it's the format and version we know. */
1120 format = smartlist_get(diff, 0);
1121 if (!line_str_eq(format, ns_diff_version)) {
1122 log_warn(LD_CONSDIFF, "The provided consensus diff format is not known.");
1123 goto error_cleanup;
1126 /* Grab the base16 digests. */
1127 hash_words = smartlist_new();
1129 const cdline_t *line2 = smartlist_get(diff, 1);
1130 char *h = tor_memdup_nulterm(line2->s, line2->len);
1131 smartlist_split_string(hash_words, h, " ", 0, 4);
1132 tor_free(h);
1135 /* There have to be three words, the first of which must be hash_token. */
1136 if (smartlist_len(hash_words) != 3 ||
1137 strcmp(smartlist_get(hash_words, 0), hash_token)) {
1138 log_info(LD_CONSDIFF, "The provided consensus diff does not include "
1139 "the necessary digests.");
1140 goto error_cleanup;
1143 /* Expected hashes as found in the consensus diff header. They must be of
1144 * length HEX_DIGEST256_LEN, normally 64 hexadecimal characters.
1145 * If any of the decodings fail, error to make sure that the hashes are
1146 * proper base16-encoded digests.
1148 cons1_hash_hex = smartlist_get(hash_words, 1);
1149 cons2_hash_hex = smartlist_get(hash_words, 2);
1150 if (strlen(cons1_hash_hex) != HEX_DIGEST256_LEN ||
1151 strlen(cons2_hash_hex) != HEX_DIGEST256_LEN) {
1152 log_info(LD_CONSDIFF, "The provided consensus diff includes "
1153 "base16-encoded digests of incorrect size.");
1154 goto error_cleanup;
1157 if (base16_decode(cons1_hash, DIGEST256_LEN,
1158 cons1_hash_hex, HEX_DIGEST256_LEN) != DIGEST256_LEN ||
1159 base16_decode(cons2_hash, DIGEST256_LEN,
1160 cons2_hash_hex, HEX_DIGEST256_LEN) != DIGEST256_LEN) {
1161 log_info(LD_CONSDIFF, "The provided consensus diff includes "
1162 "malformed digests.");
1163 goto error_cleanup;
1166 if (digest1_out) {
1167 memcpy(digest1_out, cons1_hash, DIGEST256_LEN);
1169 if (digest2_out) {
1170 memcpy(digest2_out, cons2_hash, DIGEST256_LEN);
1173 SMARTLIST_FOREACH(hash_words, char *, cp, tor_free(cp));
1174 smartlist_free(hash_words);
1175 return 0;
1177 error_cleanup:
1179 if (hash_words) {
1180 SMARTLIST_FOREACH(hash_words, char *, cp, tor_free(cp));
1181 smartlist_free(hash_words);
1183 return 1;
1186 /** Apply the consensus diff to the given consensus and return a new
1187 * consensus, also as a line-based smartlist. Will return NULL if the diff
1188 * could not be applied. Neither the consensus nor the diff are modified in
1189 * any way, so it's up to the caller to free their resources.
1191 char *
1192 consdiff_apply_diff(const smartlist_t *cons1,
1193 const smartlist_t *diff,
1194 const consensus_digest_t *digests1)
1196 smartlist_t *cons2 = NULL;
1197 char *cons2_str = NULL;
1198 char e_cons1_hash[DIGEST256_LEN];
1199 char e_cons2_hash[DIGEST256_LEN];
1201 if (consdiff_get_digests(diff, e_cons1_hash, e_cons2_hash) != 0) {
1202 goto error_cleanup;
1205 /* See that the consensus that was given to us matches its hash. */
1206 if (!consensus_digest_eq(digests1->sha3_256,
1207 (const uint8_t*)e_cons1_hash)) {
1208 char hex_digest1[HEX_DIGEST256_LEN+1];
1209 char e_hex_digest1[HEX_DIGEST256_LEN+1];
1210 log_warn(LD_CONSDIFF, "Refusing to apply consensus diff because "
1211 "the base consensus doesn't match the digest as found in "
1212 "the consensus diff header.");
1213 base16_encode(hex_digest1, HEX_DIGEST256_LEN+1,
1214 (const char *)digests1->sha3_256, DIGEST256_LEN);
1215 base16_encode(e_hex_digest1, HEX_DIGEST256_LEN+1,
1216 e_cons1_hash, DIGEST256_LEN);
1217 log_warn(LD_CONSDIFF, "Expected: %s; found: %s",
1218 hex_digest1, e_hex_digest1);
1219 goto error_cleanup;
1222 /* Grab the ed diff and calculate the resulting consensus. */
1223 /* Skip the first two lines. */
1224 cons2 = apply_ed_diff(cons1, diff, 2);
1226 /* ed diff could not be applied - reason already logged by apply_ed_diff. */
1227 if (!cons2) {
1228 goto error_cleanup;
1231 cons2_str = consensus_join_lines(cons2);
1233 consensus_digest_t cons2_digests;
1234 if (consensus_compute_digest(cons2_str, strlen(cons2_str),
1235 &cons2_digests) < 0) {
1236 /* LCOV_EXCL_START -- digest can't fail */
1237 log_warn(LD_CONSDIFF, "Could not compute digests of the consensus "
1238 "resulting from applying a consensus diff.");
1239 goto error_cleanup;
1240 /* LCOV_EXCL_STOP */
1243 /* See that the resulting consensus matches its hash. */
1244 if (!consensus_digest_eq(cons2_digests.sha3_256,
1245 (const uint8_t*)e_cons2_hash)) {
1246 log_warn(LD_CONSDIFF, "Refusing to apply consensus diff because "
1247 "the resulting consensus doesn't match the digest as found in "
1248 "the consensus diff header.");
1249 char hex_digest2[HEX_DIGEST256_LEN+1];
1250 char e_hex_digest2[HEX_DIGEST256_LEN+1];
1251 base16_encode(hex_digest2, HEX_DIGEST256_LEN+1,
1252 (const char *)cons2_digests.sha3_256, DIGEST256_LEN);
1253 base16_encode(e_hex_digest2, HEX_DIGEST256_LEN+1,
1254 e_cons2_hash, DIGEST256_LEN);
1255 log_warn(LD_CONSDIFF, "Expected: %s; found: %s",
1256 hex_digest2, e_hex_digest2);
1257 goto error_cleanup;
1260 goto done;
1262 error_cleanup:
1263 tor_free(cons2_str); /* Sets it to NULL */
1265 done:
1266 if (cons2) {
1267 smartlist_free(cons2);
1270 return cons2_str;
1273 /** Any consensus line longer than this means that the input is invalid. */
1274 #define CONSENSUS_LINE_MAX_LEN (1<<20)
1277 * Helper: For every NL-terminated line in <b>s</b>, add a cdline referring to
1278 * that line (without trailing newline) to <b>out</b>. Return -1 if there are
1279 * any non-NL terminated lines; 0 otherwise.
1281 * Unlike tor_split_lines, this function avoids ambiguity on its
1282 * handling of a final line that isn't NL-terminated.
1284 * All cdline_t objects are allocated in the provided memarea. Strings
1285 * are not copied: if <b>s</b> changes or becomes invalid, then all
1286 * generated cdlines will become invalid.
1288 STATIC int
1289 consensus_split_lines(smartlist_t *out,
1290 const char *s, size_t len,
1291 memarea_t *area)
1293 const char *end_of_str = s + len;
1295 while (s < end_of_str) {
1296 const char *eol = memchr(s, '\n', end_of_str - s);
1297 if (!eol) {
1298 /* File doesn't end with newline. */
1299 return -1;
1301 if (eol - s > CONSENSUS_LINE_MAX_LEN) {
1302 /* Line is far too long. */
1303 return -1;
1305 cdline_t *line = memarea_alloc(area, sizeof(cdline_t));
1306 line->s = s;
1307 line->len = (uint32_t)(eol - s);
1308 smartlist_add(out, line);
1309 s = eol+1;
1311 return 0;
1314 /** Given a list of cdline_t, return a newly allocated string containing
1315 * all of the lines, terminated with NL, concatenated.
1317 * Unlike smartlist_join_strings(), avoids lossy operations on empty
1318 * lists. */
1319 static char *
1320 consensus_join_lines(const smartlist_t *inp)
1322 size_t n = 0;
1323 SMARTLIST_FOREACH(inp, const cdline_t *, cdline, n += cdline->len + 1);
1324 n += 1;
1325 char *result = tor_malloc(n);
1326 char *out = result;
1327 SMARTLIST_FOREACH_BEGIN(inp, const cdline_t *, cdline) {
1328 memcpy(out, cdline->s, cdline->len);
1329 out += cdline->len;
1330 *out++ = '\n';
1331 } SMARTLIST_FOREACH_END(cdline);
1332 *out++ = '\0';
1333 tor_assert(out == result+n);
1334 return result;
1337 /** Given two consensus documents, try to compute a diff between them. On
1338 * success, return a newly allocated string containing that diff. On failure,
1339 * return NULL. */
1340 char *
1341 consensus_diff_generate(const char *cons1, size_t cons1len,
1342 const char *cons2, size_t cons2len)
1344 consensus_digest_t d1, d2;
1345 smartlist_t *lines1 = NULL, *lines2 = NULL, *result_lines = NULL;
1346 int r1, r2;
1347 char *result = NULL;
1349 r1 = consensus_compute_digest_as_signed(cons1, cons1len, &d1);
1350 r2 = consensus_compute_digest(cons2, cons2len, &d2);
1351 if (BUG(r1 < 0 || r2 < 0))
1352 return NULL; // LCOV_EXCL_LINE
1354 memarea_t *area = memarea_new();
1355 lines1 = smartlist_new();
1356 lines2 = smartlist_new();
1357 if (consensus_split_lines(lines1, cons1, cons1len, area) < 0)
1358 goto done;
1359 if (consensus_split_lines(lines2, cons2, cons2len, area) < 0)
1360 goto done;
1362 result_lines = consdiff_gen_diff(lines1, lines2, &d1, &d2, area);
1364 done:
1365 if (result_lines) {
1366 result = consensus_join_lines(result_lines);
1367 smartlist_free(result_lines);
1370 memarea_drop_all(area);
1371 smartlist_free(lines1);
1372 smartlist_free(lines2);
1374 return result;
1377 /** Given a consensus document and a diff, try to apply the diff to the
1378 * consensus. On success return a newly allocated string containing the new
1379 * consensus. On failure, return NULL. */
1380 char *
1381 consensus_diff_apply(const char *consensus,
1382 size_t consensus_len,
1383 const char *diff,
1384 size_t diff_len)
1386 consensus_digest_t d1;
1387 smartlist_t *lines1 = NULL, *lines2 = NULL;
1388 int r1;
1389 char *result = NULL;
1390 memarea_t *area = memarea_new();
1392 r1 = consensus_compute_digest_as_signed(consensus, consensus_len, &d1);
1393 if (BUG(r1 < 0))
1394 goto done;
1396 lines1 = smartlist_new();
1397 lines2 = smartlist_new();
1398 if (consensus_split_lines(lines1, consensus, consensus_len, area) < 0)
1399 goto done;
1400 if (consensus_split_lines(lines2, diff, diff_len, area) < 0)
1401 goto done;
1403 result = consdiff_apply_diff(lines1, lines2, &d1);
1405 done:
1406 smartlist_free(lines1);
1407 smartlist_free(lines2);
1408 memarea_drop_all(area);
1410 return result;
1413 /** Return true iff, based on its header, <b>document</b> is likely
1414 * to be a consensus diff. */
1416 looks_like_a_consensus_diff(const char *document, size_t len)
1418 return (len >= strlen(ns_diff_version) &&
1419 fast_memeq(document, ns_diff_version, strlen(ns_diff_version)));