1 /* Copyright (c) 2014, Daniel MartÃ
2 * Copyright (c) 2014-2021, The Tor Project, Inc. */
3 /* See LICENSE for licensing information */
7 * \brief Consensus diff implementation, including both the generation and the
8 * application of diffs in a minimal ed format.
10 * The consensus diff application is done in consdiff_apply_diff, which relies
11 * on apply_ed_diff for the main ed diff part and on some digest helper
12 * functions to check the digest hashes found in the consensus diff header.
14 * The consensus diff generation is more complex. consdiff_gen_diff generates
15 * it, relying on gen_ed_diff to generate the ed diff and some digest helper
16 * functions to generate the digest hashes.
18 * gen_ed_diff is the tricky bit. In it simplest form, it will take quadratic
19 * time and linear space to generate an ed diff given two smartlists. As shown
20 * in its comment section, calling calc_changes on the entire two consensuses
21 * will calculate what is to be added and what is to be deleted in the diff.
22 * Its comment section briefly explains how it works.
24 * In our case specific to consensuses, we take advantage of the fact that
25 * consensuses list routers sorted by their identities. We use that
26 * information to avoid running calc_changes on the whole smartlists.
27 * gen_ed_diff will navigate through the two consensuses identity by identity
28 * and will send small couples of slices to calc_changes, keeping the running
29 * time near-linear. This is explained in more detail in the gen_ed_diff
32 * The allocation strategy tries to save time and memory by avoiding needless
33 * copies. Instead of actually splitting the inputs into separate strings, we
34 * allocate cdline_t objects, each of which represents a line in the original
35 * object or in the output. We use memarea_t allocators to manage the
36 * temporary memory we use when generating or applying diffs.
39 #define CONSDIFF_PRIVATE
41 #include "core/or/or.h"
42 #include "feature/dircommon/consdiff.h"
43 #include "lib/memarea/memarea.h"
44 #include "feature/dirparse/ns_parse.h"
46 static const char* ns_diff_version
= "network-status-diff-version 1";
47 static const char* hash_token
= "hash";
49 static char *consensus_join_lines(const smartlist_t
*inp
);
51 /** Return true iff a and b have the same contents. */
53 lines_eq(const cdline_t
*a
, const cdline_t
*b
)
55 return a
->len
== b
->len
&& fast_memeq(a
->s
, b
->s
, a
->len
);
58 /** Return true iff a has the same contents as the nul-terminated string b. */
60 line_str_eq(const cdline_t
*a
, const char *b
)
62 const size_t len
= strlen(b
);
63 tor_assert(len
<= UINT32_MAX
);
64 cdline_t bline
= { b
, (uint32_t)len
};
65 return lines_eq(a
, &bline
);
68 /** Return true iff a begins with the same contents as the nul-terminated
71 line_starts_with_str(const cdline_t
*a
, const char *b
)
73 const size_t len
= strlen(b
);
74 tor_assert(len
<= UINT32_MAX
);
75 return a
->len
>= len
&& fast_memeq(a
->s
, b
, len
);
78 /** Return a new cdline_t holding as its contents the nul-terminated
79 * string s. Use the provided memory area for storage. */
81 cdline_linecpy(memarea_t
*area
, const char *s
)
83 size_t len
= strlen(s
);
84 const char *ss
= memarea_memdup(area
, s
, len
);
85 cdline_t
*line
= memarea_alloc(area
, sizeof(cdline_t
));
87 line
->len
= (uint32_t)len
;
91 /** Add a cdline_t to <b>lst</b> holding as its contents the nul-terminated
92 * string s. Use the provided memory area for storage. */
94 smartlist_add_linecpy(smartlist_t
*lst
, memarea_t
*area
, const char *s
)
96 smartlist_add(lst
, cdline_linecpy(area
, s
));
99 /** Compute the digest of <b>cons</b>, and store the result in
100 * <b>digest_out</b>. Return 0 on success, -1 on failure. */
101 /* This is a separate, mockable function so that we can override it when
103 MOCK_IMPL(STATIC
int,
104 consensus_compute_digest
,(const char *cons
, size_t len
,
105 consensus_digest_t
*digest_out
))
107 int r
= crypto_digest256((char*)digest_out
->sha3_256
,
108 cons
, len
, DIGEST_SHA3_256
);
112 /** Compute the digest-as-signed of <b>cons</b>, and store the result in
113 * <b>digest_out</b>. Return 0 on success, -1 on failure. */
114 /* This is a separate, mockable function so that we can override it when
116 MOCK_IMPL(STATIC
int,
117 consensus_compute_digest_as_signed
,(const char *cons
, size_t len
,
118 consensus_digest_t
*digest_out
))
120 return router_get_networkstatus_v3_sha3_as_signed(digest_out
->sha3_256
,
124 /** Return true iff <b>d1</b> and <b>d2</b> contain the same digest */
125 /* This is a separate, mockable function so that we can override it when
127 MOCK_IMPL(STATIC
int,
128 consensus_digest_eq
,(const uint8_t *d1
,
131 return fast_memeq(d1
, d2
, DIGEST256_LEN
);
134 /** Create (allocate) a new slice from a smartlist. Assumes that the start
135 * and the end indexes are within the bounds of the initial smartlist. The end
136 * element is not part of the resulting slice. If end is -1, the slice is to
137 * reach the end of the smartlist.
139 STATIC smartlist_slice_t
*
140 smartlist_slice(const smartlist_t
*list
, int start
, int end
)
142 int list_len
= smartlist_len(list
);
143 tor_assert(start
>= 0);
144 tor_assert(start
<= list_len
);
148 tor_assert(start
<= end
);
150 smartlist_slice_t
*slice
= tor_malloc(sizeof(smartlist_slice_t
));
152 slice
->offset
= start
;
153 slice
->len
= end
- start
;
157 /** Helper: Compute the longest common subsequence lengths for the two slices.
158 * Used as part of the diff generation to find the column at which to split
159 * slice2 while still having the optimal solution.
160 * If direction is -1, the navigation is reversed. Otherwise it must be 1.
161 * The length of the resulting integer array is that of the second slice plus
165 lcs_lengths(const smartlist_slice_t
*slice1
, const smartlist_slice_t
*slice2
,
168 size_t a_size
= sizeof(int) * (slice2
->len
+1);
170 /* Resulting lcs lengths. */
171 int *result
= tor_malloc_zero(a_size
);
172 /* Copy of the lcs lengths from the last iteration. */
173 int *prev
= tor_malloc(a_size
);
175 tor_assert(direction
== 1 || direction
== -1);
177 int si
= slice1
->offset
;
178 if (direction
== -1) {
179 si
+= (slice1
->len
-1);
182 for (int i
= 0; i
< slice1
->len
; ++i
, si
+=direction
) {
184 const cdline_t
*line1
= smartlist_get(slice1
->list
, si
);
185 /* Store the last results. */
186 memcpy(prev
, result
, a_size
);
188 int sj
= slice2
->offset
;
189 if (direction
== -1) {
190 sj
+= (slice2
->len
-1);
193 for (int j
= 0; j
< slice2
->len
; ++j
, sj
+=direction
) {
195 const cdline_t
*line2
= smartlist_get(slice2
->list
, sj
);
196 if (lines_eq(line1
, line2
)) {
197 /* If the lines are equal, the lcs is one line longer. */
198 result
[j
+ 1] = prev
[j
] + 1;
200 /* If not, see what lcs parent path is longer. */
201 result
[j
+ 1] = MAX(result
[j
], prev
[j
+ 1]);
209 /** Helper: Trim any number of lines that are equally at the start or the end
213 trim_slices(smartlist_slice_t
*slice1
, smartlist_slice_t
*slice2
)
215 while (slice1
->len
>0 && slice2
->len
>0) {
216 const cdline_t
*line1
= smartlist_get(slice1
->list
, slice1
->offset
);
217 const cdline_t
*line2
= smartlist_get(slice2
->list
, slice2
->offset
);
218 if (!lines_eq(line1
, line2
)) {
221 slice1
->offset
++; slice1
->len
--;
222 slice2
->offset
++; slice2
->len
--;
225 int i1
= (slice1
->offset
+slice1
->len
)-1;
226 int i2
= (slice2
->offset
+slice2
->len
)-1;
228 while (slice1
->len
>0 && slice2
->len
>0) {
229 const cdline_t
*line1
= smartlist_get(slice1
->list
, i1
);
230 const cdline_t
*line2
= smartlist_get(slice2
->list
, i2
);
231 if (!lines_eq(line1
, line2
)) {
241 /** Like smartlist_string_pos, but uses a cdline_t, and is restricted to the
242 * bounds of the slice.
245 smartlist_slice_string_pos(const smartlist_slice_t
*slice
,
246 const cdline_t
*string
)
248 int end
= slice
->offset
+ slice
->len
;
249 for (int i
= slice
->offset
; i
< end
; ++i
) {
250 const cdline_t
*el
= smartlist_get(slice
->list
, i
);
251 if (lines_eq(el
, string
)) {
258 /** Helper: Set all the appropriate changed booleans to true. The first slice
259 * must be of length 0 or 1. All the lines of slice1 and slice2 which are not
260 * present in the other slice will be set to changed in their bool array.
261 * The two changed bool arrays are passed in the same order as the slices.
264 set_changed(bitarray_t
*changed1
, bitarray_t
*changed2
,
265 const smartlist_slice_t
*slice1
, const smartlist_slice_t
*slice2
)
268 tor_assert(slice1
->len
== 0 || slice1
->len
== 1);
270 if (slice1
->len
== 1) {
271 const cdline_t
*line_common
= smartlist_get(slice1
->list
, slice1
->offset
);
272 toskip
= smartlist_slice_string_pos(slice2
, line_common
);
274 bitarray_set(changed1
, slice1
->offset
);
277 int end
= slice2
->offset
+ slice2
->len
;
278 for (int i
= slice2
->offset
; i
< end
; ++i
) {
280 bitarray_set(changed2
, i
);
286 * Helper: Given that slice1 has been split by half into top and bot, we want
287 * to fetch the column at which to split slice2 so that we are still on track
288 * to the optimal diff solution, i.e. the shortest one. We use lcs_lengths
289 * since the shortest diff is just another way to say the longest common
293 optimal_column_to_split(const smartlist_slice_t
*top
,
294 const smartlist_slice_t
*bot
,
295 const smartlist_slice_t
*slice2
)
297 int *lens_top
= lcs_lengths(top
, slice2
, 1);
298 int *lens_bot
= lcs_lengths(bot
, slice2
, -1);
299 int column
=0, max_sum
=-1;
301 for (int i
= 0; i
< slice2
->len
+1; ++i
) {
302 int sum
= lens_top
[i
] + lens_bot
[slice2
->len
-i
];
315 * Helper: Figure out what elements are new or gone on the second smartlist
316 * relative to the first smartlist, and store the booleans in the bitarrays.
317 * True on the first bitarray means the element is gone, true on the second
318 * bitarray means it's new.
320 * In its base case, either of the smartlists is of length <= 1 and we can
321 * quickly see what elements are new or are gone. In the other case, we will
322 * split one smartlist by half and we'll use optimal_column_to_split to find
323 * the optimal column at which to split the second smartlist so that we are
324 * finding the smallest diff possible.
327 calc_changes(smartlist_slice_t
*slice1
,
328 smartlist_slice_t
*slice2
,
329 bitarray_t
*changed1
, bitarray_t
*changed2
)
331 trim_slices(slice1
, slice2
);
333 if (slice1
->len
<= 1) {
334 set_changed(changed1
, changed2
, slice1
, slice2
);
336 } else if (slice2
->len
<= 1) {
337 set_changed(changed2
, changed1
, slice2
, slice1
);
339 /* Keep on splitting the slices in two. */
341 smartlist_slice_t
*top
, *bot
, *left
, *right
;
343 /* Split the first slice in half. */
344 int mid
= slice1
->len
/2;
345 top
= smartlist_slice(slice1
->list
, slice1
->offset
, slice1
->offset
+mid
);
346 bot
= smartlist_slice(slice1
->list
, slice1
->offset
+mid
,
347 slice1
->offset
+slice1
->len
);
349 /* Split the second slice by the optimal column. */
350 int mid2
= optimal_column_to_split(top
, bot
, slice2
);
351 left
= smartlist_slice(slice2
->list
, slice2
->offset
, slice2
->offset
+mid2
);
352 right
= smartlist_slice(slice2
->list
, slice2
->offset
+mid2
,
353 slice2
->offset
+slice2
->len
);
355 calc_changes(top
, left
, changed1
, changed2
);
356 calc_changes(bot
, right
, changed1
, changed2
);
364 /* This table is from crypto.c. The SP and PAD defines are different. */
365 #define NOT_VALID_BASE64 255
366 #define X NOT_VALID_BASE64
367 #define SP NOT_VALID_BASE64
368 #define PAD NOT_VALID_BASE64
369 static const uint8_t base64_compare_table
[256] = {
370 X
, X
, X
, X
, X
, X
, X
, X
, X
, SP
, SP
, SP
, X
, SP
, X
, X
,
371 X
, X
, X
, X
, X
, X
, X
, X
, X
, X
, X
, X
, X
, X
, X
, X
,
372 SP
, X
, X
, X
, X
, X
, X
, X
, X
, X
, X
, 62, X
, X
, X
, 63,
373 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, X
, X
, X
, PAD
, X
, X
,
374 X
, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
375 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, X
, X
, X
, X
, X
,
376 X
, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
377 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, X
, X
, X
, X
, X
,
378 X
, X
, X
, X
, X
, X
, X
, X
, X
, X
, X
, X
, X
, X
, X
, X
,
379 X
, X
, X
, X
, X
, X
, X
, X
, X
, X
, X
, X
, X
, X
, X
, X
,
380 X
, X
, X
, X
, X
, X
, X
, X
, X
, X
, X
, X
, X
, X
, X
, X
,
381 X
, X
, X
, X
, X
, X
, X
, X
, X
, X
, X
, X
, X
, X
, X
, X
,
382 X
, X
, X
, X
, X
, X
, X
, X
, X
, X
, X
, X
, X
, X
, X
, X
,
383 X
, X
, X
, X
, X
, X
, X
, X
, X
, X
, X
, X
, X
, X
, X
, X
,
384 X
, X
, X
, X
, X
, X
, X
, X
, X
, X
, X
, X
, X
, X
, X
, X
,
385 X
, X
, X
, X
, X
, X
, X
, X
, X
, X
, X
, X
, X
, X
, X
, X
,
388 /** Helper: Get the identity hash from a router line, assuming that the line
389 * at least appears to be a router line and thus starts with "r ".
391 * If an identity hash is found, store it (without decoding it) in
392 * <b>hash_out</b>, and return 0. On failure, return -1.
395 get_id_hash(const cdline_t
*line
, cdline_t
*hash_out
)
400 /* Skip the router name. */
401 const char *hash
= memchr(line
->s
+ 2, ' ', line
->len
- 2);
407 const char *hash_end
= hash
;
408 /* Stop when the first non-base64 character is found. Use unsigned chars to
409 * avoid negative indexes causing crashes.
411 while (base64_compare_table
[*((unsigned char*)hash_end
)]
412 != NOT_VALID_BASE64
&&
413 hash_end
< line
->s
+ line
->len
) {
418 if (hash_end
== hash
) {
423 /* Always true because lines are limited to this length */
424 tor_assert(hash_end
>= hash
);
425 tor_assert((size_t)(hash_end
- hash
) <= UINT32_MAX
);
426 hash_out
->len
= (uint32_t)(hash_end
- hash
);
431 /** Helper: Check that a line is a valid router entry. We must at least be
432 * able to fetch a proper identity hash from it for it to be valid.
435 is_valid_router_entry(const cdline_t
*line
)
437 if (line
->len
< 2 || fast_memneq(line
->s
, "r ", 2))
440 return (get_id_hash(line
, &tmp
) == 0);
443 /** Helper: Find the next router line starting at the current position.
444 * Assumes that cur is lower than the length of the smartlist, i.e. it is a
445 * line within the bounds of the consensus. The only exception is when we
446 * don't want to skip the first line, in which case cur will be -1.
449 next_router(const smartlist_t
*cons
, int cur
)
451 int len
= smartlist_len(cons
);
452 tor_assert(cur
>= -1 && cur
< len
);
458 const cdline_t
*line
= smartlist_get(cons
, cur
);
459 while (!is_valid_router_entry(line
)) {
463 line
= smartlist_get(cons
, cur
);
468 /** Helper: compare two base64-encoded identity hashes, which may be of
469 * different lengths. Comparison ends when the first non-base64 char is found.
472 base64cmp(const cdline_t
*hash1
, const cdline_t
*hash2
)
474 /* NULL is always lower, useful for last_hash which starts at NULL. */
475 if (!hash1
->s
&& !hash2
->s
) {
485 /* Don't index with a char; char may be signed. */
486 const unsigned char *a
= (unsigned char*)hash1
->s
;
487 const unsigned char *b
= (unsigned char*)hash2
->s
;
488 const unsigned char *a_end
= a
+ hash1
->len
;
489 const unsigned char *b_end
= b
+ hash2
->len
;
491 uint8_t av
= base64_compare_table
[*a
];
492 uint8_t bv
= base64_compare_table
[*b
];
493 if (av
== NOT_VALID_BASE64
) {
494 if (bv
== NOT_VALID_BASE64
) {
495 /* Both ended with exactly the same characters. */
498 /* hash2 goes on longer than hash1 and thus hash1 is lower. */
501 } else if (bv
== NOT_VALID_BASE64
) {
502 /* hash1 goes on longer than hash2 and thus hash1 is greater. */
504 } else if (av
< bv
) {
505 /* The first difference shows that hash1 is lower. */
507 } else if (av
> bv
) {
508 /* The first difference shows that hash1 is greater. */
519 } else if (b
== b_end
) {
526 /** Structure used to remember the previous and current identity hash of
527 * the "r " lines in a consensus, to enforce well-ordering. */
528 typedef struct router_id_iterator_t
{
531 } router_id_iterator_t
;
535 * Initializer for a router_id_iterator_t.
537 #define ROUTER_ID_ITERATOR_INIT { { NULL, 0 }, { NULL, 0 } }
538 #endif /* !defined(COCCI) */
540 /** Given an index *<b>idxp</b> into the consensus at <b>cons</b>, advance
541 * the index to the next router line ("r ...") in the consensus, or to
542 * an index one after the end of the list if there is no such line.
544 * Use <b>iter</b> to record the hash of the found router line, if any,
545 * and to enforce ordering on the hashes. If the hashes are mis-ordered,
546 * return -1. Else, return 0.
549 find_next_router_line(const smartlist_t
*cons
,
550 const char *consname
,
552 router_id_iterator_t
*iter
)
554 *idxp
= next_router(cons
, *idxp
);
555 if (*idxp
< smartlist_len(cons
)) {
556 memcpy(&iter
->last_hash
, &iter
->hash
, sizeof(cdline_t
));
557 if (get_id_hash(smartlist_get(cons
, *idxp
), &iter
->hash
) < 0 ||
558 base64cmp(&iter
->hash
, &iter
->last_hash
) <= 0) {
559 log_warn(LD_CONSDIFF
, "Refusing to generate consensus diff because "
560 "the %s consensus doesn't have its router entries sorted "
561 "properly.", consname
);
568 /** Line-prefix indicating the beginning of the signatures section that we
569 * intend to delete. */
570 #define START_OF_SIGNATURES_SECTION "directory-signature "
572 /** Pre-process a consensus in <b>cons</b> (represented as a list of cdline_t)
573 * to remove the signatures from it. If the footer is removed, return a
574 * cdline_t containing a delete command to delete the footer, allocated in
575 * <b>area</b>. If no footer is removed, return NULL.
577 * We remove the signatures here because they are not themselves signed, and
578 * as such there might be different encodings for them.
581 preprocess_consensus(memarea_t
*area
,
586 for (idx
= 0; idx
< smartlist_len(cons
); ++idx
) {
587 cdline_t
*line
= smartlist_get(cons
, idx
);
588 if (line_starts_with_str(line
, START_OF_SIGNATURES_SECTION
)) {
593 if (dirsig_idx
>= 0) {
595 while (smartlist_len(cons
) > dirsig_idx
)
596 smartlist_del(cons
, dirsig_idx
);
597 tor_snprintf(buf
, sizeof(buf
), "%d,$d", dirsig_idx
+1);
598 return cdline_linecpy(area
, buf
);
604 /** Generate an ed diff as a smartlist from two consensuses, also given as
605 * smartlists. Will return NULL if the diff could not be generated, which can
606 * happen if any lines the script had to add matched "." or if the routers
607 * were not properly ordered.
609 * All cdline_t objects in the resulting object are either references to lines
610 * in one of the inputs, or are newly allocated lines in the provided memarea.
612 * This implementation is consensus-specific. To generate an ed diff for any
613 * given input in quadratic time, you can replace all the code until the
614 * navigation in reverse order with the following:
616 * int len1 = smartlist_len(cons1);
617 * int len2 = smartlist_len(cons2);
618 * bitarray_t *changed1 = bitarray_init_zero(len1);
619 * bitarray_t *changed2 = bitarray_init_zero(len2);
620 * cons1_sl = smartlist_slice(cons1, 0, -1);
621 * cons2_sl = smartlist_slice(cons2, 0, -1);
622 * calc_changes(cons1_sl, cons2_sl, changed1, changed2);
625 gen_ed_diff(const smartlist_t
*cons1_orig
, const smartlist_t
*cons2
,
628 smartlist_t
*cons1
= smartlist_new();
629 smartlist_add_all(cons1
, cons1_orig
);
630 cdline_t
*remove_trailer
= preprocess_consensus(area
, cons1
);
632 int len1
= smartlist_len(cons1
);
633 int len2
= smartlist_len(cons2
);
634 smartlist_t
*result
= smartlist_new();
636 if (remove_trailer
) {
637 /* There's a delete-the-trailer line at the end, so add it here. */
638 smartlist_add(result
, remove_trailer
);
641 /* Initialize the changed bitarrays to zero, so that calc_changes only needs
642 * to set the ones that matter and leave the rest untouched.
644 bitarray_t
*changed1
= bitarray_init_zero(len1
);
645 bitarray_t
*changed2
= bitarray_init_zero(len2
);
647 int start1
=0, start2
=0;
649 /* To check that hashes are ordered properly */
650 router_id_iterator_t iter1
= ROUTER_ID_ITERATOR_INIT
;
651 router_id_iterator_t iter2
= ROUTER_ID_ITERATOR_INIT
;
653 /* i1 and i2 are initialized at the first line of each consensus. They never
654 * reach past len1 and len2 respectively, since next_router doesn't let that
655 * happen. i1 and i2 are advanced by at least one line at each iteration as
656 * long as they have not yet reached len1 and len2, so the loop is
657 * guaranteed to end, and each pair of (i1,i2) will be inspected at most
660 while (i1
< len1
|| i2
< len2
) {
662 /* Advance each of the two navigation positions by one router entry if not
666 if (find_next_router_line(cons1
, "base", &i1
, &iter1
) < 0) {
672 if (find_next_router_line(cons2
, "target", &i2
, &iter2
) < 0) {
677 /* If we have reached the end of both consensuses, there is no need to
678 * compare hashes anymore, since this is the last iteration.
680 if (i1
< len1
|| i2
< len2
) {
682 /* Keep on advancing the lower (by identity hash sorting) position until
683 * we have two matching positions. The only other possible outcome is
684 * that a lower position reaches the end of the consensus before it can
685 * reach a hash that is no longer the lower one. Since there will always
686 * be a lower hash for as long as the loop runs, one of the two indexes
687 * will always be incremented, thus assuring that the loop must end
688 * after a finite number of iterations. If that cannot be because said
689 * consensus has already reached the end, both are extended to their
690 * respecting ends since we are done.
692 int cmp
= base64cmp(&iter1
.hash
, &iter2
.hash
);
694 if (i1
< len1
&& cmp
< 0) {
695 if (find_next_router_line(cons1
, "base", &i1
, &iter1
) < 0) {
699 /* We finished the first consensus, so grab all the remaining
700 * lines of the second consensus and finish up.
705 } else if (i2
< len2
&& cmp
> 0) {
706 if (find_next_router_line(cons2
, "target", &i2
, &iter2
) < 0) {
710 /* We finished the second consensus, so grab all the remaining
711 * lines of the first consensus and finish up.
721 cmp
= base64cmp(&iter1
.hash
, &iter2
.hash
);
725 /* Make slices out of these chunks (up to the common router entry) and
726 * calculate the changes for them.
727 * Error if any of the two slices are longer than 10K lines. That should
728 * never happen with any pair of real consensuses. Feeding more than 10K
729 * lines to calc_changes would be very slow anyway.
731 #define MAX_LINE_COUNT (10000)
732 if (i1
-start1
> MAX_LINE_COUNT
|| i2
-start2
> MAX_LINE_COUNT
) {
733 log_warn(LD_CONSDIFF
, "Refusing to generate consensus diff because "
734 "we found too few common router ids.");
738 smartlist_slice_t
*cons1_sl
= smartlist_slice(cons1
, start1
, i1
);
739 smartlist_slice_t
*cons2_sl
= smartlist_slice(cons2
, start2
, i2
);
740 calc_changes(cons1_sl
, cons2_sl
, changed1
, changed2
);
743 start1
= i1
, start2
= i2
;
746 /* Navigate the changes in reverse order and generate one ed command for
747 * each chunk of changes.
749 i1
=len1
-1, i2
=len2
-1;
751 while (i1
>= 0 || i2
>= 0) {
753 int start1x
, start2x
, end1
, end2
, added
, deleted
;
755 /* We are at a point were no changed bools are true, so just keep going. */
756 if (!(i1
>= 0 && bitarray_is_set(changed1
, i1
)) &&
757 !(i2
>= 0 && bitarray_is_set(changed2
, i2
))) {
767 end1
= i1
, end2
= i2
;
769 /* Grab all contiguous changed lines */
770 while (i1
>= 0 && bitarray_is_set(changed1
, i1
)) {
773 while (i2
>= 0 && bitarray_is_set(changed2
, i2
)) {
777 start1x
= i1
+1, start2x
= i2
+1;
778 added
= end2
-i2
, deleted
= end1
-i1
;
782 tor_snprintf(buf
, sizeof(buf
), "%id", start1x
+1);
783 smartlist_add_linecpy(result
, area
, buf
);
785 tor_snprintf(buf
, sizeof(buf
), "%i,%id", start1x
+1, start1x
+deleted
);
786 smartlist_add_linecpy(result
, area
, buf
);
791 tor_snprintf(buf
, sizeof(buf
), "%ia", start1x
);
792 smartlist_add_linecpy(result
, area
, buf
);
793 } else if (deleted
== 1) {
794 tor_snprintf(buf
, sizeof(buf
), "%ic", start1x
+1);
795 smartlist_add_linecpy(result
, area
, buf
);
797 tor_snprintf(buf
, sizeof(buf
), "%i,%ic", start1x
+1, start1x
+deleted
);
798 smartlist_add_linecpy(result
, area
, buf
);
801 for (i
= start2x
; i
<= end2
; ++i
) {
802 cdline_t
*line
= smartlist_get(cons2
, i
);
803 if (line_str_eq(line
, ".")) {
804 log_warn(LD_CONSDIFF
, "Cannot generate consensus diff because "
805 "one of the lines to be added is \".\".");
808 smartlist_add(result
, line
);
810 smartlist_add_linecpy(result
, area
, ".");
814 smartlist_free(cons1
);
815 bitarray_free(changed1
);
816 bitarray_free(changed2
);
822 smartlist_free(cons1
);
823 bitarray_free(changed1
);
824 bitarray_free(changed2
);
826 smartlist_free(result
);
831 /* Helper: Read a base-10 number between 0 and INT32_MAX from <b>s</b> and
832 * store it in <b>num_out</b>. Advance <b>s</b> to the character immediately
833 * after the number. Return 0 on success, -1 on failure. */
835 get_linenum(const char **s
, int *num_out
)
839 if (!TOR_ISDIGIT(**s
)) {
842 *num_out
= (int) tor_parse_long(*s
, 10, 0, INT32_MAX
, &ok
, &next
);
851 /** Apply the ed diff, starting at <b>diff_starting_line</b>, to the consensus
852 * and return a new consensus, also as a line-based smartlist. Will return
853 * NULL if the ed diff is not properly formatted.
855 * All cdline_t objects in the resulting object are references to lines
856 * in one of the inputs; nothing is copied.
859 apply_ed_diff(const smartlist_t
*cons1
, const smartlist_t
*diff
,
860 int diff_starting_line
)
862 int diff_len
= smartlist_len(diff
);
863 int j
= smartlist_len(cons1
);
864 smartlist_t
*cons2
= smartlist_new();
866 for (int i
=diff_starting_line
; i
<diff_len
; ++i
) {
867 const cdline_t
*diff_cdline
= smartlist_get(diff
, i
);
870 if (diff_cdline
->len
> sizeof(diff_line
) - 1) {
871 log_warn(LD_CONSDIFF
, "Could not apply consensus diff because "
872 "an ed command was far too long");
875 /* Copy the line to make it nul-terminated. */
876 memcpy(diff_line
, diff_cdline
->s
, diff_cdline
->len
);
877 diff_line
[diff_cdline
->len
] = 0;
878 const char *ptr
= diff_line
;
879 int start
= 0, end
= 0;
882 if (get_linenum(&ptr
, &start
) < 0) {
883 log_warn(LD_CONSDIFF
, "Could not apply consensus diff because "
884 "an ed command was missing a line number.");
893 end
= smartlist_len(cons1
);
895 } else if (get_linenum(&ptr
, &end
) < 0) {
896 log_warn(LD_CONSDIFF
, "Could not apply consensus diff because "
897 "an ed command was missing a range end line number.");
900 /* Incoherent range. */
902 log_warn(LD_CONSDIFF
, "Could not apply consensus diff because "
903 "an invalid range was found in an ed command.");
907 /* We'll take <n1> as <n1>,<n1> for simplicity. */
912 log_warn(LD_CONSDIFF
, "Could not apply consensus diff because "
913 "its commands are not properly sorted in reverse order.");
918 log_warn(LD_CONSDIFF
, "Could not apply consensus diff because "
919 "a line with no ed command was found");
923 if (*(ptr
+1) != '\0') {
924 log_warn(LD_CONSDIFF
, "Could not apply consensus diff because "
925 "an ed command longer than one char was found.");
937 log_warn(LD_CONSDIFF
, "Could not apply consensus diff because "
938 "an unrecognised ed command was found.");
942 /** $ is not allowed with non-d actions. */
943 if (end_was_eof
&& action
!= 'd') {
944 log_warn(LD_CONSDIFF
, "Could not apply consensus diff because "
945 "it wanted to use $ with a command other than delete");
949 /* 'a' commands are not allowed to have ranges. */
950 if (had_range
&& action
== 'a') {
951 log_warn(LD_CONSDIFF
, "Could not apply consensus diff because "
952 "it wanted to add lines after a range.");
956 /* Add unchanged lines. */
957 for (; j
&& j
> end
; --j
) {
958 cdline_t
*cons_line
= smartlist_get(cons1
, j
-1);
959 smartlist_add(cons2
, cons_line
);
962 /* Ignore removed lines. */
963 if (action
== 'c' || action
== 'd') {
964 while (--j
>= start
) {
969 /* Add new lines in reverse order, since it will all be reversed at the
972 if (action
== 'a' || action
== 'c') {
975 i
++; /* Skip the line with the range and command. */
976 while (i
< diff_len
) {
977 if (line_str_eq(smartlist_get(diff
, i
), ".")) {
980 if (++i
== diff_len
) {
981 log_warn(LD_CONSDIFF
, "Could not apply consensus diff because "
982 "it has lines to be inserted that don't end with a \".\".");
989 /* It would make no sense to add zero new lines. */
990 if (added_i
== added_end
) {
991 log_warn(LD_CONSDIFF
, "Could not apply consensus diff because "
992 "it has an ed command that tries to insert zero lines.");
996 while (added_i
> added_end
) {
997 cdline_t
*added_line
= smartlist_get(diff
, added_i
--);
998 smartlist_add(cons2
, added_line
);
1003 /* Add remaining unchanged lines. */
1004 for (; j
> 0; --j
) {
1005 cdline_t
*cons_line
= smartlist_get(cons1
, j
-1);
1006 smartlist_add(cons2
, cons_line
);
1009 /* Reverse the whole thing since we did it from the end. */
1010 smartlist_reverse(cons2
);
1015 smartlist_free(cons2
);
1020 /** Generate a consensus diff as a smartlist from two given consensuses, also
1021 * as smartlists. Will return NULL if the consensus diff could not be
1022 * generated. Neither of the two consensuses are modified in any way, so it's
1023 * up to the caller to free their resources.
1026 consdiff_gen_diff(const smartlist_t
*cons1
,
1027 const smartlist_t
*cons2
,
1028 const consensus_digest_t
*digests1
,
1029 const consensus_digest_t
*digests2
,
1032 smartlist_t
*ed_diff
= gen_ed_diff(cons1
, cons2
, area
);
1033 /* ed diff could not be generated - reason already logged by gen_ed_diff. */
1038 /* See that the script actually produces what we want. */
1039 smartlist_t
*ed_cons2
= apply_ed_diff(cons1
, ed_diff
, 0);
1041 /* LCOV_EXCL_START -- impossible if diff generation is correct */
1042 log_warn(LD_BUG
|LD_CONSDIFF
, "Refusing to generate consensus diff because "
1043 "the generated ed diff could not be tested to successfully generate "
1044 "the target consensus.");
1046 /* LCOV_EXCL_STOP */
1050 if (smartlist_len(cons2
) == smartlist_len(ed_cons2
)) {
1051 SMARTLIST_FOREACH_BEGIN(cons2
, const cdline_t
*, line1
) {
1052 const cdline_t
*line2
= smartlist_get(ed_cons2
, line1_sl_idx
);
1053 if (!lines_eq(line1
, line2
)) {
1057 } SMARTLIST_FOREACH_END(line1
);
1061 smartlist_free(ed_cons2
);
1063 /* LCOV_EXCL_START -- impossible if diff generation is correct. */
1064 log_warn(LD_BUG
|LD_CONSDIFF
, "Refusing to generate consensus diff because "
1065 "the generated ed diff did not generate the target consensus "
1066 "successfully when tested.");
1068 /* LCOV_EXCL_STOP */
1071 char cons1_hash_hex
[HEX_DIGEST256_LEN
+1];
1072 char cons2_hash_hex
[HEX_DIGEST256_LEN
+1];
1073 base16_encode(cons1_hash_hex
, HEX_DIGEST256_LEN
+1,
1074 (const char*)digests1
->sha3_256
, DIGEST256_LEN
);
1075 base16_encode(cons2_hash_hex
, HEX_DIGEST256_LEN
+1,
1076 (const char*)digests2
->sha3_256
, DIGEST256_LEN
);
1078 /* Create the resulting consensus diff. */
1080 smartlist_t
*result
= smartlist_new();
1081 tor_snprintf(buf
, sizeof(buf
), "%s", ns_diff_version
);
1082 smartlist_add_linecpy(result
, area
, buf
);
1083 tor_snprintf(buf
, sizeof(buf
), "%s %s %s", hash_token
,
1084 cons1_hash_hex
, cons2_hash_hex
);
1085 smartlist_add_linecpy(result
, area
, buf
);
1086 smartlist_add_all(result
, ed_diff
);
1087 smartlist_free(ed_diff
);
1093 /* LCOV_EXCL_START -- ed_diff is NULL except in unreachable cases above */
1094 smartlist_free(ed_diff
);
1095 /* LCOV_EXCL_STOP */
1101 /** Fetch the digest of the base consensus in the consensus diff, encoded in
1102 * base16 as found in the diff itself. digest1_out and digest2_out must be of
1103 * length DIGEST256_LEN or larger if not NULL.
1106 consdiff_get_digests(const smartlist_t
*diff
,
1110 smartlist_t
*hash_words
= NULL
;
1111 const cdline_t
*format
;
1112 char cons1_hash
[DIGEST256_LEN
], cons2_hash
[DIGEST256_LEN
];
1113 char *cons1_hash_hex
, *cons2_hash_hex
;
1114 if (smartlist_len(diff
) < 2) {
1115 log_info(LD_CONSDIFF
, "The provided consensus diff is too short.");
1119 /* Check that it's the format and version we know. */
1120 format
= smartlist_get(diff
, 0);
1121 if (!line_str_eq(format
, ns_diff_version
)) {
1122 log_warn(LD_CONSDIFF
, "The provided consensus diff format is not known.");
1126 /* Grab the base16 digests. */
1127 hash_words
= smartlist_new();
1129 const cdline_t
*line2
= smartlist_get(diff
, 1);
1130 char *h
= tor_memdup_nulterm(line2
->s
, line2
->len
);
1131 smartlist_split_string(hash_words
, h
, " ", 0, 4);
1135 /* There have to be three words, the first of which must be hash_token. */
1136 if (smartlist_len(hash_words
) != 3 ||
1137 strcmp(smartlist_get(hash_words
, 0), hash_token
)) {
1138 log_info(LD_CONSDIFF
, "The provided consensus diff does not include "
1139 "the necessary digests.");
1143 /* Expected hashes as found in the consensus diff header. They must be of
1144 * length HEX_DIGEST256_LEN, normally 64 hexadecimal characters.
1145 * If any of the decodings fail, error to make sure that the hashes are
1146 * proper base16-encoded digests.
1148 cons1_hash_hex
= smartlist_get(hash_words
, 1);
1149 cons2_hash_hex
= smartlist_get(hash_words
, 2);
1150 if (strlen(cons1_hash_hex
) != HEX_DIGEST256_LEN
||
1151 strlen(cons2_hash_hex
) != HEX_DIGEST256_LEN
) {
1152 log_info(LD_CONSDIFF
, "The provided consensus diff includes "
1153 "base16-encoded digests of incorrect size.");
1157 if (base16_decode(cons1_hash
, DIGEST256_LEN
,
1158 cons1_hash_hex
, HEX_DIGEST256_LEN
) != DIGEST256_LEN
||
1159 base16_decode(cons2_hash
, DIGEST256_LEN
,
1160 cons2_hash_hex
, HEX_DIGEST256_LEN
) != DIGEST256_LEN
) {
1161 log_info(LD_CONSDIFF
, "The provided consensus diff includes "
1162 "malformed digests.");
1167 memcpy(digest1_out
, cons1_hash
, DIGEST256_LEN
);
1170 memcpy(digest2_out
, cons2_hash
, DIGEST256_LEN
);
1173 SMARTLIST_FOREACH(hash_words
, char *, cp
, tor_free(cp
));
1174 smartlist_free(hash_words
);
1180 SMARTLIST_FOREACH(hash_words
, char *, cp
, tor_free(cp
));
1181 smartlist_free(hash_words
);
1186 /** Apply the consensus diff to the given consensus and return a new
1187 * consensus, also as a line-based smartlist. Will return NULL if the diff
1188 * could not be applied. Neither the consensus nor the diff are modified in
1189 * any way, so it's up to the caller to free their resources.
1192 consdiff_apply_diff(const smartlist_t
*cons1
,
1193 const smartlist_t
*diff
,
1194 const consensus_digest_t
*digests1
)
1196 smartlist_t
*cons2
= NULL
;
1197 char *cons2_str
= NULL
;
1198 char e_cons1_hash
[DIGEST256_LEN
];
1199 char e_cons2_hash
[DIGEST256_LEN
];
1201 if (consdiff_get_digests(diff
, e_cons1_hash
, e_cons2_hash
) != 0) {
1205 /* See that the consensus that was given to us matches its hash. */
1206 if (!consensus_digest_eq(digests1
->sha3_256
,
1207 (const uint8_t*)e_cons1_hash
)) {
1208 char hex_digest1
[HEX_DIGEST256_LEN
+1];
1209 char e_hex_digest1
[HEX_DIGEST256_LEN
+1];
1210 log_warn(LD_CONSDIFF
, "Refusing to apply consensus diff because "
1211 "the base consensus doesn't match the digest as found in "
1212 "the consensus diff header.");
1213 base16_encode(hex_digest1
, HEX_DIGEST256_LEN
+1,
1214 (const char *)digests1
->sha3_256
, DIGEST256_LEN
);
1215 base16_encode(e_hex_digest1
, HEX_DIGEST256_LEN
+1,
1216 e_cons1_hash
, DIGEST256_LEN
);
1217 log_warn(LD_CONSDIFF
, "Expected: %s; found: %s",
1218 hex_digest1
, e_hex_digest1
);
1222 /* Grab the ed diff and calculate the resulting consensus. */
1223 /* Skip the first two lines. */
1224 cons2
= apply_ed_diff(cons1
, diff
, 2);
1226 /* ed diff could not be applied - reason already logged by apply_ed_diff. */
1231 cons2_str
= consensus_join_lines(cons2
);
1233 consensus_digest_t cons2_digests
;
1234 if (consensus_compute_digest(cons2_str
, strlen(cons2_str
),
1235 &cons2_digests
) < 0) {
1236 /* LCOV_EXCL_START -- digest can't fail */
1237 log_warn(LD_CONSDIFF
, "Could not compute digests of the consensus "
1238 "resulting from applying a consensus diff.");
1240 /* LCOV_EXCL_STOP */
1243 /* See that the resulting consensus matches its hash. */
1244 if (!consensus_digest_eq(cons2_digests
.sha3_256
,
1245 (const uint8_t*)e_cons2_hash
)) {
1246 log_warn(LD_CONSDIFF
, "Refusing to apply consensus diff because "
1247 "the resulting consensus doesn't match the digest as found in "
1248 "the consensus diff header.");
1249 char hex_digest2
[HEX_DIGEST256_LEN
+1];
1250 char e_hex_digest2
[HEX_DIGEST256_LEN
+1];
1251 base16_encode(hex_digest2
, HEX_DIGEST256_LEN
+1,
1252 (const char *)cons2_digests
.sha3_256
, DIGEST256_LEN
);
1253 base16_encode(e_hex_digest2
, HEX_DIGEST256_LEN
+1,
1254 e_cons2_hash
, DIGEST256_LEN
);
1255 log_warn(LD_CONSDIFF
, "Expected: %s; found: %s",
1256 hex_digest2
, e_hex_digest2
);
1263 tor_free(cons2_str
); /* Sets it to NULL */
1267 smartlist_free(cons2
);
1273 /** Any consensus line longer than this means that the input is invalid. */
1274 #define CONSENSUS_LINE_MAX_LEN (1<<20)
1277 * Helper: For every NL-terminated line in <b>s</b>, add a cdline referring to
1278 * that line (without trailing newline) to <b>out</b>. Return -1 if there are
1279 * any non-NL terminated lines; 0 otherwise.
1281 * Unlike tor_split_lines, this function avoids ambiguity on its
1282 * handling of a final line that isn't NL-terminated.
1284 * All cdline_t objects are allocated in the provided memarea. Strings
1285 * are not copied: if <b>s</b> changes or becomes invalid, then all
1286 * generated cdlines will become invalid.
1289 consensus_split_lines(smartlist_t
*out
,
1290 const char *s
, size_t len
,
1293 const char *end_of_str
= s
+ len
;
1295 while (s
< end_of_str
) {
1296 const char *eol
= memchr(s
, '\n', end_of_str
- s
);
1298 /* File doesn't end with newline. */
1301 if (eol
- s
> CONSENSUS_LINE_MAX_LEN
) {
1302 /* Line is far too long. */
1305 cdline_t
*line
= memarea_alloc(area
, sizeof(cdline_t
));
1307 line
->len
= (uint32_t)(eol
- s
);
1308 smartlist_add(out
, line
);
1314 /** Given a list of cdline_t, return a newly allocated string containing
1315 * all of the lines, terminated with NL, concatenated.
1317 * Unlike smartlist_join_strings(), avoids lossy operations on empty
1320 consensus_join_lines(const smartlist_t
*inp
)
1323 SMARTLIST_FOREACH(inp
, const cdline_t
*, cdline
, n
+= cdline
->len
+ 1);
1325 char *result
= tor_malloc(n
);
1327 SMARTLIST_FOREACH_BEGIN(inp
, const cdline_t
*, cdline
) {
1328 memcpy(out
, cdline
->s
, cdline
->len
);
1331 } SMARTLIST_FOREACH_END(cdline
);
1333 tor_assert(out
== result
+n
);
1337 /** Given two consensus documents, try to compute a diff between them. On
1338 * success, return a newly allocated string containing that diff. On failure,
1341 consensus_diff_generate(const char *cons1
, size_t cons1len
,
1342 const char *cons2
, size_t cons2len
)
1344 consensus_digest_t d1
, d2
;
1345 smartlist_t
*lines1
= NULL
, *lines2
= NULL
, *result_lines
= NULL
;
1347 char *result
= NULL
;
1349 r1
= consensus_compute_digest_as_signed(cons1
, cons1len
, &d1
);
1350 r2
= consensus_compute_digest(cons2
, cons2len
, &d2
);
1351 if (BUG(r1
< 0 || r2
< 0))
1352 return NULL
; // LCOV_EXCL_LINE
1354 memarea_t
*area
= memarea_new();
1355 lines1
= smartlist_new();
1356 lines2
= smartlist_new();
1357 if (consensus_split_lines(lines1
, cons1
, cons1len
, area
) < 0)
1359 if (consensus_split_lines(lines2
, cons2
, cons2len
, area
) < 0)
1362 result_lines
= consdiff_gen_diff(lines1
, lines2
, &d1
, &d2
, area
);
1366 result
= consensus_join_lines(result_lines
);
1367 smartlist_free(result_lines
);
1370 memarea_drop_all(area
);
1371 smartlist_free(lines1
);
1372 smartlist_free(lines2
);
1377 /** Given a consensus document and a diff, try to apply the diff to the
1378 * consensus. On success return a newly allocated string containing the new
1379 * consensus. On failure, return NULL. */
1381 consensus_diff_apply(const char *consensus
,
1382 size_t consensus_len
,
1386 consensus_digest_t d1
;
1387 smartlist_t
*lines1
= NULL
, *lines2
= NULL
;
1389 char *result
= NULL
;
1390 memarea_t
*area
= memarea_new();
1392 r1
= consensus_compute_digest_as_signed(consensus
, consensus_len
, &d1
);
1396 lines1
= smartlist_new();
1397 lines2
= smartlist_new();
1398 if (consensus_split_lines(lines1
, consensus
, consensus_len
, area
) < 0)
1400 if (consensus_split_lines(lines2
, diff
, diff_len
, area
) < 0)
1403 result
= consdiff_apply_diff(lines1
, lines2
, &d1
);
1406 smartlist_free(lines1
);
1407 smartlist_free(lines2
);
1408 memarea_drop_all(area
);
1413 /** Return true iff, based on its header, <b>document</b> is likely
1414 * to be a consensus diff. */
1416 looks_like_a_consensus_diff(const char *document
, size_t len
)
1418 return (len
>= strlen(ns_diff_version
) &&
1419 fast_memeq(document
, ns_diff_version
, strlen(ns_diff_version
)));