Merge remote-tracking branch 'public/bug3122_memcmp_022' into maint-0.2.2
[tor.git] / src / or / microdesc.c
blob7c67d51448d706f62239cad51aa4fa41d368a406
1 /* Copyright (c) 2009-2011, The Tor Project, Inc. */
2 /* See LICENSE for licensing information */
4 #include "or.h"
5 #include "config.h"
6 #include "microdesc.h"
7 #include "routerparse.h"
9 /** A data structure to hold a bunch of cached microdescriptors. There are
10 * two active files in the cache: a "cache file" that we mmap, and a "journal
11 * file" that we append to. Periodically, we rebuild the cache file to hold
12 * only the microdescriptors that we want to keep */
13 struct microdesc_cache_t {
14 /** Map from sha256-digest to microdesc_t for every microdesc_t in the
15 * cache. */
16 HT_HEAD(microdesc_map, microdesc_t) map;
18 /** Name of the cache file. */
19 char *cache_fname;
20 /** Name of the journal file. */
21 char *journal_fname;
22 /** Mmap'd contents of the cache file, or NULL if there is none. */
23 tor_mmap_t *cache_content;
24 /** Number of bytes used in the journal file. */
25 size_t journal_len;
26 /** Number of bytes in descriptors removed as too old. */
27 size_t bytes_dropped;
29 /** Total bytes of microdescriptor bodies we have added to this cache */
30 uint64_t total_len_seen;
31 /** Total number of microdescriptors we have added to this cache */
32 unsigned n_seen;
35 /** Helper: computes a hash of <b>md</b> to place it in a hash table. */
36 static INLINE unsigned int
37 _microdesc_hash(microdesc_t *md)
39 unsigned *d = (unsigned*)md->digest;
40 #if SIZEOF_INT == 4
41 return d[0] ^ d[1] ^ d[2] ^ d[3] ^ d[4] ^ d[5] ^ d[6] ^ d[7];
42 #else
43 return d[0] ^ d[1] ^ d[2] ^ d[3];
44 #endif
47 /** Helper: compares <b>a</b> and </b> for equality for hash-table purposes. */
48 static INLINE int
49 _microdesc_eq(microdesc_t *a, microdesc_t *b)
51 return tor_memeq(a->digest, b->digest, DIGEST256_LEN);
/* Declare and then instantiate the "microdesc_map" hash table of microdesc_t
 * elements, linked through each element's "node" member and using
 * _microdesc_hash() and _microdesc_eq() as its hash and equality functions.
 * The malloc/realloc/free arguments name the allocator the generated code
 * uses.  NOTE(review): 0.6 is presumably the table's load factor -- confirm
 * against ht.h. */
54 HT_PROTOTYPE(microdesc_map, microdesc_t, node,
55 _microdesc_hash, _microdesc_eq);
56 HT_GENERATE(microdesc_map, microdesc_t, node,
57 _microdesc_hash, _microdesc_eq, 0.6,
58 malloc, realloc, free);
60 /** Write the body of <b>md</b> into <b>f</b>, with appropriate annotations.
61 * On success, return the total number of bytes written, and set
62 * *<b>annotation_len_out</b> to the number of bytes written as
63 * annotations. */
64 static ssize_t
65 dump_microdescriptor(FILE *f, microdesc_t *md, size_t *annotation_len_out)
67 ssize_t r = 0;
68 size_t written;
69 /* XXXX drops unkown annotations. */
70 if (md->last_listed) {
71 char buf[ISO_TIME_LEN+1];
72 char annotation[ISO_TIME_LEN+32];
73 format_iso_time(buf, md->last_listed);
74 tor_snprintf(annotation, sizeof(annotation), "@last-listed %s\n", buf);
75 fputs(annotation, f);
76 r += strlen(annotation);
77 *annotation_len_out = r;
78 } else {
79 *annotation_len_out = 0;
82 md->off = (off_t) ftell(f);
83 written = fwrite(md->body, 1, md->bodylen, f);
84 if (written != md->bodylen) {
85 log_warn(LD_DIR,
86 "Couldn't dump microdescriptor (wrote %lu out of %lu): %s",
87 (unsigned long)written, (unsigned long)md->bodylen,
88 strerror(ferror(f)));
89 return -1;
91 r += md->bodylen;
92 return r;
95 /** Holds a pointer to the current microdesc_cache_t object, or NULL if no
96 * such object has been allocated. */
97 static microdesc_cache_t *the_microdesc_cache = NULL;
99 /** Return a pointer to the microdescriptor cache, loading it if necessary. */
100 microdesc_cache_t *
101 get_microdesc_cache(void)
103 if (PREDICT_UNLIKELY(the_microdesc_cache==NULL)) {
104 microdesc_cache_t *cache = tor_malloc_zero(sizeof(microdesc_cache_t));
105 HT_INIT(microdesc_map, &cache->map);
106 cache->cache_fname = get_datadir_fname("cached-microdescs");
107 cache->journal_fname = get_datadir_fname("cached-microdescs.new");
108 microdesc_cache_reload(cache);
109 the_microdesc_cache = cache;
111 return the_microdesc_cache;
/* There are three sources of microdescriptors:
   1) Generated by us while acting as a directory authority.
   2) Loaded from the cache on disk.
   3) Downloaded.
*/
120 /** Decode the microdescriptors from the string starting at <b>s</b> and
121 * ending at <b>eos</b>, and store them in <b>cache</b>. If <b>no-save</b>,
122 * mark them as non-writable to disk. If <b>where</b> is SAVED_IN_CACHE,
123 * leave their bodies as pointers to the mmap'd cache. If where is
124 * <b>SAVED_NOWHERE</b>, do not allow annotations. Return a list of the added
125 * microdescriptors. */
126 smartlist_t *
127 microdescs_add_to_cache(microdesc_cache_t *cache,
128 const char *s, const char *eos, saved_location_t where,
129 int no_save)
131 /*XXXX need an argument that sets last_listed as appropriate. */
133 smartlist_t *descriptors, *added;
134 const int allow_annotations = (where != SAVED_NOWHERE);
135 const int copy_body = (where != SAVED_IN_CACHE);
137 descriptors = microdescs_parse_from_string(s, eos,
138 allow_annotations,
139 copy_body);
141 added = microdescs_add_list_to_cache(cache, descriptors, where, no_save);
142 smartlist_free(descriptors);
143 return added;
146 /* As microdescs_add_to_cache, but takes a list of micrdescriptors instead of
147 * a string to encode. Frees any members of <b>descriptors</b> that it does
148 * not add. */
149 smartlist_t *
150 microdescs_add_list_to_cache(microdesc_cache_t *cache,
151 smartlist_t *descriptors, saved_location_t where,
152 int no_save)
154 smartlist_t *added;
155 open_file_t *open_file = NULL;
156 FILE *f = NULL;
157 // int n_added = 0;
158 ssize_t size = 0;
160 if (where == SAVED_NOWHERE && !no_save) {
161 f = start_writing_to_stdio_file(cache->journal_fname,
162 OPEN_FLAGS_APPEND|O_BINARY,
163 0600, &open_file);
164 if (!f) {
165 log_warn(LD_DIR, "Couldn't append to journal in %s: %s",
166 cache->journal_fname, strerror(errno));
167 return NULL;
171 added = smartlist_create();
172 SMARTLIST_FOREACH_BEGIN(descriptors, microdesc_t *, md) {
173 microdesc_t *md2;
174 md2 = HT_FIND(microdesc_map, &cache->map, md);
175 if (md2) {
176 /* We already had this one. */
177 if (md2->last_listed < md->last_listed)
178 md2->last_listed = md->last_listed;
179 microdesc_free(md);
180 if (where != SAVED_NOWHERE)
181 cache->bytes_dropped += size;
182 continue;
185 /* Okay, it's a new one. */
186 if (f) {
187 size_t annotation_len;
188 size = dump_microdescriptor(f, md, &annotation_len);
189 if (size < 0) {
190 /* XXX handle errors from dump_microdescriptor() */
191 /* log? return -1? die? coredump the universe? */
192 continue;
194 md->saved_location = SAVED_IN_JOURNAL;
195 cache->journal_len += size;
196 } else {
197 md->saved_location = where;
200 md->no_save = no_save;
202 HT_INSERT(microdesc_map, &cache->map, md);
203 smartlist_add(added, md);
204 ++cache->n_seen;
205 cache->total_len_seen += md->bodylen;
206 } SMARTLIST_FOREACH_END(md);
208 if (f)
209 finish_writing_to_file(open_file); /*XXX Check me.*/
211 return added;
214 /** Remove every microdescriptor in <b>cache</b>. */
215 void
216 microdesc_cache_clear(microdesc_cache_t *cache)
218 microdesc_t **entry, **next;
219 for (entry = HT_START(microdesc_map, &cache->map); entry; entry = next) {
220 microdesc_t *md = *entry;
221 next = HT_NEXT_RMV(microdesc_map, &cache->map, entry);
222 microdesc_free(md);
224 HT_CLEAR(microdesc_map, &cache->map);
225 if (cache->cache_content) {
226 tor_munmap_file(cache->cache_content);
227 cache->cache_content = NULL;
229 cache->total_len_seen = 0;
230 cache->n_seen = 0;
231 cache->bytes_dropped = 0;
234 /** Reload the contents of <b>cache</b> from disk. If it is empty, load it
235 * for the first time. Return 0 on success, -1 on failure. */
237 microdesc_cache_reload(microdesc_cache_t *cache)
239 struct stat st;
240 char *journal_content;
241 smartlist_t *added;
242 tor_mmap_t *mm;
243 int total = 0;
245 microdesc_cache_clear(cache);
247 mm = cache->cache_content = tor_mmap_file(cache->cache_fname);
248 if (mm) {
249 added = microdescs_add_to_cache(cache, mm->data, mm->data+mm->size,
250 SAVED_IN_CACHE, 0);
251 if (added) {
252 total += smartlist_len(added);
253 smartlist_free(added);
257 journal_content = read_file_to_str(cache->journal_fname,
258 RFTS_IGNORE_MISSING, &st);
259 if (journal_content) {
260 cache->journal_len = (size_t) st.st_size;
261 added = microdescs_add_to_cache(cache, journal_content,
262 journal_content+st.st_size,
263 SAVED_IN_JOURNAL, 0);
264 if (added) {
265 total += smartlist_len(added);
266 smartlist_free(added);
268 tor_free(journal_content);
270 log_notice(LD_DIR, "Reloaded microdescriptor cache. Found %d descriptors.",
271 total);
273 microdesc_cache_rebuild(cache, 0 /* don't force */);
275 return 0;
278 /** By default, we remove any microdescriptors that have gone at least this
279 * long without appearing in a current consensus. */
280 #define TOLERATE_MICRODESC_AGE (7*24*60*60)
282 /** Remove all microdescriptors from <b>cache</b> that haven't been listed for
283 * a long time. Does not rebuild the cache on disk. If <b>cutoff</b> is
284 * positive, specifically remove microdescriptors that have been unlisted
285 * since <b>cutoff</b>. If <b>force</b> is true, remove microdescriptors even
286 * if we have no current live microdescriptor consensus.
288 void
289 microdesc_cache_clean(microdesc_cache_t *cache, time_t cutoff, int force)
291 microdesc_t **mdp, *victim;
292 int dropped=0, kept=0;
293 size_t bytes_dropped = 0;
294 time_t now = time(NULL);
296 (void) force;
297 /* In 0.2.2, we let this proceed unconditionally: only authorities have
298 * microdesc caches. */
300 if (cutoff <= 0)
301 cutoff = now - TOLERATE_MICRODESC_AGE;
303 for (mdp = HT_START(microdesc_map, &cache->map); mdp != NULL; ) {
304 if ((*mdp)->last_listed < cutoff) {
305 ++dropped;
306 victim = *mdp;
307 mdp = HT_NEXT_RMV(microdesc_map, &cache->map, mdp);
308 bytes_dropped += victim->bodylen;
309 microdesc_free(victim);
310 } else {
311 ++kept;
312 mdp = HT_NEXT(microdesc_map, &cache->map, mdp);
316 if (dropped) {
317 log_notice(LD_DIR, "Removed %d/%d microdescriptors as old.",
318 dropped,dropped+kept);
319 cache->bytes_dropped += bytes_dropped;
323 static int
324 should_rebuild_md_cache(microdesc_cache_t *cache)
326 const size_t old_len =
327 cache->cache_content ? cache->cache_content->size : 0;
328 const size_t journal_len = cache->journal_len;
329 const size_t dropped = cache->bytes_dropped;
331 if (journal_len < 16384)
332 return 0; /* Don't bother, not enough has happened yet. */
333 if (dropped > (journal_len + old_len) / 3)
334 return 1; /* We could save 1/3 or more of the currently used space. */
335 if (journal_len > old_len / 2)
336 return 1; /* We should append to the regular file */
338 return 0;
341 /** Regenerate the main cache file for <b>cache</b>, clear the journal file,
342 * and update every microdesc_t in the cache with pointers to its new
343 * location. If <b>force</b> is true, do this unconditionally. If
344 * <b>force</b> is false, do it only if we expect to save space on disk. */
346 microdesc_cache_rebuild(microdesc_cache_t *cache, int force)
348 open_file_t *open_file;
349 FILE *f;
350 microdesc_t **mdp;
351 smartlist_t *wrote;
352 ssize_t size;
353 off_t off = 0;
354 int orig_size, new_size;
356 if (cache == NULL) {
357 cache = the_microdesc_cache;
358 if (cache == NULL)
359 return 0;
362 /* Remove dead descriptors */
363 microdesc_cache_clean(cache, 0/*cutoff*/, 0/*force*/);
365 if (!force && !should_rebuild_md_cache(cache))
366 return 0;
368 log_info(LD_DIR, "Rebuilding the microdescriptor cache...");
370 orig_size = (int)(cache->cache_content ? cache->cache_content->size : 0);
371 orig_size += (int)cache->journal_len;
373 f = start_writing_to_stdio_file(cache->cache_fname,
374 OPEN_FLAGS_REPLACE|O_BINARY,
375 0600, &open_file);
376 if (!f)
377 return -1;
379 wrote = smartlist_create();
381 HT_FOREACH(mdp, microdesc_map, &cache->map) {
382 microdesc_t *md = *mdp;
383 size_t annotation_len;
384 if (md->no_save)
385 continue;
387 size = dump_microdescriptor(f, md, &annotation_len);
388 if (size < 0) {
389 /* XXX handle errors from dump_microdescriptor() */
390 /* log? return -1? die? coredump the universe? */
391 continue;
393 md->off = off + annotation_len;
394 off += size;
395 if (md->saved_location != SAVED_IN_CACHE) {
396 tor_free(md->body);
397 md->saved_location = SAVED_IN_CACHE;
399 smartlist_add(wrote, md);
402 if (cache->cache_content)
403 tor_munmap_file(cache->cache_content);
405 finish_writing_to_file(open_file); /*XXX Check me.*/
407 cache->cache_content = tor_mmap_file(cache->cache_fname);
409 if (!cache->cache_content && smartlist_len(wrote)) {
410 log_err(LD_DIR, "Couldn't map file that we just wrote to %s!",
411 cache->cache_fname);
412 smartlist_free(wrote);
413 return -1;
415 SMARTLIST_FOREACH_BEGIN(wrote, microdesc_t *, md) {
416 tor_assert(md->saved_location == SAVED_IN_CACHE);
417 md->body = (char*)cache->cache_content->data + md->off;
418 tor_assert(fast_memeq(md->body, "onion-key", 9));
419 } SMARTLIST_FOREACH_END(md);
421 smartlist_free(wrote);
423 write_str_to_file(cache->journal_fname, "", 1);
424 cache->journal_len = 0;
425 cache->bytes_dropped = 0;
427 new_size = cache->cache_content ? (int)cache->cache_content->size : 0;
428 log_info(LD_DIR, "Done rebuilding microdesc cache. "
429 "Saved %d bytes; %d still used.",
430 orig_size-new_size, new_size);
432 return 0;
435 /** Deallocate a single microdescriptor. Note: the microdescriptor MUST have
436 * previously been removed from the cache if it had ever been inserted. */
437 void
438 microdesc_free(microdesc_t *md)
440 if (!md)
441 return;
442 /* Must be removed from hash table! */
443 if (md->onion_pkey)
444 crypto_free_pk_env(md->onion_pkey);
445 if (md->body && md->saved_location != SAVED_IN_CACHE)
446 tor_free(md->body);
448 if (md->family) {
449 SMARTLIST_FOREACH(md->family, char *, cp, tor_free(cp));
450 smartlist_free(md->family);
452 tor_free(md->exitsummary);
454 tor_free(md);
457 /** Free all storage held in the microdesc.c module. */
458 void
459 microdesc_free_all(void)
461 if (the_microdesc_cache) {
462 microdesc_cache_clear(the_microdesc_cache);
463 tor_free(the_microdesc_cache->cache_fname);
464 tor_free(the_microdesc_cache->journal_fname);
465 tor_free(the_microdesc_cache);
469 /** If there is a microdescriptor in <b>cache</b> whose sha256 digest is
470 * <b>d</b>, return it. Otherwise return NULL. */
471 microdesc_t *
472 microdesc_cache_lookup_by_digest256(microdesc_cache_t *cache, const char *d)
474 microdesc_t *md, search;
475 if (!cache)
476 cache = get_microdesc_cache();
477 memcpy(search.digest, d, DIGEST256_LEN);
478 md = HT_FIND(microdesc_map, &cache->map, &search);
479 return md;
482 /** Return the mean size of decriptors added to <b>cache</b> since it was last
483 * cleared. Used to estimate the size of large downloads. */
484 size_t
485 microdesc_average_size(microdesc_cache_t *cache)
487 if (!cache)
488 cache = get_microdesc_cache();
489 if (!cache->n_seen)
490 return 512;
491 return (size_t)(cache->total_len_seen / cache->n_seen);