1 /* Copyright (c) 2009-2011, The Tor Project, Inc. */
2 /* See LICENSE for licensing information */
7 #include "routerparse.h"
9 /** A data structure to hold a bunch of cached microdescriptors. There are
10 * two active files in the cache: a "cache file" that we mmap, and a "journal
11 * file" that we append to. Periodically, we rebuild the cache file to hold
12 * only the microdescriptors that we want to keep */
13 struct microdesc_cache_t
{
14 /** Map from sha256-digest to microdesc_t for every microdesc_t in the cache. */
16 HT_HEAD(microdesc_map
, microdesc_t
) map
;
18 /** Name of the cache file. */
20 /** Name of the journal file. */
22 /** Mmap'd contents of the cache file, or NULL if there is none. */
23 tor_mmap_t
*cache_content
;
24 /** Number of bytes used in the journal file. */
26 /** Number of bytes in descriptors removed as too old. */
29 /** Total bytes of microdescriptor bodies we have added to this cache */
30 uint64_t total_len_seen
;
31 /** Total number of microdescriptors we have added to this cache */
35 /** Helper: computes a hash of <b>md</b> to place it in a hash table.
 * The digest of <b>md</b> is viewed as an array of unsigned ints and its
 * leading words are XORed together. */
36 static INLINE
unsigned int
37 _microdesc_hash(microdesc_t
*md
)
/* View the digest bytes as an array of unsigned ints. */
39 unsigned *d
= (unsigned*)md
->digest
;
/* Fold the first eight words of the digest. */
41 return d
[0] ^ d
[1] ^ d
[2] ^ d
[3] ^ d
[4] ^ d
[5] ^ d
[6] ^ d
[7];
/* Fold the first four words of the digest.  NOTE(review): two return
 * statements are visible here; whatever selects between them is not shown
 * in this view -- confirm. */
43 return d
[0] ^ d
[1] ^ d
[2] ^ d
[3];
47 /** Helper: compares <b>a</b> and <b>b</b> for equality for hash-table
 * purposes.  Two entries match when tor_memeq() reports that their digests
 * agree over DIGEST256_LEN bytes. */
49 _microdesc_eq(microdesc_t
*a
, microdesc_t
*b
)
51 return tor_memeq(a
->digest
, b
->digest
, DIGEST256_LEN
);
/* Declare and generate the microdesc_map hash table of microdesc_t entries,
 * using _microdesc_hash for hashing and _microdesc_eq for equality.
 * NOTE(review): the third argument (node) presumably names the link field
 * inside microdesc_t -- confirm against the hash-table header. */
54 HT_PROTOTYPE(microdesc_map
, microdesc_t
, node
,
55 _microdesc_hash
, _microdesc_eq
);
/* NOTE(review): 0.6 and malloc/realloc/free are passed straight through to
 * HT_GENERATE; presumably a load factor and the allocator hooks -- confirm. */
56 HT_GENERATE(microdesc_map
, microdesc_t
, node
,
57 _microdesc_hash
, _microdesc_eq
, 0.6,
58 malloc
, realloc
, free
);
60 /** Write the body of <b>md</b> into <b>f</b>, with appropriate annotations.
61 * On success, return the total number of bytes written, and set
62 * *<b>annotation_len_out</b> to the number of bytes written as
* annotations. */
65 dump_microdescriptor(FILE *f
, microdesc_t
*md
, size_t *annotation_len_out
)
69 /* XXXX drops unknown annotations. */
/* When md carries a last_listed time, format it as an @last-listed
 * annotation line and count its length. */
70 if (md
->last_listed
) {
71 char buf
[ISO_TIME_LEN
+1];
72 char annotation
[ISO_TIME_LEN
+32];
73 format_iso_time(buf
, md
->last_listed
);
74 tor_snprintf(annotation
, sizeof(annotation
), "@last-listed %s\n", buf
);
76 r
+= strlen(annotation
);
77 *annotation_len_out
= r
;
/* No-annotation case: report zero annotation bytes.  NOTE(review): the
 * branch structure around this assignment is not fully visible here. */
79 *annotation_len_out
= 0;
/* Record the current position in f as the offset of the body, then write
 * the body itself. */
82 md
->off
= (off_t
) ftell(f
);
83 written
= fwrite(md
->body
, 1, md
->bodylen
, f
);
/* A short write is reported together with both byte counts. */
84 if (written
!= md
->bodylen
) {
86 "Couldn't dump microdescriptor (wrote %lu out of %lu): %s",
87 (unsigned long)written
, (unsigned long)md
->bodylen
,
95 /** Holds a pointer to the current microdesc_cache_t object, or NULL if no
96 * such object has been allocated.  Assigned by get_microdesc_cache() on
 * first use and released by microdesc_free_all(). */
97 static microdesc_cache_t
*the_microdesc_cache
= NULL
;
99 /** Return a pointer to the microdescriptor cache, loading it if necessary.
 * The cache is created at most once and then kept in the_microdesc_cache. */
101 get_microdesc_cache(void)
/* First use: allocate a zeroed cache, initialize its map, take both file
 * names from the data directory, reload from disk, and only then publish
 * the pointer. */
103 if (PREDICT_UNLIKELY(the_microdesc_cache
==NULL
)) {
104 microdesc_cache_t
*cache
= tor_malloc_zero(sizeof(microdesc_cache_t
));
105 HT_INIT(microdesc_map
, &cache
->map
);
106 cache
->cache_fname
= get_datadir_fname("cached-microdescs");
107 cache
->journal_fname
= get_datadir_fname("cached-microdescs.new");
108 microdesc_cache_reload(cache
);
109 the_microdesc_cache
= cache
;
111 return the_microdesc_cache
;
114 /* There are three sources of microdescriptors:
115 1) Generated by us while acting as a directory authority.
116 2) Loaded from the cache on disk.
120 /** Decode the microdescriptors from the string starting at <b>s</b> and
121 * ending at <b>eos</b>, and store them in <b>cache</b>. If <b>no_save</b>
122 * is true, mark them as non-writable to disk. If <b>where</b> is
123 * SAVED_IN_CACHE, leave their bodies as pointers to the mmap'd cache. If
124 * where is <b>SAVED_NOWHERE</b>, do not allow annotations. Return a list of
125 * the added microdescriptors. */
127 microdescs_add_to_cache(microdesc_cache_t
*cache
,
128 const char *s
, const char *eos
, saved_location_t where
,
131 /*XXXX need an argument that sets last_listed as appropriate. */
133 smartlist_t
*descriptors
, *added
;
/* Annotations are allowed unless where is SAVED_NOWHERE. */
134 const int allow_annotations
= (where
!= SAVED_NOWHERE
);
/* Bodies are copied unless where is SAVED_IN_CACHE. */
135 const int copy_body
= (where
!= SAVED_IN_CACHE
);
137 descriptors
= microdescs_parse_from_string(s
, eos
,
/* Hand the parsed list to microdescs_add_list_to_cache(), then free the
 * temporary list. */
141 added
= microdescs_add_list_to_cache(cache
, descriptors
, where
, no_save
);
142 smartlist_free(descriptors
);
146 /* As microdescs_add_to_cache, but takes a list of microdescriptors instead of
147 * a string to decode. Frees any members of <b>descriptors</b> that it does
* not add. */
150 microdescs_add_list_to_cache(microdesc_cache_t
*cache
,
151 smartlist_t
*descriptors
, saved_location_t where
,
155 open_file_t
*open_file
= NULL
;
/* The journal is opened for appending only when where is SAVED_NOWHERE and
 * no_save is not set. */
160 if (where
== SAVED_NOWHERE
&& !no_save
) {
161 f
= start_writing_to_stdio_file(cache
->journal_fname
,
162 OPEN_FLAGS_APPEND
|O_BINARY
,
165 log_warn(LD_DIR
, "Couldn't append to journal in %s: %s",
166 cache
->journal_fname
, strerror(errno
));
171 added
= smartlist_create();
172 SMARTLIST_FOREACH_BEGIN(descriptors
, microdesc_t
*, md
) {
/* Look for an entry that is already in the map. */
174 md2
= HT_FIND(microdesc_map
, &cache
->map
, md
);
176 /* We already had this one. */
/* Keep the later of the two last_listed times on the existing entry. */
177 if (md2
->last_listed
< md
->last_listed
)
178 md2
->last_listed
= md
->last_listed
;
/* A duplicate whose where is not SAVED_NOWHERE is counted in
 * bytes_dropped. */
180 if (where
!= SAVED_NOWHERE
)
181 cache
->bytes_dropped
+= size
;
185 /* Okay, it's a new one. */
187 size_t annotation_len
;
188 size
= dump_microdescriptor(f
, md
, &annotation_len
);
190 /* XXX handle errors from dump_microdescriptor() */
191 /* log? return -1? die? coredump the universe? */
/* An entry written to the journal is tagged SAVED_IN_JOURNAL and grows
 * journal_len by the bytes written. */
194 md
->saved_location
= SAVED_IN_JOURNAL
;
195 cache
->journal_len
+= size
;
197 md
->saved_location
= where
;
200 md
->no_save
= no_save
;
/* Insert the new entry, report it to the caller, and update statistics. */
202 HT_INSERT(microdesc_map
, &cache
->map
, md
);
203 smartlist_add(added
, md
);
205 cache
->total_len_seen
+= md
->bodylen
;
206 } SMARTLIST_FOREACH_END(md
);
209 finish_writing_to_file(open_file
); /*XXX Check me.*/
214 /** Remove every microdescriptor in <b>cache</b>.  Also unmaps the cache
 * file when one is mapped and resets the statistics counters. */
216 microdesc_cache_clear(microdesc_cache_t
*cache
)
218 microdesc_t
**entry
, **next
;
/* HT_NEXT_RMV unlinks the current entry and yields the following one, so
 * the table can be emptied while it is being walked. */
219 for (entry
= HT_START(microdesc_map
, &cache
->map
); entry
; entry
= next
) {
220 microdesc_t
*md
= *entry
;
221 next
= HT_NEXT_RMV(microdesc_map
, &cache
->map
, entry
);
224 HT_CLEAR(microdesc_map
, &cache
->map
);
/* Drop the mapping of the cache file, if any. */
225 if (cache
->cache_content
) {
226 tor_munmap_file(cache
->cache_content
);
227 cache
->cache_content
= NULL
;
/* Reset the counters. */
229 cache
->total_len_seen
= 0;
231 cache
->bytes_dropped
= 0;
234 /** Reload the contents of <b>cache</b> from disk. If it is empty, load it
235 * for the first time. Return 0 on success, -1 on failure.  Entries are read
 * first from the mapped cache file and then from the journal file. */
237 microdesc_cache_reload(microdesc_cache_t
*cache
)
240 char *journal_content
;
/* Start from an empty cache. */
245 microdesc_cache_clear(cache
);
/* Map the cache file and parse the entries directly out of the mapping. */
247 mm
= cache
->cache_content
= tor_mmap_file(cache
->cache_fname
);
249 added
= microdescs_add_to_cache(cache
, mm
->data
, mm
->data
+mm
->size
,
252 total
+= smartlist_len(added
);
253 smartlist_free(added
);
/* Read the journal; RFTS_IGNORE_MISSING is passed so an absent journal
 * file is tolerated. */
257 journal_content
= read_file_to_str(cache
->journal_fname
,
258 RFTS_IGNORE_MISSING
, &st
);
259 if (journal_content
) {
260 cache
->journal_len
= (size_t) st
.st_size
;
261 added
= microdescs_add_to_cache(cache
, journal_content
,
262 journal_content
+st
.st_size
,
263 SAVED_IN_JOURNAL
, 0);
265 total
+= smartlist_len(added
);
266 smartlist_free(added
);
268 tor_free(journal_content
);
270 log_notice(LD_DIR
, "Reloaded microdescriptor cache. Found %d descriptors.",
/* Give the rebuild logic a chance to run, without forcing it. */
273 microdesc_cache_rebuild(cache
, 0 /* don't force */);
278 /** By default, we remove any microdescriptors that have gone at least this
279 * long without appearing in a current consensus.  The value is seven days
 * expressed in seconds. */
280 #define TOLERATE_MICRODESC_AGE (7*24*60*60)
282 /** Remove all microdescriptors from <b>cache</b> that haven't been listed for
283 * a long time. Does not rebuild the cache on disk. If <b>cutoff</b> is
284 * positive, specifically remove microdescriptors that have been unlisted
285 * since <b>cutoff</b>. If <b>force</b> is true, remove microdescriptors even
286 * if we have no current live microdescriptor consensus. */
289 microdesc_cache_clean(microdesc_cache_t
*cache
, time_t cutoff
, int force
)
291 microdesc_t
**mdp
, *victim
;
292 int dropped
=0, kept
=0;
293 size_t bytes_dropped
= 0;
294 time_t now
= time(NULL
);
297 /* In 0.2.2, we let this proceed unconditionally: only authorities have
298 * microdesc caches. */
/* Default cutoff: TOLERATE_MICRODESC_AGE seconds before now.
 * NOTE(review): the condition guarding this assignment is not visible in
 * this view -- confirm. */
301 cutoff
= now
- TOLERATE_MICRODESC_AGE
;
/* Walk the table; the iterator is advanced inside the body because removal
 * and plain stepping use different macros. */
303 for (mdp
= HT_START(microdesc_map
, &cache
->map
); mdp
!= NULL
; ) {
/* Entries last listed before the cutoff are unlinked, counted, and freed. */
304 if ((*mdp
)->last_listed
< cutoff
) {
307 mdp
= HT_NEXT_RMV(microdesc_map
, &cache
->map
, mdp
);
308 bytes_dropped
+= victim
->bodylen
;
309 microdesc_free(victim
);
/* Entries that stay are simply stepped over. */
312 mdp
= HT_NEXT(microdesc_map
, &cache
->map
, mdp
);
317 log_notice(LD_DIR
, "Removed %d/%d microdescriptors as old.",
318 dropped
,dropped
+kept
);
/* Add the removed body bytes to the cache-wide bytes_dropped counter, which
 * should_rebuild_md_cache() reads. */
319 cache
->bytes_dropped
+= bytes_dropped
;
/** Helper: decide from the size counters of <b>cache</b> whether rebuilding
 * the cache file is worthwhile.  Returns 0 while the journal holds fewer than
 * 16384 bytes.  Returns 1 when the dropped bytes exceed one third of the
 * combined journal and cache-file size, or when the journal is larger than
 * half of the cache file. */
324 should_rebuild_md_cache(microdesc_cache_t
*cache
)
/* Size of the mapped cache file, or 0 when nothing is mapped. */
326 const size_t old_len
=
327 cache
->cache_content
? cache
->cache_content
->size
: 0;
328 const size_t journal_len
= cache
->journal_len
;
329 const size_t dropped
= cache
->bytes_dropped
;
331 if (journal_len
< 16384)
332 return 0; /* Don't bother, not enough has happened yet. */
333 if (dropped
> (journal_len
+ old_len
) / 3)
334 return 1; /* We could save 1/3 or more of the currently used space. */
335 if (journal_len
> old_len
/ 2)
336 return 1; /* We should append to the regular file */
341 /** Regenerate the main cache file for <b>cache</b>, clear the journal file,
342 * and update every microdesc_t in the cache with pointers to its new
343 * location. If <b>force</b> is true, do this unconditionally. If
344 * <b>force</b> is false, do it only if we expect to save space on disk. */
346 microdesc_cache_rebuild(microdesc_cache_t
*cache
, int force
)
348 open_file_t
*open_file
;
354 int orig_size
, new_size
;
/* NOTE(review): cache is replaced by the_microdesc_cache here; the
 * condition guarding this assignment is not visible in this view. */
357 cache
= the_microdesc_cache
;
362 /* Remove dead descriptors */
363 microdesc_cache_clean(cache
, 0/*cutoff*/, 0/*force*/);
/* Without force, the rebuild is gated on should_rebuild_md_cache(). */
365 if (!force
&& !should_rebuild_md_cache(cache
))
368 log_info(LD_DIR
, "Rebuilding the microdescriptor cache...");
/* Remember the combined size of cache file and journal for the final log
 * message. */
370 orig_size
= (int)(cache
->cache_content
? cache
->cache_content
->size
: 0);
371 orig_size
+= (int)cache
->journal_len
;
/* Open the cache file for replacement. */
373 f
= start_writing_to_stdio_file(cache
->cache_fname
,
374 OPEN_FLAGS_REPLACE
|O_BINARY
,
/* wrote collects the entries that are added to it in the loop below. */
379 wrote
= smartlist_create();
381 HT_FOREACH(mdp
, microdesc_map
, &cache
->map
) {
382 microdesc_t
*md
= *mdp
;
383 size_t annotation_len
;
387 size
= dump_microdescriptor(f
, md
, &annotation_len
);
389 /* XXX handle errors from dump_microdescriptor() */
390 /* log? return -1? die? coredump the universe? */
/* The stored offset is off plus the annotation length. */
393 md
->off
= off
+ annotation_len
;
395 if (md
->saved_location
!= SAVED_IN_CACHE
) {
397 md
->saved_location
= SAVED_IN_CACHE
;
399 smartlist_add(wrote
, md
);
/* Drop the old mapping, finish the new file, and map it again. */
402 if (cache
->cache_content
)
403 tor_munmap_file(cache
->cache_content
);
405 finish_writing_to_file(open_file
); /*XXX Check me.*/
407 cache
->cache_content
= tor_mmap_file(cache
->cache_fname
);
/* Failing to map is only an error when something was actually written. */
409 if (!cache
->cache_content
&& smartlist_len(wrote
)) {
410 log_err(LD_DIR
, "Couldn't map file that we just wrote to %s!",
412 smartlist_free(wrote
);
/* Point each rewritten entry at its body inside the fresh mapping and check
 * that the body starts with the expected onion-key keyword. */
415 SMARTLIST_FOREACH_BEGIN(wrote
, microdesc_t
*, md
) {
416 tor_assert(md
->saved_location
== SAVED_IN_CACHE
);
417 md
->body
= (char*)cache
->cache_content
->data
+ md
->off
;
418 tor_assert(fast_memeq(md
->body
, "onion-key", 9));
419 } SMARTLIST_FOREACH_END(md
);
421 smartlist_free(wrote
);
/* Replace the journal contents with an empty string and reset the
 * journal-related counters. */
423 write_str_to_file(cache
->journal_fname
, "", 1);
424 cache
->journal_len
= 0;
425 cache
->bytes_dropped
= 0;
427 new_size
= cache
->cache_content
? (int)cache
->cache_content
->size
: 0;
428 log_info(LD_DIR
, "Done rebuilding microdesc cache. "
429 "Saved %d bytes; %d still used.",
430 orig_size
-new_size
, new_size
);
435 /** Deallocate a single microdescriptor. Note: the microdescriptor MUST have
436 * previously been removed from the cache if it had ever been inserted.
 * Releases the onion key, the family list and its strings, and the exit
 * summary. */
438 microdesc_free(microdesc_t
*md
)
442 /* Must be removed from hash table! */
444 crypto_free_pk_env(md
->onion_pkey
);
/* Bodies of SAVED_IN_CACHE entries point into the mapped cache file, so the
 * condition below singles out every other non-NULL body. */
445 if (md
->body
&& md
->saved_location
!= SAVED_IN_CACHE
)
/* Free each family string, then the list that held them. */
449 SMARTLIST_FOREACH(md
->family
, char *, cp
, tor_free(cp
));
450 smartlist_free(md
->family
);
452 tor_free(md
->exitsummary
);
457 /** Free all storage held in the microdesc.c module.  Does nothing when no
 * cache has been allocated. */
459 microdesc_free_all(void)
461 if (the_microdesc_cache
) {
/* Empty the cache first, then release both file names and the cache
 * object itself. */
462 microdesc_cache_clear(the_microdesc_cache
);
463 tor_free(the_microdesc_cache
->cache_fname
);
464 tor_free(the_microdesc_cache
->journal_fname
);
465 tor_free(the_microdesc_cache
);
469 /** If there is a microdescriptor in <b>cache</b> whose sha256 digest is
470 * <b>d</b>, return it. Otherwise return NULL.  <b>d</b> must point to at
 * least DIGEST256_LEN readable bytes. */
472 microdesc_cache_lookup_by_digest256(microdesc_cache_t
*cache
, const char *d
)
474 microdesc_t
*md
, search
;
/* NOTE(review): cache is replaced by the global cache here; the condition
 * guarding this assignment is not visible in this view. */
476 cache
= get_microdesc_cache();
/* Build a stack-allocated search key that carries only the digest. */
477 memcpy(search
.digest
, d
, DIGEST256_LEN
);
478 md
= HT_FIND(microdesc_map
, &cache
->map
, &search
);
482 /** Return the mean size of descriptors added to <b>cache</b> since it was last
483 * cleared. Used to estimate the size of large downloads. */
485 microdesc_average_size(microdesc_cache_t
*cache
)
488 cache
= get_microdesc_cache();
/* Mean body length: total bytes seen divided by the number of descriptors
 * seen.  NOTE(review): no guard against n_seen being zero is visible in this
 * view -- confirm one exists before this division. */
491 return (size_t)(cache
->total_len_seen
/ cache
->n_seen
);