1 /* Copyright (c) 2001-2004, Roger Dingledine.
2 * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson.
3 * Copyright (c) 2007-2019, The Tor Project, Inc. */
4 /* See LICENSE for licensing information */
6 #include "core/or/or.h"
8 #include "app/config/config.h"
9 #include "core/mainloop/connection.h"
10 #include "feature/dircache/dircache.h"
11 #include "feature/dircache/dirserv.h"
12 #include "feature/dirclient/dirclient.h"
13 #include "feature/dircommon/directory.h"
14 #include "feature/dircommon/fp_pair.h"
15 #include "feature/stats/geoip_stats.h"
16 #include "lib/compress/compress.h"
18 #include "feature/dircommon/dir_connection_st.h"
19 #include "feature/nodelist/routerinfo_st.h"
/**
 * \file directory.c
 * \brief Code to send and fetch information from directory authorities and
 * caches via HTTP.
 *
 * Directory caches and authorities use dirserv.c to generate the results of a
 * query and stream them to the connection; clients use routerparse.c to parse
 * the results.
 *
 * Every directory request has a dir_connection_t on the client side and on
 * the server side.  In most cases, the dir_connection_t object is a linked
 * connection, tunneled through an edge_connection_t so that it can be a
 * stream on the Tor network.  The only non-tunneled connections are those
 * that are used to upload material (descriptors and votes) to authorities.
 * Among tunneled connections, some use one-hop circuits, and others use
 * multi-hop circuits for anonymity.
 *
 * Directory requests are launched by calling
 * directory_initiate_request().  This will launch the connection, construct
 * an HTTP request with directory_send_command(), send the request, and wait
 * for a response.  The client later handles the response with
 * connection_dir_client_reached_eof(), which passes the information
 * received to another part of Tor.
 *
 * On the server side, requests are read in directory_handle_command(),
 * which dispatches first on the request type (GET or POST), and then on
 * the URL requested.  GET requests are processed with a table-based
 * dispatcher in url_table[].  The process of handling larger GET requests
 * is complicated because we need to avoid allocating a copy of all the
 * data to be sent to the client in one huge buffer.  Instead, we spool the
 * data into the buffer using logic in connection_dirserv_flushed_some() in
 * dirserv.c.  (TODO: If we extended buf.c to have a zero-copy
 * reference-based buffer type, we could remove most of that code, at the
 * cost of a bit more reference counting.)
 **/
/* In-points to directory.c:
 *
 * - directory_post_to_dirservers(), called from
 *   router_upload_dir_desc_to_dirservers() in router.c
 *   upload_service_descriptor() in rendservice.c
 * - directory_get_from_dirserver(), called from
 *   rend_client_refetch_renddesc() in rendclient.c
 *   run_scheduled_events() in main.c
 * - connection_dir_process_inbuf(), called from
 *   connection_process_inbuf() in connection.c
 * - connection_dir_finished_flushing(), called from
 *   connection_finished_flushing() in connection.c
 * - connection_dir_finished_connecting(), called from
 *   connection_finished_connecting() in connection.c
 */
74 /** Convert a connection_t* to a dir_connection_t*; assert if the cast is
77 TO_DIR_CONN(connection_t
*c
)
79 tor_assert(c
->magic
== DIR_CONNECTION_MAGIC
);
80 return DOWNCAST(dir_connection_t
, c
);
83 /** Return false if the directory purpose <b>dir_purpose</b>
84 * does not require an anonymous (three-hop) connection.
86 * Return true 1) by default, 2) if all directory actions have
87 * specifically been configured to be over an anonymous connection,
88 * or 3) if the router is a bridge */
90 purpose_needs_anonymity(uint8_t dir_purpose
, uint8_t router_purpose
,
93 if (get_options()->AllDirActionsPrivate
)
96 if (router_purpose
== ROUTER_PURPOSE_BRIDGE
) {
97 if (dir_purpose
== DIR_PURPOSE_FETCH_SERVERDESC
98 && resource
&& !strcmp(resource
, "authority.z")) {
99 /* We are asking a bridge for its own descriptor. That doesn't need
103 /* Assume all other bridge stuff needs anonymity. */
104 return 1; /* if no circuits yet, this might break bootstrapping, but it's
105 * needed to be safe. */
110 case DIR_PURPOSE_UPLOAD_DIR
:
111 case DIR_PURPOSE_UPLOAD_VOTE
:
112 case DIR_PURPOSE_UPLOAD_SIGNATURES
:
113 case DIR_PURPOSE_FETCH_STATUS_VOTE
:
114 case DIR_PURPOSE_FETCH_DETACHED_SIGNATURES
:
115 case DIR_PURPOSE_FETCH_CONSENSUS
:
116 case DIR_PURPOSE_FETCH_CERTIFICATE
:
117 case DIR_PURPOSE_FETCH_SERVERDESC
:
118 case DIR_PURPOSE_FETCH_EXTRAINFO
:
119 case DIR_PURPOSE_FETCH_MICRODESC
:
121 case DIR_PURPOSE_HAS_FETCHED_HSDESC
:
122 case DIR_PURPOSE_HAS_FETCHED_RENDDESC_V2
:
123 case DIR_PURPOSE_UPLOAD_RENDDESC_V2
:
124 case DIR_PURPOSE_FETCH_RENDDESC_V2
:
125 case DIR_PURPOSE_FETCH_HSDESC
:
126 case DIR_PURPOSE_UPLOAD_HSDESC
:
128 case DIR_PURPOSE_SERVER
:
130 log_warn(LD_BUG
, "Called with dir_purpose=%d, router_purpose=%d",
131 dir_purpose
, router_purpose
);
132 tor_assert_nonfatal_unreached();
133 return 1; /* Assume it needs anonymity; better safe than sorry. */
137 /** Return a newly allocated string describing <b>auth</b>. Only describes
138 * authority features. */
140 authdir_type_to_string(dirinfo_type_t auth
)
143 smartlist_t
*lst
= smartlist_new();
144 if (auth
& V3_DIRINFO
)
145 smartlist_add(lst
, (void*)"V3");
146 if (auth
& BRIDGE_DIRINFO
)
147 smartlist_add(lst
, (void*)"Bridge");
148 if (smartlist_len(lst
)) {
149 result
= smartlist_join_strings(lst
, ", ", 0, NULL
);
151 result
= tor_strdup("[Not an authority]");
157 /** Return true iff anything we say on <b>conn</b> is being encrypted before
158 * we send it to the client/server. */
160 connection_dir_is_encrypted(const dir_connection_t
*conn
)
162 /* Right now it's sufficient to see if conn is or has been linked, since
163 * the only thing it could be linked to is an edge connection on a
164 * circuit, and the only way it could have been unlinked is at the edge
165 * connection getting closed.
167 return TO_CONN(conn
)->linked
;
170 /** Parse an HTTP request line at the start of a headers string. On failure,
171 * return -1. On success, set *<b>command_out</b> to a copy of the HTTP
172 * command ("get", "post", etc), set *<b>url_out</b> to a copy of the URL, and
175 parse_http_command(const char *headers
, char **command_out
, char **url_out
)
177 const char *command
, *end_of_command
;
178 char *s
, *start
, *tmp
;
180 s
= (char *)eat_whitespace_no_nl(headers
);
183 s
= (char *)find_whitespace(s
); /* get past GET/POST */
186 s
= (char *)eat_whitespace_no_nl(s
);
188 start
= s
; /* this is the URL, assuming it's valid */
189 s
= (char *)find_whitespace(start
);
192 /* tolerate the http[s] proxy style of putting the hostname in the url */
193 if (s
-start
>= 4 && !strcmpstart(start
,"http")) {
197 if (s
-tmp
>= 3 && !strcmpstart(tmp
,"://")) {
198 tmp
= strchr(tmp
+3, '/');
199 if (tmp
&& tmp
< s
) {
200 log_debug(LD_DIR
,"Skipping over 'http[s]://hostname/' string");
206 /* Check if the header is well formed (next sequence
207 * should be HTTP/1.X\r\n). Assumes we're supporting 1.0? */
211 char *e
= (char *)eat_whitespace_no_nl(s
);
212 if (2 != tor_sscanf(e
, "HTTP/1.%u%c", &minor_ver
, &ch
)) {
219 *url_out
= tor_memdup_nulterm(start
, s
-start
);
220 *command_out
= tor_memdup_nulterm(command
, end_of_command
- command
);
/** Return a copy of the first HTTP header in <b>headers</b> whose key is
 * <b>which</b>.  The key should be given with a terminating colon and space;
 * this function copies everything after, up to but not including the
 * following \\r\\n.
 *
 * The key is matched case-insensitively at the start of each line.  If no
 * carriage return follows the value, the rest of the string is copied.
 * Return NULL if no line starts with <b>which</b>.  The caller owns the
 * returned string.
 *
 * NOTE(review): the loop header, the skip over the key, the advance past
 * the newline and the NULL return were missing from the listing under
 * review and are restored here by inference; confirm against upstream. */
char *
http_get_header(const char *headers, const char *which)
{
  const char *cp = headers;
  while (cp) {
    if (!strcasecmpstart(cp, which)) {
      char *eos;
      cp += strlen(which); /* step over the key to the value */
      if ((eos = strchr(cp,'\r')))
        return tor_strndup(cp, eos-cp);
      else
        return tor_strdup(cp);
    }
    /* Not this line: move to the character after the next newline, or
     * stop when there is none. */
    cp = strchr(cp, '\n');
    if (cp)
      ++cp;
  }
  return NULL;
}
247 /** Parse an HTTP response string <b>headers</b> of the form
249 * "HTTP/1.\%d \%d\%s\r\n...".
252 * If it's well-formed, assign the status code to *<b>code</b> and
253 * return 0. Otherwise, return -1.
255 * On success: If <b>date</b> is provided, set *date to the Date
256 * header in the http headers, or 0 if no such header is found. If
257 * <b>compression</b> is provided, set *<b>compression</b> to the
258 * compression method given in the Content-Encoding header, or 0 if no
259 * such header is found, or -1 if the value of the header is not
260 * recognized. If <b>reason</b> is provided, strdup the reason string
264 parse_http_response(const char *headers
, int *code
, time_t *date
,
265 compress_method_t
*compression
, char **reason
)
268 char datestr
[RFC1123_TIME_LEN
+1];
269 smartlist_t
*parsed_headers
;
273 while (TOR_ISSPACE(*headers
)) headers
++; /* tolerate leading whitespace */
275 if (tor_sscanf(headers
, "HTTP/1.%u %u", &n1
, &n2
) < 2 ||
276 (n1
!= 0 && n1
!= 1) ||
277 (n2
< 100 || n2
>= 600)) {
278 log_warn(LD_HTTP
,"Failed to parse header %s",escaped(headers
));
283 parsed_headers
= smartlist_new();
284 smartlist_split_string(parsed_headers
, headers
, "\n",
285 SPLIT_SKIP_SPACE
|SPLIT_IGNORE_BLANK
, -1);
287 smartlist_t
*status_line_elements
= smartlist_new();
288 tor_assert(smartlist_len(parsed_headers
));
289 smartlist_split_string(status_line_elements
,
290 smartlist_get(parsed_headers
, 0),
291 " ", SPLIT_SKIP_SPACE
|SPLIT_IGNORE_BLANK
, 3);
292 tor_assert(smartlist_len(status_line_elements
) <= 3);
293 if (smartlist_len(status_line_elements
) == 3) {
294 *reason
= smartlist_get(status_line_elements
, 2);
295 smartlist_set(status_line_elements
, 2, NULL
); /* Prevent free */
297 SMARTLIST_FOREACH(status_line_elements
, char *, cp
, tor_free(cp
));
298 smartlist_free(status_line_elements
);
302 SMARTLIST_FOREACH(parsed_headers
, const char *, s
,
303 if (!strcmpstart(s
, "Date: ")) {
304 strlcpy(datestr
, s
+6, sizeof(datestr
));
305 /* This will do nothing on failure, so we don't need to check
306 the result. We shouldn't warn, since there are many other valid
307 date formats besides the one we use. */
308 parse_rfc1123_time(datestr
, date
);
313 const char *enc
= NULL
;
314 SMARTLIST_FOREACH(parsed_headers
, const char *, s
,
315 if (!strcmpstart(s
, "Content-Encoding: ")) {
320 *compression
= NO_METHOD
;
322 *compression
= compression_method_get_by_name(enc
);
324 if (*compression
== UNKNOWN_METHOD
)
325 log_info(LD_HTTP
, "Unrecognized content encoding: %s. Trying to deal.",
329 SMARTLIST_FOREACH(parsed_headers
, char *, s
, tor_free(s
));
330 smartlist_free(parsed_headers
);
/** If any directory object is arriving, and it's over 10MB large, we're
 * getting DoS'd.  (As of 0.1.2.x, raw directories are about 1MB, and we never
 * ask for more than 96 router descriptors at a time.)
 */
#define MAX_DIRECTORY_OBJECT_SIZE (10*(1<<20))

/** Size limit applied instead of MAX_DIRECTORY_OBJECT_SIZE to vote
 * downloads: five times the ordinary limit. */
#define MAX_VOTE_DL_SIZE (MAX_DIRECTORY_OBJECT_SIZE * 5)
343 /** Read handler for directory connections. (That's connections <em>to</em>
344 * directory servers and connections <em>at</em> directory servers.)
347 connection_dir_process_inbuf(dir_connection_t
*conn
)
351 tor_assert(conn
->base_
.type
== CONN_TYPE_DIR
);
353 /* Directory clients write, then read data until they receive EOF;
354 * directory servers read data until they get an HTTP command, then
355 * write their response (when it's finished flushing, they mark for
359 /* If we're on the dirserver side, look for a command. */
360 if (conn
->base_
.state
== DIR_CONN_STATE_SERVER_COMMAND_WAIT
) {
361 if (directory_handle_command(conn
) < 0) {
362 connection_mark_for_close(TO_CONN(conn
));
369 (TO_CONN(conn
)->purpose
== DIR_PURPOSE_FETCH_STATUS_VOTE
) ?
370 MAX_VOTE_DL_SIZE
: MAX_DIRECTORY_OBJECT_SIZE
;
372 if (connection_get_inbuf_len(TO_CONN(conn
)) > max_size
) {
374 "Too much data received from directory connection (%s): "
375 "denial of service attempt, or you need to upgrade?",
376 conn
->base_
.address
);
377 connection_mark_for_close(TO_CONN(conn
));
381 if (!conn
->base_
.inbuf_reached_eof
)
382 log_debug(LD_HTTP
,"Got data, not eof. Leaving on inbuf.");
386 /** Called when we're about to finally unlink and free a directory connection:
387 * perform necessary accounting and cleanup */
389 connection_dir_about_to_close(dir_connection_t
*dir_conn
)
391 connection_t
*conn
= TO_CONN(dir_conn
);
393 if (conn
->state
< DIR_CONN_STATE_CLIENT_FINISHED
) {
394 /* It's a directory connection and connecting or fetching
395 * failed: forget about this router, and maybe try again. */
396 connection_dir_client_request_failed(dir_conn
);
399 connection_dir_client_refetch_hsdesc_if_needed(dir_conn
);
402 /** Write handler for directory connections; called when all data has
403 * been flushed. Close the connection or wait for a response as
407 connection_dir_finished_flushing(dir_connection_t
*conn
)
410 tor_assert(conn
->base_
.type
== CONN_TYPE_DIR
);
412 if (conn
->base_
.marked_for_close
)
415 /* Note that we have finished writing the directory response. For direct
416 * connections this means we're done; for tunneled connections it's only
417 * an intermediate step. */
419 geoip_change_dirreq_state(conn
->dirreq_id
, DIRREQ_TUNNELED
,
420 DIRREQ_FLUSHING_DIR_CONN_FINISHED
);
422 geoip_change_dirreq_state(TO_CONN(conn
)->global_identifier
,
424 DIRREQ_FLUSHING_DIR_CONN_FINISHED
);
425 switch (conn
->base_
.state
) {
426 case DIR_CONN_STATE_CONNECTING
:
427 case DIR_CONN_STATE_CLIENT_SENDING
:
428 log_debug(LD_DIR
,"client finished sending command.");
429 conn
->base_
.state
= DIR_CONN_STATE_CLIENT_READING
;
431 case DIR_CONN_STATE_SERVER_WRITING
:
433 log_warn(LD_BUG
, "Emptied a dirserv buffer, but it's still spooling!");
434 connection_mark_for_close(TO_CONN(conn
));
436 log_debug(LD_DIRSERV
, "Finished writing server response. Closing.");
437 connection_mark_for_close(TO_CONN(conn
));
441 log_warn(LD_BUG
,"called in unexpected state %d.",
443 tor_fragile_assert();
449 /** Connected handler for directory connections: begin sending data to the
450 * server, and return 0.
451 * Only used when connections don't immediately connect. */
453 connection_dir_finished_connecting(dir_connection_t
*conn
)
456 tor_assert(conn
->base_
.type
== CONN_TYPE_DIR
);
457 tor_assert(conn
->base_
.state
== DIR_CONN_STATE_CONNECTING
);
459 log_debug(LD_HTTP
,"Dir connection to router %s:%u established.",
460 conn
->base_
.address
,conn
->base_
.port
);
462 /* start flushing conn */
463 conn
->base_
.state
= DIR_CONN_STATE_CLIENT_SENDING
;
467 /** Helper. Compare two fp_pair_t objects, and return negative, 0, or
468 * positive as appropriate. */
470 compare_pairs_(const void **a
, const void **b
)
472 const fp_pair_t
*fp1
= *a
, *fp2
= *b
;
474 if ((r
= fast_memcmp(fp1
->first
, fp2
->first
, DIGEST_LEN
)))
477 return fast_memcmp(fp1
->second
, fp2
->second
, DIGEST_LEN
);
480 /** Divide a string <b>res</b> of the form FP1-FP2+FP3-FP4...[.z], where each
481 * FP is a hex-encoded fingerprint, into a sequence of distinct sorted
482 * fp_pair_t. Skip malformed pairs. On success, return 0 and add those
483 * fp_pair_t into <b>pairs_out</b>. On failure, return -1. */
485 dir_split_resource_into_fingerprint_pairs(const char *res
,
486 smartlist_t
*pairs_out
)
488 smartlist_t
*pairs_tmp
= smartlist_new();
489 smartlist_t
*pairs_result
= smartlist_new();
491 smartlist_split_string(pairs_tmp
, res
, "+", 0, 0);
492 if (smartlist_len(pairs_tmp
)) {
493 char *last
= smartlist_get(pairs_tmp
,smartlist_len(pairs_tmp
)-1);
494 size_t last_len
= strlen(last
);
495 if (last_len
> 2 && !strcmp(last
+last_len
-2, ".z")) {
496 last
[last_len
-2] = '\0';
499 SMARTLIST_FOREACH_BEGIN(pairs_tmp
, char *, cp
) {
500 if (strlen(cp
) != HEX_DIGEST_LEN
*2+1) {
502 "Skipping digest pair %s with non-standard length.", escaped(cp
));
503 } else if (cp
[HEX_DIGEST_LEN
] != '-') {
505 "Skipping digest pair %s with missing dash.", escaped(cp
));
508 if (base16_decode(pair
.first
, DIGEST_LEN
,
509 cp
, HEX_DIGEST_LEN
) != DIGEST_LEN
||
510 base16_decode(pair
.second
,DIGEST_LEN
,
511 cp
+HEX_DIGEST_LEN
+1, HEX_DIGEST_LEN
) != DIGEST_LEN
) {
512 log_info(LD_DIR
, "Skipping non-decodable digest pair %s", escaped(cp
));
514 smartlist_add(pairs_result
, tor_memdup(&pair
, sizeof(pair
)));
518 } SMARTLIST_FOREACH_END(cp
);
519 smartlist_free(pairs_tmp
);
522 smartlist_sort(pairs_result
, compare_pairs_
);
523 smartlist_uniq(pairs_result
, compare_pairs_
, tor_free_
);
525 smartlist_add_all(pairs_out
, pairs_result
);
526 smartlist_free(pairs_result
);
530 /** Given a directory <b>resource</b> request, containing zero
531 * or more strings separated by plus signs, followed optionally by ".z", store
532 * the strings, in order, into <b>fp_out</b>. If <b>compressed_out</b> is
533 * non-NULL, set it to 1 if the resource ends in ".z", else set it to 0.
535 * If (flags & DSR_HEX), then delete all elements that aren't hex digests, and
536 * decode the rest. If (flags & DSR_BASE64), then use "-" rather than "+" as
537 * a separator, delete all the elements that aren't base64-encoded digests,
538 * and decode the rest. If (flags & DSR_DIGEST256), these digests should be
539 * 256 bits long; else they should be 160.
541 * If (flags & DSR_SORT_UNIQ), then sort the list and remove all duplicates.
544 dir_split_resource_into_fingerprints(const char *resource
,
545 smartlist_t
*fp_out
, int *compressed_out
,
548 const int decode_hex
= flags
& DSR_HEX
;
549 const int decode_base64
= flags
& DSR_BASE64
;
550 const int digests_are_256
= flags
& DSR_DIGEST256
;
551 const int sort_uniq
= flags
& DSR_SORT_UNIQ
;
553 const int digest_len
= digests_are_256
? DIGEST256_LEN
: DIGEST_LEN
;
554 const int hex_digest_len
= digests_are_256
?
555 HEX_DIGEST256_LEN
: HEX_DIGEST_LEN
;
556 const int base64_digest_len
= digests_are_256
?
557 BASE64_DIGEST256_LEN
: BASE64_DIGEST_LEN
;
558 smartlist_t
*fp_tmp
= smartlist_new();
560 tor_assert(!(decode_hex
&& decode_base64
));
563 smartlist_split_string(fp_tmp
, resource
, decode_base64
?"-":"+", 0, 0);
566 if (smartlist_len(fp_tmp
)) {
567 char *last
= smartlist_get(fp_tmp
,smartlist_len(fp_tmp
)-1);
568 size_t last_len
= strlen(last
);
569 if (last_len
> 2 && !strcmp(last
+last_len
-2, ".z")) {
570 last
[last_len
-2] = '\0';
575 if (decode_hex
|| decode_base64
) {
576 const size_t encoded_len
= decode_hex
? hex_digest_len
: base64_digest_len
;
579 for (i
= 0; i
< smartlist_len(fp_tmp
); ++i
) {
580 cp
= smartlist_get(fp_tmp
, i
);
581 if (strlen(cp
) != encoded_len
) {
583 "Skipping digest %s with non-standard length.", escaped(cp
));
584 smartlist_del_keeporder(fp_tmp
, i
--);
587 d
= tor_malloc_zero(digest_len
);
589 (base16_decode(d
, digest_len
, cp
, hex_digest_len
) != digest_len
) :
590 (base64_decode(d
, digest_len
, cp
, base64_digest_len
)
592 log_info(LD_DIR
, "Skipping non-decodable digest %s", escaped(cp
));
593 smartlist_del_keeporder(fp_tmp
, i
--);
596 smartlist_set(fp_tmp
, i
, d
);
604 if (decode_hex
|| decode_base64
) {
605 if (digests_are_256
) {
606 smartlist_sort_digests256(fp_tmp
);
607 smartlist_uniq_digests256(fp_tmp
);
609 smartlist_sort_digests(fp_tmp
);
610 smartlist_uniq_digests(fp_tmp
);
613 smartlist_sort_strings(fp_tmp
);
614 smartlist_uniq_strings(fp_tmp
);
617 smartlist_add_all(fp_out
, fp_tmp
);
618 smartlist_free(fp_tmp
);
622 /** As dir_split_resource_into_fingerprints, but instead fills
623 * <b>spool_out</b> with a list of spoolable_resource_t for the resource
624 * identified through <b>source</b>. */
626 dir_split_resource_into_spoolable(const char *resource
,
627 dir_spool_source_t source
,
628 smartlist_t
*spool_out
,
632 smartlist_t
*fingerprints
= smartlist_new();
634 tor_assert(flags
& (DSR_HEX
|DSR_BASE64
));
635 const size_t digest_len
=
636 (flags
& DSR_DIGEST256
) ? DIGEST256_LEN
: DIGEST_LEN
;
638 int r
= dir_split_resource_into_fingerprints(resource
, fingerprints
,
639 compressed_out
, flags
);
640 /* This is not a very efficient implementation XXXX */
641 SMARTLIST_FOREACH_BEGIN(fingerprints
, uint8_t *, digest
) {
642 spooled_resource_t
*spooled
=
643 spooled_resource_new(source
, digest
, digest_len
);
645 smartlist_add(spool_out
, spooled
);
647 } SMARTLIST_FOREACH_END(digest
);
649 smartlist_free(fingerprints
);