cc: Export sendme_inc validation into public function
[tor.git] / src / core / or / circuitpadding_machines.c
blobf29a9be4551556c3071ea85eb09b4edf74e75d98
1 /* Copyright (c) 2019 The Tor Project, Inc. */
2 /* See LICENSE for licensing information */
4 /**
5 * \file circuitpadding_machines.c
6 * \brief Circuit padding state machines
8 * Introduce circuit padding machines that will be used by Tor circuits, as
9 * specified by proposal 302 "Hiding onion service clients using padding".
11 * Right now this file introduces two machines that aim to hide the client-side
12 * of onion service circuits against naive classifiers like the ones from the
13 * "Circuit Fingerprinting Attacks: Passive Deanonymization of Tor Hidden
14 * Services" paper from USENIX. By naive classifiers we mean classifiers that
15 * use basic features like "circuit construction circuits" and "incoming and
16 * outgoing cell counts" and "duration of activity".
18 * In particular, these machines aim to be lightweight and protect against
19 * these basic classifiers. They don't aim to protect against more advanced
20 * attacks that use deep learning or even correlate various circuit
21 * construction events together. Machines that fool such advanced classifiers
22 * are also possible, but they can't be so lightweight and might require more
23 * WTF-PAD features. So for now we opt for the following two machines:
25 * Client-side introduction circuit hiding machine:
27 * This machine hides client-side introduction circuits by making their
28 * circuit construction sequence look like normal general circuits that
29 * download directory information. Furthermore, the circuits are kept open
30 * until all the padding has been sent, since intro circuits are usually
31 * very short lived and this acts as a distinguisher. For more info see
32 * circpad_machine_client_hide_intro_circuits() and the spec.
34 * Client-side rendezvous circuit hiding machine:
36 * This machine hides client-side rendezvous circuits by making their
37 * circuit construction sequence look like normal general circuits. For more
38 * details see circpad_machine_client_hide_rend_circuits() and the spec.
40 * TODO: These are simple machines that carefully manipulate the cells of the
41 * initial circuit setup procedure to make them look like general
42 * circuits. In the future, more states can be baked into their state machine
43 * to do more advanced obfuscation.
44 **/
46 #define CIRCUITPADDING_MACHINES_PRIVATE
48 #include "core/or/or.h"
49 #include "feature/nodelist/networkstatus.h"
51 #include "lib/crypt_ops/crypto_rand.h"
53 #include "core/or/circuitlist.h"
55 #include "core/or/circuitpadding_machines.h"
56 #include "core/or/circuitpadding.h"
58 /** Create a client-side padding machine that aims to hide IP circuits. In
59 * particular, it keeps intro circuits alive until a bunch of fake traffic has
60 * been pushed through.
62 void
63 circpad_machine_client_hide_intro_circuits(smartlist_t *machines_sl)
65 circpad_machine_spec_t *client_machine
66 = tor_malloc_zero(sizeof(circpad_machine_spec_t));
68 client_machine->name = "client_ip_circ";
70 client_machine->conditions.apply_state_mask = CIRCPAD_CIRC_OPENED;
71 client_machine->target_hopnum = 2;
73 /* This is a client machine */
74 client_machine->is_origin_side = 1;
76 /* We only want to pad introduction circuits, and we want to start padding
77 * only after the INTRODUCE1 cell has been sent, so set the purposes
78 * appropriately.
80 * In particular we want introduction circuits to blend as much as possible
81 * with general circuits. Most general circuits have the following initial
82 * relay cell sequence (outgoing cells marked in [brackets]):
84 * [EXTEND2] -> EXTENDED2 -> [EXTEND2] -> EXTENDED2 -> [BEGIN] -> CONNECTED
85 * -> [DATA] -> [DATA] -> DATA -> DATA...(inbound data cells continue)
87 * Whereas normal introduction circuits usually look like:
89 * [EXTEND2] -> EXTENDED2 -> [EXTEND2] -> EXTENDED2 -> [EXTEND2] -> EXTENDED2
90 * -> [INTRO1] -> INTRODUCE_ACK
92 * This means that up to the sixth cell (first line of each sequence above),
93 * both general and intro circuits have identical cell sequences. After that
94 * we want to mimic the second line sequence of
95 * -> [DATA] -> [DATA] -> DATA -> DATA...(inbound data cells continue)
97 * We achieve this by starting padding INTRODUCE1 has been sent. With padding
98 * negotiation cells, in the common case of the second line looks like:
99 * -> [INTRO1] -> [PADDING_NEGOTIATE] -> PADDING_NEGOTIATED -> INTRO_ACK
101 * Then, the middle node will send between INTRO_MACHINE_MINIMUM_PADDING and
102 * INTRO_MACHINE_MAXIMUM_PADDING cells, to match the "...(inbound data cells
103 * continue)" portion of the trace (aka the rest of an HTTPS response body).
106 /* Start the machine on fresh intro circs. */
107 client_machine->conditions.apply_purpose_mask =
108 circpad_circ_purpose_to_mask(CIRCUIT_PURPOSE_C_INTRODUCE_ACK_WAIT);
110 /* If the client purpose changes back to CIRCUIT_PURPOSE_C_INTRODUCING,
111 * or transitions to CIRCUIT_PURPOSE_C_INTRODUCE_ACKED, keep the machine
112 * alive, but do not launch new machines for these purposes. Also
113 * keep the machine around if it is in the CIRCUIT_PADDING purpose
114 * (but do not try to take over other machines in that purpose). */
115 client_machine->conditions.keep_purpose_mask =
116 circpad_circ_purpose_to_mask(CIRCUIT_PURPOSE_C_INTRODUCE_ACKED) |
117 circpad_circ_purpose_to_mask(CIRCUIT_PURPOSE_C_CIRCUIT_PADDING);
119 /* Keep the circuit alive even after the introduction has been finished,
120 * otherwise the short-term lifetime of the circuit will blow our cover */
121 client_machine->manage_circ_lifetime = 1;
123 /* Set padding machine limits to help guard against excessive padding */
124 client_machine->allowed_padding_count = INTRO_MACHINE_MAXIMUM_PADDING;
125 client_machine->max_padding_percent = 1;
127 /* Two states: START, OBFUSCATE_CIRC_SETUP (and END) */
128 circpad_machine_states_init(client_machine, 2);
130 /* For the origin-side machine, we transition to OBFUSCATE_CIRC_SETUP after
131 * sending PADDING_NEGOTIATE, and we stay there (without sending any padding)
132 * until we receive a STOP from the other side. */
133 client_machine->states[CIRCPAD_STATE_START].
134 next_state[CIRCPAD_EVENT_NONPADDING_SENT] =
135 CIRCPAD_STATE_OBFUSCATE_CIRC_SETUP;
137 /* origin-side machine has no event reactions while in
138 * CIRCPAD_STATE_OBFUSCATE_CIRC_SETUP, so no more state transitions here. */
140 /* The client side should never send padding, so it does not need
141 * to specify token removal, or a histogram definition or state lengths.
142 * That is all controlled by the middle node. */
144 /* Register the machine */
145 client_machine->machine_num = smartlist_len(machines_sl);
146 circpad_register_padding_machine(client_machine, machines_sl);
148 log_info(LD_CIRC,
149 "Registered client intro point hiding padding machine (%u)",
150 client_machine->machine_num);
153 /** Create a relay-side padding machine that aims to hide IP circuits. See
154 * comments on the function above for more details on the workings of the
155 * machine. */
156 void
157 circpad_machine_relay_hide_intro_circuits(smartlist_t *machines_sl)
159 circpad_machine_spec_t *relay_machine
160 = tor_malloc_zero(sizeof(circpad_machine_spec_t));
162 relay_machine->name = "relay_ip_circ";
164 relay_machine->conditions.apply_state_mask = CIRCPAD_CIRC_OPENED;
166 /* This is a relay-side machine */
167 relay_machine->is_origin_side = 0;
169 /* We want to negotiate END from this side after all our padding is done, so
170 * that the origin-side machine goes into END state, and eventually closes
171 * the circuit. */
172 relay_machine->should_negotiate_end = 1;
174 /* Set padding machine limits to help guard against excessive padding */
175 relay_machine->allowed_padding_count = INTRO_MACHINE_MAXIMUM_PADDING;
176 relay_machine->max_padding_percent = 1;
178 /* Two states: START, OBFUSCATE_CIRC_SETUP (and END) */
179 circpad_machine_states_init(relay_machine, 2);
181 /* For the relay-side machine, we want to transition
182 * START -> OBFUSCATE_CIRC_SETUP upon first non-padding
183 * cell sent (PADDING_NEGOTIATED in this case). */
184 relay_machine->states[CIRCPAD_STATE_START].
185 next_state[CIRCPAD_EVENT_NONPADDING_SENT] =
186 CIRCPAD_STATE_OBFUSCATE_CIRC_SETUP;
188 /* For the relay-side, we want to transition from OBFUSCATE_CIRC_SETUP to END
189 * state when the length finishes. */
190 relay_machine->states[CIRCPAD_STATE_OBFUSCATE_CIRC_SETUP].
191 next_state[CIRCPAD_EVENT_LENGTH_COUNT] = CIRCPAD_STATE_END;
193 /* Now let's define the OBF -> OBF transitions that maintain our padding
194 * flow:
196 * For the relay-side machine, we want to keep on sending padding bytes even
197 * when nothing else happens on this circuit. */
198 relay_machine->states[CIRCPAD_STATE_OBFUSCATE_CIRC_SETUP].
199 next_state[CIRCPAD_EVENT_PADDING_SENT] =
200 CIRCPAD_STATE_OBFUSCATE_CIRC_SETUP;
201 /* For the relay-side machine, we need this transition so that we re-enter
202 the state, after PADDING_NEGOTIATED is sent. Otherwise, the remove token
203 function will disable the timer, and nothing will restart it since there
204 is no other motion on an intro circuit. */
205 relay_machine->states[CIRCPAD_STATE_OBFUSCATE_CIRC_SETUP].
206 next_state[CIRCPAD_EVENT_NONPADDING_SENT] =
207 CIRCPAD_STATE_OBFUSCATE_CIRC_SETUP;
209 /* Token removal strategy for OBFUSCATE_CIRC_SETUP state: Don't
210 * remove any tokens.
212 * We rely on the state length sampling and not token removal, to avoid
213 * the mallocs required to copy the histograms for token removal,
214 * and to avoid monotime calls needed to determine histogram
215 * bins for token removal. */
216 relay_machine->states[CIRCPAD_STATE_OBFUSCATE_CIRC_SETUP].
217 token_removal = CIRCPAD_TOKEN_REMOVAL_NONE;
219 /* Figure out the length of the OBFUSCATE_CIRC_SETUP state so that it's
220 * randomized. The relay side will send between INTRO_MACHINE_MINIMUM_PADDING
221 * and INTRO_MACHINE_MAXIMUM_PADDING padding cells towards the client. */
222 relay_machine->states[CIRCPAD_STATE_OBFUSCATE_CIRC_SETUP].
223 length_dist.type = CIRCPAD_DIST_UNIFORM;
224 relay_machine->states[CIRCPAD_STATE_OBFUSCATE_CIRC_SETUP].
225 length_dist.param1 = INTRO_MACHINE_MINIMUM_PADDING;
226 relay_machine->states[CIRCPAD_STATE_OBFUSCATE_CIRC_SETUP].
227 length_dist.param2 = INTRO_MACHINE_MAXIMUM_PADDING;
229 /* Configure histogram */
230 relay_machine->states[CIRCPAD_STATE_OBFUSCATE_CIRC_SETUP].
231 histogram_len = 2;
233 /* For the relay-side machine we want to batch padding instantly to pretend
234 * its an incoming directory download. So set the histogram edges tight:
235 * (1, 10ms, infinity). */
236 relay_machine->states[CIRCPAD_STATE_OBFUSCATE_CIRC_SETUP].
237 histogram_edges[0] = 1000;
238 relay_machine->states[CIRCPAD_STATE_OBFUSCATE_CIRC_SETUP].
239 histogram_edges[1] = 10000;
241 /* We put all our tokens in bin 0, which means we want 100% probability
242 * for choosing a inter-packet delay of between 1000 and 10000 microseconds
243 * (1 to 10ms). Since we only have 1 bin, it doesn't matter how many tokens
244 * there are, 1000 out of 1000 is 100% */
245 relay_machine->states[CIRCPAD_STATE_OBFUSCATE_CIRC_SETUP].
246 histogram[0] = 1000;
248 /* just one bin, so setup the total tokens */
249 relay_machine->states[CIRCPAD_STATE_OBFUSCATE_CIRC_SETUP].
250 histogram_total_tokens =
251 relay_machine->states[CIRCPAD_STATE_OBFUSCATE_CIRC_SETUP].histogram[0];
253 /* Register the machine */
254 relay_machine->machine_num = smartlist_len(machines_sl);
255 circpad_register_padding_machine(relay_machine, machines_sl);
257 log_info(LD_CIRC,
258 "Registered relay intro circuit hiding padding machine (%u)",
259 relay_machine->machine_num);
262 /************************** Rendezvous-circuit machine ***********************/
264 /** Create a client-side padding machine that aims to hide rendezvous
265 * circuits.*/
266 void
267 circpad_machine_client_hide_rend_circuits(smartlist_t *machines_sl)
269 circpad_machine_spec_t *client_machine
270 = tor_malloc_zero(sizeof(circpad_machine_spec_t));
272 client_machine->name = "client_rp_circ";
274 /* Only pad after the circuit has been built and pad to the middle */
275 client_machine->conditions.apply_state_mask = CIRCPAD_CIRC_OPENED;
276 client_machine->target_hopnum = 2;
278 /* This is a client machine */
279 client_machine->is_origin_side = 1;
281 /* We only want to pad rendezvous circuits, and we want to start padding only
282 * after the rendezvous circuit has been established.
284 * Following a similar argument as for intro circuits, we are aiming for
285 * padded rendezvous circuits to blend in with the initial cell sequence of
286 * general circuits which usually look like this:
288 * [EXTEND2] -> EXTENDED2 -> [EXTEND2] -> EXTENDED2 -> [BEGIN] -> CONNECTED
289 * -> [DATA] -> [DATA] -> DATA -> DATA...(incoming cells continue)
291 * Whereas normal rendezvous circuits usually look like:
293 * [EXTEND2] -> EXTENDED2 -> [EXTEND2] -> EXTENDED2 -> [EST_REND] -> REND_EST
294 * -> REND2 -> [BEGIN]
296 * This means that up to the sixth cell (in the first line), both general and
297 * rend circuits have identical cell sequences.
299 * After that we want to mimic a [DATA] -> [DATA] -> DATA -> DATA sequence.
301 * With padding negotiation right after the REND_ESTABLISHED, the sequence
302 * becomes:
304 * [EXTEND2] -> EXTENDED2 -> [EXTEND2] -> EXTENDED2 -> [EST_REND] -> REND_EST
305 * -> [PADDING_NEGOTIATE] -> [DROP] -> PADDING_NEGOTIATED -> DROP...
307 * After which normal application DATA cells continue on the circuit.
309 * Hence this way we make rendezvous circuits look like general circuits up
310 * till the end of the circuit setup. */
311 client_machine->conditions.apply_purpose_mask =
312 circpad_circ_purpose_to_mask(CIRCUIT_PURPOSE_C_REND_JOINED)|
313 circpad_circ_purpose_to_mask(CIRCUIT_PURPOSE_C_REND_READY)|
314 circpad_circ_purpose_to_mask(CIRCUIT_PURPOSE_C_REND_READY_INTRO_ACKED);
316 /* Set padding machine limits to help guard against excessive padding */
317 client_machine->allowed_padding_count = 1;
318 client_machine->max_padding_percent = 1;
320 /* Two states: START, OBFUSCATE_CIRC_SETUP (and END) */
321 circpad_machine_states_init(client_machine, 2);
323 /* START -> OBFUSCATE_CIRC_SETUP transition upon sending the first
324 * non-padding cell (which is PADDING_NEGOTIATE) */
325 client_machine->states[CIRCPAD_STATE_START].
326 next_state[CIRCPAD_EVENT_NONPADDING_SENT] =
327 CIRCPAD_STATE_OBFUSCATE_CIRC_SETUP;
329 /* OBFUSCATE_CIRC_SETUP -> END transition when we send our first
330 * padding packet and/or hit the state length (the state length is 1). */
331 client_machine->states[CIRCPAD_STATE_OBFUSCATE_CIRC_SETUP].
332 next_state[CIRCPAD_EVENT_PADDING_RECV] = CIRCPAD_STATE_END;
333 client_machine->states[CIRCPAD_STATE_OBFUSCATE_CIRC_SETUP].
334 next_state[CIRCPAD_EVENT_LENGTH_COUNT] = CIRCPAD_STATE_END;
336 /* Don't use a token removal strategy since we don't want to use monotime
337 * functions and we want to avoid mallocing histogram copies. We want
338 * this machine to be light. */
339 client_machine->states[CIRCPAD_STATE_OBFUSCATE_CIRC_SETUP].
340 token_removal = CIRCPAD_TOKEN_REMOVAL_NONE;
342 /* Instead, to control the volume of padding (we just want to send a single
343 * padding cell) we will use a static state length. We just want one token,
344 * since we want to make the following pattern:
345 * [PADDING_NEGOTIATE] -> [DROP] -> PADDING_NEGOTIATED -> DROP */
346 client_machine->states[CIRCPAD_STATE_OBFUSCATE_CIRC_SETUP].
347 length_dist.type = CIRCPAD_DIST_UNIFORM;
348 client_machine->states[CIRCPAD_STATE_OBFUSCATE_CIRC_SETUP].
349 length_dist.param1 = 1;
350 client_machine->states[CIRCPAD_STATE_OBFUSCATE_CIRC_SETUP].
351 length_dist.param2 = 2; // rand(1,2) is always 1
353 /* Histogram is: (0 msecs, 1 msec, infinity). We want this to be fast so
354 * that we send our outgoing [DROP] before the PADDING_NEGOTIATED comes
355 * back from the relay side. */
356 client_machine->states[CIRCPAD_STATE_OBFUSCATE_CIRC_SETUP].
357 histogram_len = 2;
358 client_machine->states[CIRCPAD_STATE_OBFUSCATE_CIRC_SETUP].
359 histogram_edges[0] = 0;
360 client_machine->states[CIRCPAD_STATE_OBFUSCATE_CIRC_SETUP].
361 histogram_edges[1] = 1000;
363 /* We want a 100% probability of choosing an inter-packet delay of
364 * between 0 and 1ms. Since we don't use token removal,
365 * the number of tokens does not matter. (And also, state_length
366 * governs how many packets we send). */
367 client_machine->states[CIRCPAD_STATE_OBFUSCATE_CIRC_SETUP].
368 histogram[0] = 1;
369 client_machine->states[CIRCPAD_STATE_OBFUSCATE_CIRC_SETUP].
370 histogram_total_tokens = 1;
372 /* Register the machine */
373 client_machine->machine_num = smartlist_len(machines_sl);
374 circpad_register_padding_machine(client_machine, machines_sl);
376 log_info(LD_CIRC,
377 "Registered client rendezvous circuit hiding padding machine (%u)",
378 client_machine->machine_num);
381 /** Create a relay-side padding machine that aims to hide IP circuits.
383 * This is meant to follow the client-side machine.
385 void
386 circpad_machine_relay_hide_rend_circuits(smartlist_t *machines_sl)
388 circpad_machine_spec_t *relay_machine
389 = tor_malloc_zero(sizeof(circpad_machine_spec_t));
391 relay_machine->name = "relay_rp_circ";
393 /* Only pad after the circuit has been built and pad to the middle */
394 relay_machine->conditions.min_hops = 2;
395 relay_machine->conditions.apply_state_mask = CIRCPAD_CIRC_OPENED;
397 /* This is a relay-side machine */
398 relay_machine->is_origin_side = 0;
400 /* Set padding machine limits to help guard against excessive padding */
401 relay_machine->allowed_padding_count = 1;
402 relay_machine->max_padding_percent = 1;
404 /* Two states: START, OBFUSCATE_CIRC_SETUP (and END) */
405 circpad_machine_states_init(relay_machine, 2);
407 /* START -> OBFUSCATE_CIRC_SETUP transition upon sending the first
408 * non-padding cell (which is PADDING_NEGOTIATED) */
409 relay_machine->states[CIRCPAD_STATE_START].
410 next_state[CIRCPAD_EVENT_NONPADDING_SENT] =
411 CIRCPAD_STATE_OBFUSCATE_CIRC_SETUP;
413 /* OBFUSCATE_CIRC_SETUP -> END transition when we send our first
414 * padding packet and/or hit the state length (the state length is 1). */
415 relay_machine->states[CIRCPAD_STATE_OBFUSCATE_CIRC_SETUP].
416 next_state[CIRCPAD_EVENT_PADDING_SENT] = CIRCPAD_STATE_END;
417 relay_machine->states[CIRCPAD_STATE_OBFUSCATE_CIRC_SETUP].
418 next_state[CIRCPAD_EVENT_LENGTH_COUNT] = CIRCPAD_STATE_END;
420 /* Don't use a token removal strategy since we don't want to use monotime
421 * functions and we want to avoid mallocing histogram copies. We want
422 * this machine to be light. */
423 relay_machine->states[CIRCPAD_STATE_OBFUSCATE_CIRC_SETUP].
424 token_removal = CIRCPAD_TOKEN_REMOVAL_NONE;
426 /* Instead, to control the volume of padding (we just want to send a single
427 * padding cell) we will use a static state length. We just want one token,
428 * since we want to make the following pattern:
429 * [PADDING_NEGOTIATE] -> [DROP] -> PADDING_NEGOTIATED -> DROP */
430 relay_machine->states[CIRCPAD_STATE_OBFUSCATE_CIRC_SETUP].
431 length_dist.type = CIRCPAD_DIST_UNIFORM;
432 relay_machine->states[CIRCPAD_STATE_OBFUSCATE_CIRC_SETUP].
433 length_dist.param1 = 1;
434 relay_machine->states[CIRCPAD_STATE_OBFUSCATE_CIRC_SETUP].
435 length_dist.param2 = 2; // rand(1,2) is always 1
437 /* Histogram is: (0 msecs, 1 msec, infinity). We want this to be fast so
438 * that the outgoing DROP cell is sent immediately after the
439 * PADDING_NEGOTIATED. */
440 relay_machine->states[CIRCPAD_STATE_OBFUSCATE_CIRC_SETUP].
441 histogram_len = 2;
442 relay_machine->states[CIRCPAD_STATE_OBFUSCATE_CIRC_SETUP].
443 histogram_edges[0] = 0;
444 relay_machine->states[CIRCPAD_STATE_OBFUSCATE_CIRC_SETUP].
445 histogram_edges[1] = 1000;
447 /* We want a 100% probability of choosing an inter-packet delay of
448 * between 0 and 1ms. Since we don't use token removal,
449 * the number of tokens does not matter. (And also, state_length
450 * governs how many packets we send). */
451 relay_machine->states[CIRCPAD_STATE_OBFUSCATE_CIRC_SETUP].
452 histogram[0] = 1;
453 relay_machine->states[CIRCPAD_STATE_OBFUSCATE_CIRC_SETUP].
454 histogram_total_tokens = 1;
456 /* Register the machine */
457 relay_machine->machine_num = smartlist_len(machines_sl);
458 circpad_register_padding_machine(relay_machine, machines_sl);
460 log_info(LD_CIRC,
461 "Registered relay rendezvous circuit hiding padding machine (%u)",
462 relay_machine->machine_num);