/* Copyright (c) 2021, The Tor Project, Inc. */
/* See LICENSE for licensing information */

/**
 * \file conflux.c
 * \brief Conflux multipath core algorithms
 */

#define TOR_CONFLUX_PRIVATE

#include "core/or/or.h"
#include "core/or/circuit_st.h"
#include "core/or/sendme.h"
#include "core/or/relay.h"
#include "core/or/congestion_control_common.h"
#include "core/or/congestion_control_st.h"
#include "core/or/origin_circuit_st.h"
#include "core/or/circuitlist.h"
#include "core/or/circuituse.h"
#include "core/or/conflux.h"
#include "core/or/conflux_params.h"
#include "core/or/conflux_util.h"
#include "core/or/conflux_pool.h"
#include "core/or/conflux_st.h"
#include "core/or/conflux_cell.h"
#include "lib/time/compat_time.h"
#include "app/config/config.h"

/** One million microseconds in a second */
#define USEC_PER_SEC 1000000
static inline uint64_t cwnd_sendable(const circuit_t *on_circ,
                                     uint64_t in_usec, uint64_t our_usec);
/* Track the total number of bytes used by all ooo_q so the OOM handler can
 * assess how much memory the out-of-order queues are using. */
static uint64_t total_ooo_q_bytes = 0;
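/* This counter is incremented by sizeof(cell_t) when a cell is queued in
 * conflux_process_cell(), decremented in conflux_dequeue_cell(), and
 * reported to the OOM subsystem via conflux_get_total_bytes_allocation(). */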
/**
 * Determine if we should multiplex a specific relay command or not.
 *
 * TODO: Version of this that is the set of forbidden commands
 * on linked circuits
 */
bool
conflux_should_multiplex(int relay_command)
{
  switch (relay_command) {
    /* These are all fine to multiplex, and must be
     * so that ordering is preserved */
    case RELAY_COMMAND_BEGIN:
    case RELAY_COMMAND_DATA:
    case RELAY_COMMAND_END:
    case RELAY_COMMAND_CONNECTED:
      return true;

    /* We can't multiplex these because they are
     * circuit-specific */
    case RELAY_COMMAND_SENDME:
    case RELAY_COMMAND_EXTEND:
    case RELAY_COMMAND_EXTENDED:
    case RELAY_COMMAND_TRUNCATE:
    case RELAY_COMMAND_TRUNCATED:
    case RELAY_COMMAND_DROP:
      return false;

    /* We must multiplex RESOLVEs because their ordering
     * impacts begin/end. */
    case RELAY_COMMAND_RESOLVE:
    case RELAY_COMMAND_RESOLVED:
      return true;

    /* These are all circuit-specific */
    case RELAY_COMMAND_BEGIN_DIR:
    case RELAY_COMMAND_EXTEND2:
    case RELAY_COMMAND_EXTENDED2:
    case RELAY_COMMAND_ESTABLISH_INTRO:
    case RELAY_COMMAND_ESTABLISH_RENDEZVOUS:
    case RELAY_COMMAND_INTRODUCE1:
    case RELAY_COMMAND_INTRODUCE2:
    case RELAY_COMMAND_RENDEZVOUS1:
    case RELAY_COMMAND_RENDEZVOUS2:
    case RELAY_COMMAND_INTRO_ESTABLISHED:
    case RELAY_COMMAND_RENDEZVOUS_ESTABLISHED:
    case RELAY_COMMAND_INTRODUCE_ACK:
    case RELAY_COMMAND_PADDING_NEGOTIATE:
    case RELAY_COMMAND_PADDING_NEGOTIATED:
      return false;

    /* These must be multiplexed because their ordering
     * relative to BEGIN/END must be preserved */
    case RELAY_COMMAND_XOFF:
    case RELAY_COMMAND_XON:
      return true;

    /* These are not multiplexed, because they must
     * be processed immediately to update sequence numbers
     * before any other cells are processed on the circuit */
    case RELAY_COMMAND_CONFLUX_SWITCH:
    case RELAY_COMMAND_CONFLUX_LINK:
    case RELAY_COMMAND_CONFLUX_LINKED:
    case RELAY_COMMAND_CONFLUX_LINKED_ACK:
      return false;

    default:
      log_warn(LD_BUG, "Conflux asked to multiplex unknown relay command %d",
               relay_command);
      return false;
  }
}
/** Return the leg for a circuit in a conflux set. Return NULL if not found. */
conflux_leg_t *
conflux_get_leg(conflux_t *cfx, const circuit_t *circ)
{
  conflux_leg_t *leg_found = NULL;

  // Find the leg that the cell is written on
  CONFLUX_FOR_EACH_LEG_BEGIN(cfx, leg) {
    if (leg->circ == circ) {
      leg_found = leg;
      break;
    }
  } CONFLUX_FOR_EACH_LEG_END(leg);

  return leg_found;
}
/**
 * Gets the maximum last_seq_sent from all legs.
 */
uint64_t
conflux_get_max_seq_sent(const conflux_t *cfx)
{
  uint64_t max_seq_sent = 0;

  CONFLUX_FOR_EACH_LEG_BEGIN(cfx, leg) {
    if (leg->last_seq_sent > max_seq_sent) {
      max_seq_sent = leg->last_seq_sent;
    }
  } CONFLUX_FOR_EACH_LEG_END(leg);

  return max_seq_sent;
}
/**
 * Gets the maximum last_seq_recv from all legs.
 */
uint64_t
conflux_get_max_seq_recv(const conflux_t *cfx)
{
  uint64_t max_seq_recv = 0;

  CONFLUX_FOR_EACH_LEG_BEGIN(cfx, leg) {
    if (leg->last_seq_recv > max_seq_recv) {
      max_seq_recv = leg->last_seq_recv;
    }
  } CONFLUX_FOR_EACH_LEG_END(leg);

  return max_seq_recv;
}
/** Return the total memory allocation the circuit is using for conflux. If
 * this circuit is not a Conflux circuit, 0 is returned. */
uint64_t
conflux_get_circ_bytes_allocation(const circuit_t *circ)
{
  if (circ->conflux) {
    return smartlist_len(circ->conflux->ooo_q) * sizeof(conflux_cell_t);
  }
  return 0;
}
/** Return the total memory allocation in bytes by the subsystem.
 *
 * At the moment, only out-of-order queues are considered. */
uint64_t
conflux_get_total_bytes_allocation(void)
{
  return total_ooo_q_bytes;
}
/** The OOM handler is asking us to try to free at least bytes_to_remove. */
size_t
conflux_handle_oom(size_t bytes_to_remove)
{
  (void) bytes_to_remove;

  /* We are not doing anything on the sets; the OOM handler will trigger a
   * circuit cleanup, which will affect conflux sets by pruning the oldest
   * circuits. */

  log_info(LD_CIRC, "OOM handler triggered. OOO queues allocation: %" PRIu64,
           total_ooo_q_bytes);
  return 0;
}
/**
 * Returns true if a circuit has package window space to send, and is
 * not blocked locally.
 */
static inline bool
circuit_ready_to_send(const circuit_t *circ)
{
  const congestion_control_t *cc = circuit_ccontrol(circ);
  bool cc_sendable = true;

  /* We consider ourselves blocked if we're within 1 sendme of the
   * cwnd, because inflight is decremented before this check */
  // TODO-329-TUNING: This subtraction may not be right.. It depends
  // on call order wrt decisions and sendme arrival
  if (cc->inflight >= cc->cwnd) {
    cc_sendable = false;
  }

  /* Origin circuits use the package window of the last hop, and
   * have an outbound cell direction (towards exit). Otherwise,
   * there is no cpath and direction is inbound. */
  if (CIRCUIT_IS_ORIGIN(circ)) {
    return cc_sendable && !circ->circuit_blocked_on_n_chan;
  } else {
    return cc_sendable && !circ->circuit_blocked_on_p_chan;
  }
}
/**
 * Return the circuit with the minimum RTT. Do not use any
 * other circuit.
 *
 * This algorithm will minimize RTT always, and will not provide
 * any throughput benefit. We expect it to be useful for VoIP/UDP
 * use cases. Because it only uses one circuit on a leg at a time,
 * it can have more than one circuit per guard (ie: to find
 * lower-latency middles for the path).
 */
static const circuit_t *
conflux_decide_circ_minrtt(const conflux_t *cfx)
{
  uint64_t min_rtt = UINT64_MAX;
  const circuit_t *circ = NULL;

  /* Can't get here without any legs. */
  tor_assert(CONFLUX_NUM_LEGS(cfx));

  CONFLUX_FOR_EACH_LEG_BEGIN(cfx, leg) {

    /* Ignore circuits with no RTT measurement */
    if (leg->circ_rtts_usec && leg->circ_rtts_usec < min_rtt) {
      circ = leg->circ;
      min_rtt = leg->circ_rtts_usec;
    }
  } CONFLUX_FOR_EACH_LEG_END(leg);

  /* If the minRTT circuit can't send, don't send on any circuit. */
  if (!circ || !circuit_ready_to_send(circ)) {
    return NULL;
  }
  return circ;
}
/**
 * Favor the circuit with the lowest RTT that still has space in the
 * congestion window.
 *
 * This algorithm will maximize total throughput at the expense of
 * bloating out-of-order queues.
 */
static const circuit_t *
conflux_decide_circ_lowrtt(const conflux_t *cfx)
{
  uint64_t low_rtt = UINT64_MAX;
  const circuit_t *circ = NULL;

  /* Can't get here without any legs. */
  tor_assert(CONFLUX_NUM_LEGS(cfx));

  CONFLUX_FOR_EACH_LEG_BEGIN(cfx, leg) {
    /* If the package window is full, skip it */
    if (!circuit_ready_to_send(leg->circ)) {
      continue;
    }

    /* Ignore circuits with no RTT */
    if (leg->circ_rtts_usec && leg->circ_rtts_usec < low_rtt) {
      low_rtt = leg->circ_rtts_usec;
      circ = leg->circ;
    }
  } CONFLUX_FOR_EACH_LEG_END(leg);

  /* At this point, if we found a circuit, we've already validated that its
   * congestion window has room. */
  return circ;
}
/**
 * Returns the amount of room in a cwnd on a circuit.
 */
static inline uint64_t
cwnd_available(const circuit_t *on_circ)
{
  const congestion_control_t *cc = circuit_ccontrol(on_circ);
  tor_assert(cc);

  if (cc->cwnd < cc->inflight)
    return 0;

  return cc->cwnd - cc->inflight;
}
/**
 * Return the amount of congestion window we can send on
 * on_circ during in_usec. However, if we're still in
 * slow-start, send the whole window to establish the true
 * cwnd.
 */
static inline uint64_t
cwnd_sendable(const circuit_t *on_circ, uint64_t in_usec,
              uint64_t our_usec)
{
  const congestion_control_t *cc = circuit_ccontrol(on_circ);
  tor_assert(cc);
  uint64_t cwnd_adjusted = cwnd_available(on_circ);

  if (our_usec == 0 || in_usec == 0) {
    log_fn(LOG_PROTOCOL_WARN, LD_CIRC,
           "cwnd_sendable: Missing RTT data. in_usec: %" PRIu64
           " our_usec: %" PRIu64, in_usec, our_usec);
    return cwnd_adjusted;
  }

  if (cc->in_slow_start) {
    return cwnd_adjusted;
  } else {
    /* For any given leg, it has min_rtt/2 time before the 'primary'
     * leg's acks start arriving. So, the amount of data this
     * 'secondary' leg can send while the min_rtt leg transmits these
     * acks is:
     *   (cwnd_leg/(leg_rtt/2))*min_rtt/2 = cwnd_leg*min_rtt/leg_rtt.
     */
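    /* Worked example with hypothetical numbers (not from the original
     * source): if this leg has 100 cells of cwnd available and an RTT
     * (our_usec) of 200 ms, while the min-RTT leg's RTT (in_usec) is
     * 50 ms, then sendable = 100*50/200 = 25 cells may be sent here
     * before the primary leg's acks are expected to start arriving. */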
    uint64_t sendable = cwnd_adjusted*in_usec/our_usec;
    return MIN(cc->cwnd, sendable);
  }
}
/**
 * Returns true if we can switch to a new circuit, false otherwise.
 *
 * This function assumes we're primarily switching between two circuits,
 * the current and the prev. If we're using more than two circuits, we
 * need to set cfx_drain_pct to 100.
 */
static inline bool
conflux_can_switch(const conflux_t *cfx)
{
  /* If we still expect to send more cells on this circuit,
   * we're only allowed to switch if the previous circuit has emptied. */
  if (cfx->cells_until_switch > 0) {
    /* If there is no prev leg, skip the inflight check. */
    if (!cfx->prev_leg) {
      return false;
    }
    const congestion_control_t *ccontrol =
      circuit_ccontrol(cfx->prev_leg->circ);

    /* If the inflight count has drained to below cfx_drain_pct
     * of the congestion window, then we can switch.
     * We check the sendme_inc because there may be un-ackable
     * data in inflight as well, and we can still switch then. */
    // TODO-329-TUNING: Should we try to switch if the prev_leg is
    // ready to send, instead of this?
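    /* Illustrative numbers (hypothetical, not from the original source):
     * with a drain percentage of 50 and a previous-leg cwnd of 200 cells,
     * the condition below allows switching once inflight drops to 100
     * cells or fewer, or as soon as inflight falls below sendme_inc. */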
    if (ccontrol->inflight < ccontrol->sendme_inc ||
        100*ccontrol->inflight <=
        conflux_params_get_drain_pct()*ccontrol->cwnd) {
      return true;
    }

    return false;
  }

  return true;
}
/**
 * Favor the circuit with the lowest RTT that still has space in the
 * congestion window, up to the ratio of RTTs.
 *
 * This algorithm should only use auxiliary legs up to the point
 * where their data arrives at roughly the same time as the lowest
 * RTT leg. It will not utilize the full cwnd of auxiliary legs,
 * except in slow start. Therefore, out-of-order queue bloat should
 * be minimized to just the slow-start phase.
 */
static const circuit_t *
conflux_decide_circ_cwndrtt(const conflux_t *cfx)
{
  uint64_t min_rtt = UINT64_MAX;
  const conflux_leg_t *leg = NULL;

  /* Can't get here without any legs. */
  tor_assert(CONFLUX_NUM_LEGS(cfx));
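  /* Leg selection happens in two passes: first, find the leg with the
   * minimum RTT and use it if it still has room to send. Otherwise, fall
   * back to a leg whose cwnd_sendable() share (scaled by min_rtt relative
   * to that leg's own RTT) is still non-zero. */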
  /* Find the leg with the minimum RTT. */
  CONFLUX_FOR_EACH_LEG_BEGIN(cfx, l) {
    /* Ignore circuits with invalid RTT */
    if (l->circ_rtts_usec && l->circ_rtts_usec < min_rtt) {
      min_rtt = l->circ_rtts_usec;
      leg = l;
    }
  } CONFLUX_FOR_EACH_LEG_END(l);

  /* If the package window has room, use it */
  if (leg && circuit_ready_to_send(leg->circ)) {
    return leg->circ;
  }

  leg = NULL;

  CONFLUX_FOR_EACH_LEG_BEGIN(cfx, l) {
    if (!circuit_ready_to_send(l->circ)) {
      continue;
    }

    /* Pick a 'min_leg' with the lowest RTT that still has
     * room in the congestion window. Note that this works for
     * min_leg itself, up to inflight. */
    if (l->circ_rtts_usec &&
        cwnd_sendable(l->circ, min_rtt, l->circ_rtts_usec) > 0) {
      leg = l;
    }
  } CONFLUX_FOR_EACH_LEG_END(l);

  /* If the circuit can't send, don't send on any circuit. */
  if (!leg || !circuit_ready_to_send(leg->circ)) {
    return NULL;
  }
  return leg->circ;
}
/**
 * This function is called when we want to send a relay cell on a
 * conflux, as well as when we want to compute the available space to
 * package from streams.
 *
 * It determines the circuit that the relay command should be sent on,
 * and sends a SWITCH cell if necessary.
 *
 * It returns the circuit we should send on. If no circuits are ready
 * to send, it returns NULL.
 */
circuit_t *
conflux_decide_circ_for_send(conflux_t *cfx,
                             circuit_t *orig_circ,
                             uint8_t relay_command)
{
  /* If this command should not be multiplexed, send it on the original
   * circuit */
  if (!conflux_should_multiplex(relay_command)) {
    return orig_circ;
  }

  circuit_t *new_circ = conflux_decide_next_circ(cfx);

  /* Because our congestion window only covers relay data commands, we can
   * end up in a situation where we need to send non-data commands when all
   * circuits are at capacity. For those cases, keep using the *current* leg,
   * so these commands arrive in-order. */
  if (!new_circ && relay_command != RELAY_COMMAND_DATA) {
    /* Curr leg should be set, because conflux_decide_next_circ() should
     * have set it earlier. No BUG() here because the only caller BUG()s. */
    if (!cfx->curr_leg) {
      log_warn(LD_BUG, "No current leg for conflux with relay command %d",
               relay_command);
      return NULL;
    }

    return cfx->curr_leg->circ;
  }
  /*
   * If we are switching to a new circuit, we need to send a SWITCH command.
   * We also need to compute an estimate of how much data we can send on
   * the new circuit before we are allowed to switch again, to rate
   * limit the frequency of switching.
   */
  if (new_circ) {
    conflux_leg_t *new_leg = conflux_get_leg(cfx, new_circ);
    tor_assert(cfx->curr_leg);

    if (new_circ != cfx->curr_leg->circ) {
      // TODO-329-TUNING: This is one mechanism to rate limit switching,
      // which should reduce the OOQ mem. However, we're not going to do that
      // until we get some data on if the memory usage is high
      cfx->cells_until_switch = 0;
      //cwnd_sendable(new_circ,cfx->curr_leg->circ_rtts_usec,
      //              new_leg->circ_rtts_usec);

      conflux_validate_stream_lists(cfx);

      cfx->prev_leg = cfx->curr_leg;
      cfx->curr_leg = new_leg;

      tor_assert(cfx->prev_leg);
      tor_assert(cfx->curr_leg);

      uint64_t relative_seq = cfx->prev_leg->last_seq_sent -
                              cfx->curr_leg->last_seq_sent;
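      /* Hypothetical example (numbers not from the original source): if the
       * previous leg has last_seq_sent == 1000 and the new leg is at 900,
       * the SWITCH cell carries relative_seq == 100, so the other endpoint
       * can advance the new leg's sequence numbering to 1000 before any
       * multiplexed cells arrive on it. */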
      tor_assert(cfx->prev_leg->last_seq_sent >=
                 cfx->curr_leg->last_seq_sent);
      conflux_send_switch_command(cfx->curr_leg->circ, relative_seq);
      cfx->curr_leg->last_seq_sent = cfx->prev_leg->last_seq_sent;
    }
  }

  return new_circ;
}
/** Called after conflux actually sent a cell on a circuit.
 * This function updates sequence number counters, and
 * switch counters.
 */
void
conflux_note_cell_sent(conflux_t *cfx, circuit_t *circ, uint8_t relay_command)
{
  conflux_leg_t *leg = NULL;

  if (!conflux_should_multiplex(relay_command)) {
    return;
  }

  leg = conflux_get_leg(cfx, circ);
  tor_assert(leg);

  leg->last_seq_sent++;

  if (cfx->cells_until_switch > 0) {
    cfx->cells_until_switch--;
  }
}
/** Find the leg with the lowest non-zero circ_rtts_usec, and
 * pick it for our current leg. */
static inline bool
conflux_pick_first_leg(conflux_t *cfx)
{
  conflux_leg_t *min_leg = NULL;

  CONFLUX_FOR_EACH_LEG_BEGIN(cfx, leg) {
    /* We need to skip 0-RTT legs, since this can happen at the exit
     * when there is a race between BEGIN and LINKED_ACK, and BEGIN
     * wins the race. The good news is that because BEGIN won,
     * we don't need to consider those other legs, since they are
     * slower. */
    if (leg->circ_rtts_usec > 0) {
      if (!min_leg || leg->circ_rtts_usec < min_leg->circ_rtts_usec) {
        min_leg = leg;
      }
    }
  } CONFLUX_FOR_EACH_LEG_END(leg);

  if (!min_leg) {
    // Get the 0th leg; if it does not exist, log the set.
    // Bug 40827 managed to hit this, so let's dump the sets
    // in case it happens again.
    if (BUG(smartlist_len(cfx->legs) <= 0)) {
      // Since we have no legs, we have no idea if this is really a client
      // or server set. Try to find any that match:
      log_warn(LD_BUG, "Matching client sets:");
      conflux_log_set(LOG_WARN, cfx, true);
      log_warn(LD_BUG, "Matching server sets:");
      conflux_log_set(LOG_WARN, cfx, false);
      log_warn(LD_BUG, "End conflux set dump");
      return false;
    }

    min_leg = smartlist_get(cfx->legs, 0);
    tor_assert(min_leg);

    if (BUG(min_leg->linked_sent_usec == 0)) {
      log_warn(LD_BUG, "Conflux has no legs with non-zero RTT. "
               "Using first leg.");
      conflux_log_set(LOG_WARN, cfx, CIRCUIT_IS_ORIGIN(min_leg->circ));
    }
  }

  // TODO-329-TUNING: We may want to initialize this to a cwnd, to
  // minimize early switching?
  //cfx->cells_until_switch = circuit_ccontrol(min_leg->circ)->cwnd;
  cfx->cells_until_switch = 0;

  cfx->curr_leg = min_leg;

  return true;
}
/**
 * Returns the circuit that conflux would send on next, if
 * conflux_decide_circ_for_send were called. This is used to compute
 * available space in the package window.
 */
circuit_t *
conflux_decide_next_circ(conflux_t *cfx)
{
  // TODO-329-TUNING: Temporarily validate legs here. We can remove
  // this once tuning is complete.
  conflux_validate_legs(cfx);

  /* If the conflux set is tearing down and has no current leg,
   * bail and give up */
  if (cfx->in_full_teardown) {
    return NULL;
  }

  /* If we don't have a current leg yet, pick one.
   * (This is the only non-const operation in this function). */
  if (!cfx->curr_leg) {
    if (!conflux_pick_first_leg(cfx))
      return NULL;
  }

  /* First, check if we can switch. */
  if (!conflux_can_switch(cfx)) {
    tor_assert(cfx->curr_leg);
    circuit_t *curr_circ = cfx->curr_leg->circ;

    /* If we can't switch, and the current circuit can't send,
     * then return null. */
    if (circuit_ready_to_send(curr_circ)) {
      return curr_circ;
    }

    log_info(LD_CIRC, "Conflux can't switch; no circuit to send on.");
    return NULL;
  }

  switch (cfx->params.alg) {
    case CONFLUX_ALG_MINRTT: // latency (no ooq)
      return (circuit_t*)conflux_decide_circ_minrtt(cfx);
    case CONFLUX_ALG_LOWRTT: // high throughput (high oooq)
      return (circuit_t*)conflux_decide_circ_lowrtt(cfx);
    case CONFLUX_ALG_CWNDRTT: // throughput (low oooq)
      return (circuit_t*)conflux_decide_circ_cwndrtt(cfx);
    default:
      return NULL;
  }
}
/**
 * Called when we have a new RTT estimate for a circuit.
 */
void
conflux_update_rtt(conflux_t *cfx, circuit_t *circ, uint64_t rtt_usec)
{
  conflux_leg_t *leg = conflux_get_leg(cfx, circ);

  if (!leg) {
    log_warn(LD_BUG, "Got RTT update for circuit not in conflux");
    return;
  }

  // Update RTT
  leg->circ_rtts_usec = rtt_usec;

  // TODO-329-ARTI: For UDP latency targeting, arti could decide to launch
  // a new test leg to potentially replace this one, if a latency target
  // was requested and we now exceed it. Since the C-Tor client likely
  // will not have UDP support, we aren't doing this here.
}
/**
 * Comparison function for ooo_q pqueue.
 *
 * Ensures that lower sequence numbers are at the head of the pqueue.
 */
static int
conflux_queue_cmp(const void *a, const void *b)
{
  // Compare a and b as conflux_cell_t using the seq field, and return a
  // comparison result such that the lowest seq is at the head of the pqueue.
  const conflux_cell_t *cell_a = a;
  const conflux_cell_t *cell_b = b;

  tor_assert(cell_a);
  tor_assert(cell_b);

  if (cell_a->seq < cell_b->seq) {
    return -1;
  } else if (cell_a->seq > cell_b->seq) {
    return 1;
  } else {
    return 0;
  }
}
/**
 * Get the congestion control object for a conflux circuit.
 *
 * Because conflux can only be negotiated with the last hop, we
 * can use the last hop of the cpath to obtain the congestion
 * control object for origin circuits. For non-origin circuits,
 * we can use the circuit itself.
 */
const congestion_control_t *
circuit_ccontrol(const circuit_t *circ)
{
  const congestion_control_t *ccontrol = NULL;
  tor_assert(circ);

  if (CIRCUIT_IS_ORIGIN(circ)) {
    tor_assert(CONST_TO_ORIGIN_CIRCUIT(circ)->cpath);
    tor_assert(CONST_TO_ORIGIN_CIRCUIT(circ)->cpath->prev);
    ccontrol = CONST_TO_ORIGIN_CIRCUIT(circ)->cpath->prev->ccontrol;
  } else {
    ccontrol = circ->ccontrol;
  }

  /* Conflux circuits always have congestion control */
  tor_assert(ccontrol);
  return ccontrol;
}
// TODO-329-TUNING: For LowRTT, we can at most switch every SENDME,
// but for BLEST, we should switch at most every cwnd.. But
// we do not know the other side's CWND here.. We can at best
// assume it is above the cwnd_min
#define CONFLUX_MIN_LINK_INCREMENT 31
/**
 * Validate and handle RELAY_COMMAND_CONFLUX_SWITCH.
 */
int
conflux_process_switch_command(circuit_t *in_circ,
                               crypt_path_t *layer_hint, cell_t *cell,
                               relay_header_t *rh)
{
  tor_assert(in_circ);
  tor_assert(cell);
  tor_assert(rh);

  conflux_t *cfx = in_circ->conflux;
  uint32_t relative_seq;
  conflux_leg_t *leg;

  if (!conflux_is_enabled(in_circ)) {
    circuit_mark_for_close(in_circ, END_CIRC_REASON_TORPROTOCOL);
    return -1;
  }

  /* If there is no conflux object negotiated, this is invalid.
   * Log and close the circuit. */
  if (!cfx) {
    log_warn(LD_BUG, "Got a conflux switch command on a circuit without "
             "conflux negotiated. Closing circuit.");

    circuit_mark_for_close(in_circ, END_CIRC_REASON_TORPROTOCOL);
    return -1;
  }

  // TODO-329-TUNING: Temporarily validate that we have all legs.
  // After tuning is complete, we can remove this.
  conflux_validate_legs(cfx);

  leg = conflux_get_leg(cfx, in_circ);

  /* If we can't find the conflux leg, we've got big problems..
   * Close the circuit. */
  if (!leg) {
    log_warn(LD_BUG, "Got a conflux switch command on a circuit without "
             "a conflux leg. Closing circuit.");
    circuit_mark_for_close(in_circ, END_CIRC_REASON_INTERNAL);
    return -1;
  }

  // Check the source hop via layer_hint
  if (!conflux_validate_source_hop(in_circ, layer_hint)) {
    log_warn(LD_BUG, "Got a conflux switch command on a circuit with an "
             "invalid source hop. Closing circuit.");
    circuit_mark_for_close(in_circ, END_CIRC_REASON_TORPROTOCOL);
    return -1;
  }

  relative_seq = conflux_cell_parse_switch(cell, rh->length);

  /*
   * We have to make sure that the switch command is truly
   * incrementing the sequence number, or else it becomes
   * a side channel that can be spammed for traffic analysis.
   */
  // TODO-329-TUNING: This can happen. Disabling for now..
  //if (relative_seq < CONFLUX_MIN_LINK_INCREMENT) {
  //  log_warn(LD_CIRC, "Got a conflux switch command with a relative "
  //           "sequence number less than the minimum increment. Closing "
  //           "circuit.");
  //  circuit_mark_for_close(in_circ, END_CIRC_REASON_TORPROTOCOL);
  //  return -1;
  //}

  // TODO-329-UDP: When Prop#340 exists and is negotiated, ensure we're
  // in a packed cell, with another cell following, otherwise
  // this is a spammed side-channel.
  //  - We definitely should never get switches back-to-back.
  //  - We should not get switches across all legs with no data.
  // But before Prop#340, it doesn't make much sense to do this.
  // C-Tor is riddled with side-channels like this anyway, unless
  // vanguards is in use. And this feature is not supported by
  // onion services in C-Tor, so we're good there.

  /* Update the absolute sequence number on this leg by the delta.
   * Since this cell is not multiplexed, we do not count it towards
   * absolute sequence numbers. We only increment the sequence
   * numbers for multiplexed cells. Hence there is no +1 here. */
  leg->last_seq_recv += relative_seq;
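  /* Continuing the hypothetical example from conflux_decide_circ_for_send():
   * receiving a SWITCH with relative_seq == 100 on a leg whose last_seq_recv
   * is 900 advances that leg to 1000, matching the sender's view before any
   * multiplexed cells arrive on this leg. */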
  /* Mark this data as validated for controlport and vanguards
   * dropped cell handling */
  if (CIRCUIT_IS_ORIGIN(in_circ)) {
    circuit_read_valid_data(TO_ORIGIN_CIRCUIT(in_circ), rh->length);
  }

  return 0;
}
/**
 * Process an incoming relay cell for conflux. Called from
 * connection_edge_process_relay_cell().
 *
 * Returns true if the conflux system now has well-ordered cells to deliver
 * to streams, false otherwise.
 */
bool
conflux_process_cell(conflux_t *cfx, circuit_t *in_circ,
                     crypt_path_t *layer_hint, cell_t *cell)
{
  // TODO-329-TUNING: Temporarily validate legs here. We can remove
  // this after tuning is complete.
  conflux_validate_legs(cfx);

  conflux_leg_t *leg = conflux_get_leg(cfx, in_circ);
  if (!leg) {
    log_warn(LD_BUG, "Got a conflux cell on a circuit without "
             "a conflux leg. Closing circuit.");
    circuit_mark_for_close(in_circ, END_CIRC_REASON_INTERNAL);
    return false;
  }

  /* We need to make sure this cell came from the expected hop, or
   * else it could be a data corruption attack from a middle node. */
  if (!conflux_validate_source_hop(in_circ, layer_hint)) {
    circuit_mark_for_close(in_circ, END_CIRC_REASON_TORPROTOCOL);
    return false;
  }

  /* Update the running absolute sequence number */
  leg->last_seq_recv++;
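  /* Hypothetical ordering example (not from the original source): if
   * cfx->last_seq_delivered is 10 and this cell brings the leg's
   * last_seq_recv to 12, the cell is queued below; once the cell with
   * absolute sequence 11 arrives on some leg, it is fast-pathed and the
   * queued cell 12 becomes available via conflux_dequeue_cell(). */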
  /* If this cell is next, fast-path it by processing the cell in-place */
  if (leg->last_seq_recv == cfx->last_seq_delivered + 1) {
    /* The cell is now ready to be processed, and the rest of the queue
     * should now be checked for remaining elements */
    cfx->last_seq_delivered++;
    return true;
  } else if (BUG(leg->last_seq_recv <= cfx->last_seq_delivered)) {
    log_warn(LD_BUG, "Got a conflux cell with a sequence number "
             "less than the last delivered. Closing circuit.");
    circuit_mark_for_close(in_circ, END_CIRC_REASON_INTERNAL);
    return false;
  } else {
    conflux_cell_t *c_cell = tor_malloc_zero(sizeof(conflux_cell_t));
    c_cell->seq = leg->last_seq_recv;

    memcpy(&c_cell->cell, cell, sizeof(cell_t));

    smartlist_pqueue_add(cfx->ooo_q, conflux_queue_cmp,
                         offsetof(conflux_cell_t, heap_idx), c_cell);
    total_ooo_q_bytes += sizeof(cell_t);

    /* This cell should not be processed yet, and the queue is not ready
     * to process because the next absolute seqnum has not yet arrived */
    return false;
  }
}
/**
 * Dequeue the top cell from our queue.
 *
 * Returns the cell as a conflux_cell_t, or NULL if the queue is empty
 * or has a hole.
 */
conflux_cell_t *
conflux_dequeue_cell(conflux_t *cfx)
{
  conflux_cell_t *top = NULL;
  if (smartlist_len(cfx->ooo_q) == 0)
    return NULL;

  top = smartlist_get(cfx->ooo_q, 0);

  /* If the top cell is the next sequence number we need, then
   * pop and return it. */
  if (top->seq == cfx->last_seq_delivered+1) {
    smartlist_pqueue_pop(cfx->ooo_q, conflux_queue_cmp,
                         offsetof(conflux_cell_t, heap_idx));
    total_ooo_q_bytes -= sizeof(cell_t);
    cfx->last_seq_delivered++;
    return top;
  } else {
    return NULL;
  }
}