fix typos/etc found while starting to look at scheduler
[tor.git] / src / or / scheduler.c
blob6ce7cb1e07b85177bcb752dc0547e673a74968af
1 /* * Copyright (c) 2013-2017, The Tor Project, Inc. */
2 /* See LICENSE for licensing information */
4 #include "or.h"
5 #include "config.h"
7 #include "compat_libevent.h"
8 #define SCHEDULER_PRIVATE_
9 #define SCHEDULER_KIST_PRIVATE
10 #include "scheduler.h"
12 #include <event2/event.h>
14 /**
15 * \file scheduler.c
16 * \brief Channel scheduling system: decides which channels should send and
17 * receive when.
19 * This module is the global/common parts of the scheduling system. This system
20 * is what decides what channels get to send cells on their circuits and when.
22 * Terms:
23 * - "Scheduling system": the collection of scheduler*.{h,c} files and their
24 * aggregate behavior.
25 * - "Scheduler implementation": a scheduler_t. The scheduling system has one
26 * active scheduling implementation at a time.
28 * In this file you will find state that any scheduler implementation can have
29 * access to as well as the functions the rest of Tor uses to interact with the
30 * scheduling system.
32 * The earliest versions of Tor approximated a kind of round-robin system
33 * among active connections, but only approximated it. It would only consider
34 * one connection (roughly equal to a channel in today's terms) at a time, and
35 * thus could only prioritize circuits against others on the same connection.
37 * Then in response to the KIST paper[0], Tor implemented a global
38 * circuit scheduler. It was supposed to prioritize circuits across many
39 * channels, but wasn't effective. It is preserved in scheduler_vanilla.c.
41 * [0]: http://www.robgjansen.com/publications/kist-sec2014.pdf
43 * Then we actually got around to implementing KIST for real. We decided to
44 * modularize the scheduler so new ones can be implemented. You can find KIST
45 * in scheduler_kist.c.
47 * Channels have one of four scheduling states based on whether or not they
48 * have cells to send and whether or not they are able to send.
50 * <ol>
51 * <li>
52 * Not open for writes, no cells to send.
53 * <ul><li> Not much to do here, and the channel will have scheduler_state
54 * == SCHED_CHAN_IDLE
55 * <li> Transitions from:
56 * <ul>
57 * <li>Open for writes/has cells by simultaneously draining all circuit
58 * queues and filling the output buffer.
59 * </ul>
60 * <li> Transitions to:
61 * <ul>
62 * <li> Not open for writes/has cells by arrival of cells on an attached
63 * circuit (this would be driven from append_cell_to_circuit_queue())
64 * <li> Open for writes/no cells by a channel type specific path;
65 * driven from connection_or_flushed_some() for channel_tls_t.
66 * </ul>
67 * </ul>
69 * <li> Open for writes, no cells to send
70 * <ul>
71 * <li>Not much here either; this will be the state an idle but open
72 * channel can be expected to settle in. It will have scheduler_state
73 * == SCHED_CHAN_WAITING_FOR_CELLS
74 * <li> Transitions from:
75 * <ul>
76 * <li>Not open for writes/no cells by flushing some of the output
77 * buffer.
78 * <li>Open for writes/has cells by the scheduler moving cells from
79 * circuit queues to channel output queue, but not having enough
80 * to fill the output queue.
81 * </ul>
82 * <li> Transitions to:
83 * <ul>
84 * <li>Open for writes/has cells by arrival of new cells on an attached
85 * circuit, in append_cell_to_circuit_queue()
86 * </ul>
87 * </ul>
89 * <li>Not open for writes, cells to send
90 * <ul>
91 * <li>This is the state of a busy circuit limited by output bandwidth;
92 * cells have piled up in the circuit queues waiting to be relayed.
93 * The channel will have scheduler_state == SCHED_CHAN_WAITING_TO_WRITE.
94 * <li> Transitions from:
95 * <ul>
96 * <li>Not open for writes/no cells by arrival of cells on an attached
97 * circuit
98 * <li>Open for writes/has cells by filling an output buffer without
99 * draining all cells from attached circuits
100 * </ul>
101 * <li> Transitions to:
102 * <ul>
103 * <li>Open for writes/has cells by draining some of the output buffer
104 * via the connection_or_flushed_some() path (for channel_tls_t).
105 * </ul>
106 * </ul>
108 * <li>Open for writes, cells to send
109 * <ul>
110 * <li>This connection is ready to relay some cells and waiting for
111 * the scheduler to choose it. The channel will have scheduler_state ==
112 * SCHED_CHAN_PENDING.
113 * <li>Transitions from:
114 * <ul>
115 * <li>Not open for writes/has cells by the connection_or_flushed_some()
116 * path
117 * <li>Open for writes/no cells by the append_cell_to_circuit_queue()
118 * path
119 * </ul>
120 * <li> Transitions to:
121 * <ul>
122 * <li>Not open for writes/no cells by draining all circuit queues and
123 * simultaneously filling the output buffer.
124 * <li>Not open for writes/has cells by writing enough cells to fill the
125 * output buffer
126 * <li>Open for writes/no cells by draining all attached circuit queues
127 * without also filling the output buffer
128 * </ul>
129 * </ul>
130 * </ol>
132 * Other event-driven parts of the code move channels between these scheduling
133 * states by calling scheduler functions. The scheduling system builds up a
134 * list of channels in the SCHED_CHAN_PENDING state that the scheduler
135 * implementation should then use when it runs. Scheduling implementations need
136 * to properly update channel states during their scheduler_t->run() function
137 * as that is the only opportunity for channels to move from SCHED_CHAN_PENDING
138 * to any other state.
140 * The remainder of this file is a small amount of state that any scheduler
141 * implementation should have access to, and the functions the rest of Tor uses
142 * to interact with the scheduling system.
145 /*****************************************************************************
146 * Scheduling system state
148 * State that can be accessed from any scheduler implementation (but not
149 * outside the scheduling system)
150 *****************************************************************************/
152 /** DOCDOC */
153 STATIC const scheduler_t *the_scheduler;
156 * We keep a list of channels that are pending - i.e, have cells to write
157 * and can accept them to send. The enum scheduler_state in channel_t
158 * is reserved for our use.
160 * Priority queue of channels that can write and have cells (pending work)
162 STATIC smartlist_t *channels_pending = NULL;
165 * This event runs the scheduler from its callback, and is manually
166 * activated whenever a channel enters open for writes/cells to send.
168 STATIC struct event *run_sched_ev = NULL;
170 /*****************************************************************************
171 * Scheduling system static function definitions
173 * Functions that can only be accessed from this file.
174 *****************************************************************************/
177 * Scheduler event callback; this should get triggered once per event loop
178 * if any scheduling work was created during the event loop.
180 static void
181 scheduler_evt_callback(evutil_socket_t fd, short events, void *arg)
183 (void) fd;
184 (void) events;
185 (void) arg;
187 log_debug(LD_SCHED, "Scheduler event callback called");
189 /* Run the scheduler. This is a mandatory function. */
191 /* We might as well assert on this. If this function doesn't exist, no cells
192 * are getting scheduled. Things are very broken. scheduler_t says the run()
193 * function is mandatory. */
194 tor_assert(the_scheduler->run);
195 the_scheduler->run();
197 /* Schedule itself back in if it has more work. */
199 /* Again, might as well assert on this mandatory scheduler_t function. If it
200 * doesn't exist, there's no way to tell libevent to run the scheduler again
201 * in the future. */
202 tor_assert(the_scheduler->schedule);
203 the_scheduler->schedule();
206 /*****************************************************************************
207 * Scheduling system private function definitions
209 * Functions that can only be accessed from scheduler*.c
210 *****************************************************************************/
212 /** Return the pending channel list. */
213 smartlist_t *
214 get_channels_pending(void)
216 return channels_pending;
219 /** Comparison function to use when sorting pending channels. */
220 MOCK_IMPL(int,
221 scheduler_compare_channels, (const void *c1_v, const void *c2_v))
223 const channel_t *c1 = NULL, *c2 = NULL;
224 /* These are a workaround for -Wbad-function-cast throwing a fit */
225 const circuitmux_policy_t *p1, *p2;
226 uintptr_t p1_i, p2_i;
228 tor_assert(c1_v);
229 tor_assert(c2_v);
231 c1 = (const channel_t *)(c1_v);
232 c2 = (const channel_t *)(c2_v);
234 if (c1 != c2) {
235 if (circuitmux_get_policy(c1->cmux) ==
236 circuitmux_get_policy(c2->cmux)) {
237 /* Same cmux policy, so use the mux comparison */
238 return circuitmux_compare_muxes(c1->cmux, c2->cmux);
239 } else {
241 * Different policies; not important to get this edge case perfect
242 * because the current code never actually gives different channels
243 * different cmux policies anyway. Just use this arbitrary but
244 * definite choice.
246 p1 = circuitmux_get_policy(c1->cmux);
247 p2 = circuitmux_get_policy(c2->cmux);
248 p1_i = (uintptr_t)p1;
249 p2_i = (uintptr_t)p2;
251 return (p1_i < p2_i) ? -1 : 1;
253 } else {
254 /* c1 == c2, so always equal */
255 return 0;
259 /*****************************************************************************
260 * Scheduling system global functions
262 * Functions that can be accessed from anywhere in Tor.
263 *****************************************************************************/
265 /** Using the global options, select the scheduler we should be using. */
266 static void
267 select_scheduler(void)
269 const char *chosen_sched_type = NULL;
271 #ifdef TOR_UNIT_TESTS
272 /* This is hella annoying to set in the options for every test that passes
273 * through the scheduler and there are many so if we don't explicitly have
274 * a list of types set, just put the vanilla one. */
275 if (get_options()->SchedulerTypes_ == NULL) {
276 the_scheduler = get_vanilla_scheduler();
277 return;
279 #endif /* defined(TOR_UNIT_TESTS) */
281 /* This list is ordered that is first entry has the first priority. Thus, as
282 * soon as we find a scheduler type that we can use, we use it and stop. */
283 SMARTLIST_FOREACH_BEGIN(get_options()->SchedulerTypes_, int *, type) {
284 switch (*type) {
285 case SCHEDULER_VANILLA:
286 the_scheduler = get_vanilla_scheduler();
287 chosen_sched_type = "Vanilla";
288 goto end;
289 case SCHEDULER_KIST:
290 if (!scheduler_can_use_kist()) {
291 #ifdef HAVE_KIST_SUPPORT
292 if (get_options()->KISTSchedRunInterval == -1) {
293 log_info(LD_SCHED, "Scheduler type KIST can not be used. It is "
294 "disabled because KISTSchedRunInterval=-1");
295 } else {
296 log_notice(LD_SCHED, "Scheduler type KIST has been disabled by "
297 "the consensus.");
299 #else /* !(defined(HAVE_KIST_SUPPORT)) */
300 log_info(LD_SCHED, "Scheduler type KIST not built in");
301 #endif /* defined(HAVE_KIST_SUPPORT) */
302 continue;
304 the_scheduler = get_kist_scheduler();
305 chosen_sched_type = "KIST";
306 scheduler_kist_set_full_mode();
307 goto end;
308 case SCHEDULER_KIST_LITE:
309 chosen_sched_type = "KISTLite";
310 the_scheduler = get_kist_scheduler();
311 scheduler_kist_set_lite_mode();
312 goto end;
313 default:
314 /* Our option validation should have caught this. */
315 tor_assert_unreached();
317 } SMARTLIST_FOREACH_END(type);
319 end:
320 log_notice(LD_CONFIG, "Scheduler type %s has been enabled.",
321 chosen_sched_type);
325 * Helper function called from a few different places. It changes the
326 * scheduler implementation, if necessary. And if it did, it then tells the
327 * old one to free its state and the new one to initialize.
329 static void
330 set_scheduler(void)
332 const scheduler_t *old_scheduler = the_scheduler;
334 /* From the options, select the scheduler type to set. */
335 select_scheduler();
337 if (old_scheduler != the_scheduler) {
338 /* Allow the old scheduler to clean up, if needed. */
339 if (old_scheduler && old_scheduler->free_all) {
340 old_scheduler->free_all();
343 /* Initialize the new scheduler. */
344 if (the_scheduler->init) {
345 the_scheduler->init();
351 * This is how the scheduling system is notified of Tor's configuration
352 * changing. For example: a SIGHUP was issued.
354 void
355 scheduler_conf_changed(void)
357 /* Let the scheduler decide what it should do. */
358 set_scheduler();
360 /* Then tell the (possibly new) scheduler that we have new options. */
361 if (the_scheduler->on_new_options) {
362 the_scheduler->on_new_options();
367 * Whenever we get a new consensus, this function is called.
369 void
370 scheduler_notify_networkstatus_changed(const networkstatus_t *old_c,
371 const networkstatus_t *new_c)
373 /* Maybe the consensus param made us change the scheduler. */
374 set_scheduler();
376 /* Then tell the (possibly new) scheduler that we have a new consensus */
377 if (the_scheduler->on_new_consensus) {
378 the_scheduler->on_new_consensus(old_c, new_c);
383 * Free everything scheduling-related from main.c. Note this is only called
384 * when Tor is shutting down, while scheduler_t->free_all() is called both when
385 * Tor is shutting down and when we are switching schedulers.
387 void
388 scheduler_free_all(void)
390 log_debug(LD_SCHED, "Shutting down scheduler");
392 if (run_sched_ev) {
393 if (event_del(run_sched_ev) < 0) {
394 log_warn(LD_BUG, "Problem deleting run_sched_ev");
396 tor_event_free(run_sched_ev);
397 run_sched_ev = NULL;
400 if (channels_pending) {
401 /* We don't have ownership of the objects in this list. */
402 smartlist_free(channels_pending);
403 channels_pending = NULL;
406 if (the_scheduler && the_scheduler->free_all) {
407 the_scheduler->free_all();
409 the_scheduler = NULL;
412 /** Mark a channel as no longer ready to accept writes. */
413 MOCK_IMPL(void,
414 scheduler_channel_doesnt_want_writes,(channel_t *chan))
416 IF_BUG_ONCE(!chan) {
417 return;
419 IF_BUG_ONCE(!channels_pending) {
420 return;
423 /* If it's already in pending, we can put it in waiting_to_write */
424 if (chan->scheduler_state == SCHED_CHAN_PENDING) {
426 * It's in channels_pending, so it shouldn't be in any of
427 * the other lists. It can't write any more, so it goes to
428 * channels_waiting_to_write.
430 smartlist_pqueue_remove(channels_pending,
431 scheduler_compare_channels,
432 offsetof(channel_t, sched_heap_idx),
433 chan);
434 chan->scheduler_state = SCHED_CHAN_WAITING_TO_WRITE;
435 log_debug(LD_SCHED,
436 "Channel " U64_FORMAT " at %p went from pending "
437 "to waiting_to_write",
438 U64_PRINTF_ARG(chan->global_identifier), chan);
439 } else {
441 * It's not in pending, so it can't become waiting_to_write; it's
442 * either not in any of the lists (nothing to do) or it's already in
443 * waiting_for_cells (remove it, can't write any more).
445 if (chan->scheduler_state == SCHED_CHAN_WAITING_FOR_CELLS) {
446 chan->scheduler_state = SCHED_CHAN_IDLE;
447 log_debug(LD_SCHED,
448 "Channel " U64_FORMAT " at %p left waiting_for_cells",
449 U64_PRINTF_ARG(chan->global_identifier), chan);
454 /** Mark a channel as having waiting cells. */
455 MOCK_IMPL(void,
456 scheduler_channel_has_waiting_cells,(channel_t *chan))
458 IF_BUG_ONCE(!chan) {
459 return;
461 IF_BUG_ONCE(!channels_pending) {
462 return;
465 /* First, check if it's also writeable */
466 if (chan->scheduler_state == SCHED_CHAN_WAITING_FOR_CELLS) {
468 * It's in channels_waiting_for_cells, so it shouldn't be in any of
469 * the other lists. It has waiting cells now, so it goes to
470 * channels_pending.
472 chan->scheduler_state = SCHED_CHAN_PENDING;
473 smartlist_pqueue_add(channels_pending,
474 scheduler_compare_channels,
475 offsetof(channel_t, sched_heap_idx),
476 chan);
477 log_debug(LD_SCHED,
478 "Channel " U64_FORMAT " at %p went from waiting_for_cells "
479 "to pending",
480 U64_PRINTF_ARG(chan->global_identifier), chan);
481 /* If we made a channel pending, we potentially have scheduling work to
482 * do. */
483 the_scheduler->schedule();
484 } else {
486 * It's not in waiting_for_cells, so it can't become pending; it's
487 * either not in any of the lists (we add it to waiting_to_write)
488 * or it's already in waiting_to_write or pending (we do nothing)
490 if (!(chan->scheduler_state == SCHED_CHAN_WAITING_TO_WRITE ||
491 chan->scheduler_state == SCHED_CHAN_PENDING)) {
492 chan->scheduler_state = SCHED_CHAN_WAITING_TO_WRITE;
493 log_debug(LD_SCHED,
494 "Channel " U64_FORMAT " at %p entered waiting_to_write",
495 U64_PRINTF_ARG(chan->global_identifier), chan);
500 /** Add the scheduler event to the set of pending events with next_run being
501 * the longest time libevent should wait before triggering the event. */
502 void
503 scheduler_ev_add(const struct timeval *next_run)
505 tor_assert(run_sched_ev);
506 tor_assert(next_run);
507 if (BUG(event_add(run_sched_ev, next_run) < 0)) {
508 log_warn(LD_SCHED, "Adding to libevent failed. Next run time was set to: "
509 "%ld.%06ld", next_run->tv_sec, (long)next_run->tv_usec);
510 return;
514 /** Make the scheduler event active with the given flags. */
515 void
516 scheduler_ev_active(int flags)
518 tor_assert(run_sched_ev);
519 event_active(run_sched_ev, flags, 1);
523 * Initialize everything scheduling-related from config.c. Note this is only
524 * called when Tor is starting up, while scheduler_t->init() is called both
525 * when Tor is starting up and when we are switching schedulers.
527 void
528 scheduler_init(void)
530 log_debug(LD_SCHED, "Initting scheduler");
532 // Two '!' because we really do want to check if the pointer is non-NULL
533 IF_BUG_ONCE(!!run_sched_ev) {
534 log_warn(LD_SCHED, "We should not already have a libevent scheduler event."
535 "I'll clean the old one up, but this is odd.");
536 tor_event_free(run_sched_ev);
537 run_sched_ev = NULL;
539 run_sched_ev = tor_event_new(tor_libevent_get_base(), -1,
540 0, scheduler_evt_callback, NULL);
541 channels_pending = smartlist_new();
543 set_scheduler();
547 * If a channel is going away, this is how the scheduling system is informed
548 * so it can do any freeing necessary. This ultimately calls
549 * scheduler_t->on_channel_free() so the current scheduler can release any
550 * state specific to this channel.
552 MOCK_IMPL(void,
553 scheduler_release_channel,(channel_t *chan))
555 IF_BUG_ONCE(!chan) {
556 return;
558 IF_BUG_ONCE(!channels_pending) {
559 return;
562 if (chan->scheduler_state == SCHED_CHAN_PENDING) {
563 if (smartlist_pos(channels_pending, chan) == -1) {
564 log_warn(LD_SCHED, "Scheduler asked to release channel %" PRIu64 " "
565 "but it wasn't in channels_pending",
566 chan->global_identifier);
567 } else {
568 smartlist_pqueue_remove(channels_pending,
569 scheduler_compare_channels,
570 offsetof(channel_t, sched_heap_idx),
571 chan);
575 if (the_scheduler->on_channel_free) {
576 the_scheduler->on_channel_free(chan);
578 chan->scheduler_state = SCHED_CHAN_IDLE;
581 /** Mark a channel as ready to accept writes */
583 void
584 scheduler_channel_wants_writes(channel_t *chan)
586 IF_BUG_ONCE(!chan) {
587 return;
589 IF_BUG_ONCE(!channels_pending) {
590 return;
593 /* If it's already in waiting_to_write, we can put it in pending */
594 if (chan->scheduler_state == SCHED_CHAN_WAITING_TO_WRITE) {
596 * It can write now, so it goes to channels_pending.
598 log_debug(LD_SCHED, "chan=%" PRIu64 " became pending",
599 chan->global_identifier);
600 smartlist_pqueue_add(channels_pending,
601 scheduler_compare_channels,
602 offsetof(channel_t, sched_heap_idx),
603 chan);
604 chan->scheduler_state = SCHED_CHAN_PENDING;
605 log_debug(LD_SCHED,
606 "Channel " U64_FORMAT " at %p went from waiting_to_write "
607 "to pending",
608 U64_PRINTF_ARG(chan->global_identifier), chan);
609 /* We just made a channel pending, we have scheduling work to do. */
610 the_scheduler->schedule();
611 } else {
613 * It's not in SCHED_CHAN_WAITING_TO_WRITE, so it can't become pending;
614 * it's either idle and goes to WAITING_FOR_CELLS, or it's a no-op.
616 if (!(chan->scheduler_state == SCHED_CHAN_WAITING_FOR_CELLS ||
617 chan->scheduler_state == SCHED_CHAN_PENDING)) {
618 chan->scheduler_state = SCHED_CHAN_WAITING_FOR_CELLS;
619 log_debug(LD_SCHED,
620 "Channel " U64_FORMAT " at %p entered waiting_for_cells",
621 U64_PRINTF_ARG(chan->global_identifier), chan);
#ifdef TOR_UNIT_TESTS

/**
 * Notify scheduler that a channel's queue position may have changed.
 *
 * Only compiled for unit tests. A channel in SCHED_CHAN_PENDING is removed
 * from the pending queue and added back; channels in any other state are
 * left alone.
 */
void
scheduler_touch_channel(channel_t *chan)
{
  IF_BUG_ONCE(!chan) {
    return;
  }

  if (chan->scheduler_state == SCHED_CHAN_PENDING) {
    /* A pending channel without a pending queue is a bug; don't hand a NULL
     * list to the pqueue functions. */
    IF_BUG_ONCE(!channels_pending) {
      return;
    }
    /* Remove and re-add it */
    smartlist_pqueue_remove(channels_pending,
                            scheduler_compare_channels,
                            offsetof(channel_t, sched_heap_idx),
                            chan);
    smartlist_pqueue_add(channels_pending,
                         scheduler_compare_channels,
                         offsetof(channel_t, sched_heap_idx),
                         chan);
  }
  /* else no-op, since it isn't in the queue */
}

#endif /* defined(TOR_UNIT_TESTS) */