src/core/mainloop/mainloop.c

   1 /* Copyright (c) 2001 Matej Pfajfar.
   2  * Copyright (c) 2001-2004, Roger Dingledine.
   3  * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson.
   4  * Copyright (c) 2007-2018, The Tor Project, Inc. */
   5 /* See LICENSE for licensing information */
   6
   7 /**
   8  * \file mainloop.c
   9  * \brief Toplevel module. Handles signals, multiplexes between
  10  *     connections, implements main loop, and drives scheduled events.
  11  *
 * For the main loop itself, see run_main_loop_once().  It invokes the rest of
  13  * Tor mostly through Libevent callbacks.  Libevent callbacks can happen when
  14  * a timer elapses, a signal is received, a socket is ready to read or write,
  15  * or an event is manually activated.
  16  *
  17  * Most events in Tor are driven from these callbacks:
  18  *  <ul>
  19  *   <li>conn_read_callback() and conn_write_callback() here, which are
  20  *     invoked when a socket is ready to read or write respectively.
  21  *   <li>signal_callback(), which handles incoming signals.
  22  *  </ul>
  23  * Other events are used for specific purposes, or for building more complex
  24  * control structures.  If you search for usage of tor_libevent_new(), you
  25  * will find all the events that we construct in Tor.
  26  *
  27  * Tor has numerous housekeeping operations that need to happen
  28  * regularly. They are handled in different ways:
  29  * <ul>
  30  *   <li>The most frequent operations are handled after every read or write
  31  *    event, at the end of connection_handle_read() and
  32  *    connection_handle_write().
  33  *
  34  *   <li>The next most frequent operations happen after each invocation of the
  35  *     main loop, in run_main_loop_once().
  36  *
  37  *   <li>Once per second, we run all of the operations listed in
  38  *     second_elapsed_callback(), and in its child, run_scheduled_events().
  39  *
  40  *   <li>Once-a-second operations are handled in second_elapsed_callback().
  41  *
  42  *   <li>More infrequent operations take place based on the periodic event
  43  *     driver in periodic.c .  These are stored in the periodic_events[]
  44  *     table.
  45  * </ul>
  46  *
  47  **/
  48
  49 #define MAINLOOP_PRIVATE
  50 #include "core/or/or.h"
  51
  52 #include "app/config/config.h"
  53 #include "app/config/statefile.h"
  54 #include "app/main/ntmain.h"
  55 #include "core/mainloop/connection.h"
  56 #include "core/mainloop/cpuworker.h"
  57 #include "core/mainloop/mainloop.h"
  58 #include "core/mainloop/netstatus.h"
  59 #include "core/mainloop/periodic.h"
  60 #include "core/or/channel.h"
  61 #include "core/or/channelpadding.h"
  62 #include "core/or/channeltls.h"
  63 #include "core/or/circuitbuild.h"
  64 #include "core/or/circuitlist.h"
  65 #include "core/or/circuituse.h"
  66 #include "core/or/connection_edge.h"
  67 #include "core/or/connection_or.h"
  68 #include "core/or/dos.h"
  69 #include "core/or/status.h"
  70 #include "feature/client/addressmap.h"
  71 #include "feature/client/bridges.h"
  72 #include "feature/client/dnsserv.h"
  73 #include "feature/client/entrynodes.h"
  74 #include "feature/client/transports.h"
  75 #include "feature/control/control.h"
  76 #include "feature/dirauth/authmode.h"
  77 #include "feature/dirauth/reachability.h"
  78 #include "feature/dircache/consdiffmgr.h"
  79 #include "feature/dircache/dirserv.h"
  80 #include "feature/dircommon/directory.h"
  81 #include "feature/hibernate/hibernate.h"
  82 #include "feature/hs/hs_cache.h"
  83 #include "feature/hs/hs_client.h"
  84 #include "feature/hs/hs_service.h"
  85 #include "feature/nodelist/microdesc.h"
  86 #include "feature/nodelist/networkstatus.h"
  87 #include "feature/nodelist/nodelist.h"
  88 #include "feature/nodelist/routerlist.h"
  89 #include "feature/relay/dns.h"
  90 #include "feature/relay/routerkeys.h"
  91 #include "feature/relay/routermode.h"
  92 #include "feature/relay/selftest.h"
  93 #include "feature/rend/rendcache.h"
  94 #include "feature/rend/rendservice.h"
  95 #include "feature/stats/geoip_stats.h"
  96 #include "feature/stats/predict_ports.h"
  97 #include "feature/stats/rephist.h"
  98 #include "lib/container/buffers.h"
  99 #include "lib/crypt_ops/crypto_rand.h"
 100 #include "lib/err/backtrace.h"
 101 #include "lib/tls/buffers_tls.h"
 102
 103 #include "lib/net/buffers_net.h"
 104 #include "lib/evloop/compat_libevent.h"
 105
 106 #include <event2/event.h>
 107
 108 #include "feature/dirauth/dirvote.h"
 109 #include "feature/dirauth/authmode.h"
 110
 111 #include "core/or/cell_st.h"
 112 #include "core/or/entry_connection_st.h"
 113 #include "feature/nodelist/networkstatus_st.h"
 114 #include "core/or/or_connection_st.h"
 115 #include "app/config/or_state_st.h"
 116 #include "feature/nodelist/routerinfo_st.h"
 117 #include "core/or/socks_request_st.h"
 118
 119 #ifdef HAVE_UNISTD_H
 120 #include <unistd.h>
 121 #endif
 122
 123 #ifdef HAVE_SYSTEMD
 124 #   if defined(__COVERITY__) && !defined(__INCLUDE_LEVEL__)
 125 /* Systemd's use of gcc's __INCLUDE_LEVEL__ extension macro appears to confuse
 126  * Coverity. Here's a kludge to unconfuse it.
 127  */
 128 #   define __INCLUDE_LEVEL__ 2
 129 #endif /* defined(__COVERITY__) && !defined(__INCLUDE_LEVEL__) */
 130 #include <systemd/sd-daemon.h>
 131 #endif /* defined(HAVE_SYSTEMD) */
 132
/* Token bucket for all traffic. */
token_bucket_rw_t global_bucket;

/* Token bucket for relayed traffic. */
token_bucket_rw_t global_relayed_bucket;

/* XXX we might want to keep stats about global_relayed_*_bucket too. Or not.*/
/** How many bytes have we read since we started the process?  Increased by
 * stats_increment_bytes_read_and_written(); reported by get_bytes_read(). */
static uint64_t stats_n_bytes_read = 0;
/** How many bytes have we written since we started the process?  Increased
 * by stats_increment_bytes_read_and_written(); reported by
 * get_bytes_written(). */
static uint64_t stats_n_bytes_written = 0;
/** What time did this process start up? */
time_t time_of_process_start = 0;
/** How many seconds have we been running? */
static long stats_n_seconds_working = 0;
/** How many times have we returned from the main loop successfully? */
static uint64_t stats_n_main_loop_successes = 0;
/** How many times have we received an error from the main loop? */
static uint64_t stats_n_main_loop_errors = 0;
/** How many times have we returned from the main loop with no events. */
static uint64_t stats_n_main_loop_idle = 0;

/** How often will we honor SIGNEWNYM requests?
 * NOTE(review): presumably a minimum interval in seconds -- confirm against
 * the code that uses it. */
#define MAX_SIGNEWNYM_RATE 10
/** When did we last process a SIGNEWNYM request? */
static time_t time_of_last_signewnym = 0;
/** Is there a signewnym request we're currently waiting to handle? */
static int signewnym_is_pending = 0;
/** Mainloop event for the deferred signewnym call. */
static mainloop_event_t *handle_deferred_signewnym_ev = NULL;
/** How many times have we called newnym? */
static unsigned newnym_epoch = 0;

/** Smartlist of all open connections.  Each member's conn_array_index field
 * holds its position in this list (see connection_add_impl() and
 * connection_remove()). */
STATIC smartlist_t *connection_array = NULL;
/** List of connections that have been marked for close and need to be freed
 * and removed from connection_array. */
static smartlist_t *closeable_connection_lst = NULL;
/** List of linked connections that are currently reading data into their
 * inbuf from their partner's outbuf. */
static smartlist_t *active_linked_connection_lst = NULL;
/** Flag: Set to true iff we entered the current libevent main loop via
 * <b>loop_once</b>. If so, there's no need to trigger a loopexit in order
 * to handle linked connections. */
static int called_loop_once = 0;
/** Flag: if true, it's time to shut down, so the main loop should exit as
 * soon as possible.  Set by tor_shutdown_event_loop_and_exit().
 */
static int main_loop_should_exit = 0;
/** The return value that the main loop should yield when it exits, if
 * main_loop_should_exit is true.
 */
static int main_loop_exit_value = 0;

/** We set this to 1 when we've opened a circuit, so we can print a log
 * entry to inform the user that Tor is working.  We set it to 0 when
 * we think the fact that we once opened a circuit doesn't mean we can do so
 * any longer (a big time jump happened, when we notice our directory is
 * heinously out-of-date, etc.).
 */
static int can_complete_circuits = 0;
 194
/** How often do we check for router descriptors that we should download
 * when we have too little directory info?
 * NOTE(review): presumably in seconds -- confirm at the point of use. */
#define GREEDY_DESCRIPTOR_RETRY_INTERVAL (10)
/** How often do we check for router descriptors that we should download
 * when we have enough directory info?
 * NOTE(review): presumably in seconds -- confirm at the point of use. */
#define LAZY_DESCRIPTOR_RETRY_INTERVAL (60)

/* Forward declarations of file-local (static) functions, so that they can be
 * used before the point where they are defined. */
static int conn_close_if_marked(int i);
static int run_main_loop_until_done(void);
static void connection_start_reading_from_linked_conn(connection_t *conn);
static int connection_should_read_from_linked_conn(connection_t *conn);
/* Libevent callbacks invoked for the per-connection read and write events
 * that connection_add_impl() creates. */
static void conn_read_callback(evutil_socket_t fd, short event, void *_conn);
static void conn_write_callback(evutil_socket_t fd, short event, void *_conn);
/* Failsafe timer callback; declared ATTR_NORETURN because it never returns. */
static void shutdown_did_not_work_callback(evutil_socket_t fd, short event,
                                           void *arg) ATTR_NORETURN;
 210
 211 /****************************************************************************
 212  *
 213  * This section contains accessors and other methods on the connection_array
 214  * variables (which are global within this file and unavailable outside it).
 215  *
 216  ****************************************************************************/
 217
 218 /** Return 1 if we have successfully built a circuit, and nothing has changed
 219  * to make us think that maybe we can't.
 220  */
 221 int
 222 have_completed_a_circuit(void)
 223 {
 224   return can_complete_circuits;
 225 }
 226
 227 /** Note that we have successfully built a circuit, so that reachability
 228  * testing and introduction points and so on may be attempted. */
 229 void
 230 note_that_we_completed_a_circuit(void)
 231 {
 232   can_complete_circuits = 1;
 233 }
 234
 235 /** Note that something has happened (like a clock jump, or DisableNetwork) to
 236  * make us think that maybe we can't complete circuits. */
 237 void
 238 note_that_we_maybe_cant_complete_circuits(void)
 239 {
 240   can_complete_circuits = 0;
 241 }
 242
 243 /** Add <b>conn</b> to the array of connections that we can poll on.  The
 244  * connection's socket must be set; the connection starts out
 245  * non-reading and non-writing.
 246  */
 247 int
 248 connection_add_impl(connection_t *conn, int is_connecting)
 249 {
 250   tor_assert(conn);
 251   tor_assert(SOCKET_OK(conn->s) ||
 252              conn->linked ||
 253              (conn->type == CONN_TYPE_AP &&
 254               TO_EDGE_CONN(conn)->is_dns_request));
 255
 256   tor_assert(conn->conn_array_index == -1); /* can only connection_add once */
 257   conn->conn_array_index = smartlist_len(connection_array);
 258   smartlist_add(connection_array, conn);
 259
 260   (void) is_connecting;
 261
 262   if (SOCKET_OK(conn->s) || conn->linked) {
 263     conn->read_event = tor_event_new(tor_libevent_get_base(),
 264          conn->s, EV_READ|EV_PERSIST, conn_read_callback, conn);
 265     conn->write_event = tor_event_new(tor_libevent_get_base(),
 266          conn->s, EV_WRITE|EV_PERSIST, conn_write_callback, conn);
 267     /* XXXX CHECK FOR NULL RETURN! */
 268   }
 269
 270   log_debug(LD_NET,"new conn type %s, socket %d, address %s, n_conns %d.",
 271             conn_type_to_string(conn->type), (int)conn->s, conn->address,
 272             smartlist_len(connection_array));
 273
 274   return 0;
 275 }
 276
 277 /** Tell libevent that we don't care about <b>conn</b> any more. */
 278 void
 279 connection_unregister_events(connection_t *conn)
 280 {
 281   if (conn->read_event) {
 282     if (event_del(conn->read_event))
 283       log_warn(LD_BUG, "Error removing read event for %d", (int)conn->s);
 284     tor_free(conn->read_event);
 285   }
 286   if (conn->write_event) {
 287     if (event_del(conn->write_event))
 288       log_warn(LD_BUG, "Error removing write event for %d", (int)conn->s);
 289     tor_free(conn->write_event);
 290   }
 291   if (conn->type == CONN_TYPE_AP_DNS_LISTENER) {
 292     dnsserv_close_listener(conn);
 293   }
 294 }
 295
 296 /** Remove the connection from the global list, and remove the
 297  * corresponding poll entry.  Calling this function will shift the last
 298  * connection (if any) into the position occupied by conn.
 299  */
 300 int
 301 connection_remove(connection_t *conn)
 302 {
 303   int current_index;
 304   connection_t *tmp;
 305
 306   tor_assert(conn);
 307
 308   log_debug(LD_NET,"removing socket %d (type %s), n_conns now %d",
 309             (int)conn->s, conn_type_to_string(conn->type),
 310             smartlist_len(connection_array));
 311
 312   if (conn->type == CONN_TYPE_AP && conn->socket_family == AF_UNIX) {
 313     log_info(LD_NET, "Closing SOCKS Unix socket connection");
 314   }
 315
 316   control_event_conn_bandwidth(conn);
 317
 318   tor_assert(conn->conn_array_index >= 0);
 319   current_index = conn->conn_array_index;
 320   connection_unregister_events(conn); /* This is redundant, but cheap. */
 321   if (current_index == smartlist_len(connection_array)-1) { /* at the end */
 322     smartlist_del(connection_array, current_index);
 323     return 0;
 324   }
 325
 326   /* replace this one with the one at the end */
 327   smartlist_del(connection_array, current_index);
 328   tmp = smartlist_get(connection_array, current_index);
 329   tmp->conn_array_index = current_index;
 330
 331   return 0;
 332 }
 333
 334 /** If <b>conn</b> is an edge conn, remove it from the list
 335  * of conn's on this circuit. If it's not on an edge,
 336  * flush and send destroys for all circuits on this conn.
 337  *
 338  * Remove it from connection_array (if applicable) and
 339  * from closeable_connection_list.
 340  *
 341  * Then free it.
 342  */
 343 static void
 344 connection_unlink(connection_t *conn)
 345 {
 346   connection_about_to_close_connection(conn);
 347   if (conn->conn_array_index >= 0) {
 348     connection_remove(conn);
 349   }
 350   if (conn->linked_conn) {
 351     conn->linked_conn->linked_conn = NULL;
 352     if (! conn->linked_conn->marked_for_close &&
 353         conn->linked_conn->reading_from_linked_conn)
 354       connection_start_reading(conn->linked_conn);
 355     conn->linked_conn = NULL;
 356   }
 357   smartlist_remove(closeable_connection_lst, conn);
 358   smartlist_remove(active_linked_connection_lst, conn);
 359   if (conn->type == CONN_TYPE_EXIT) {
 360     assert_connection_edge_not_dns_pending(TO_EDGE_CONN(conn));
 361   }
 362   if (conn->type == CONN_TYPE_OR) {
 363     if (!tor_digest_is_zero(TO_OR_CONN(conn)->identity_digest))
 364       connection_or_clear_identity(TO_OR_CONN(conn));
 365     /* connection_unlink() can only get called if the connection
 366      * was already on the closeable list, and it got there by
 367      * connection_mark_for_close(), which was called from
 368      * connection_or_close_normally() or
 369      * connection_or_close_for_error(), so the channel should
 370      * already be in CHANNEL_STATE_CLOSING, and then the
 371      * connection_about_to_close_connection() goes to
 372      * connection_or_about_to_close(), which calls channel_closed()
 373      * to notify the channel_t layer, and closed the channel, so
 374      * nothing more to do here to deal with the channel associated
 375      * with an orconn.
 376      */
 377   }
 378   connection_free(conn);
 379 }
 380
 381 /**
 382  * Callback: used to activate read events for all linked connections, so
 383  * libevent knows to call their read callbacks.  This callback run as a
 384  * postloop event, so that the events _it_ activates don't happen until
 385  * Libevent has a chance to check for other events.
 386  */
 387 static void
 388 schedule_active_linked_connections_cb(mainloop_event_t *event, void *arg)
 389 {
 390   (void)event;
 391   (void)arg;
 392
 393   /* All active linked conns should get their read events activated,
 394    * so that libevent knows to run their callbacks. */
 395   SMARTLIST_FOREACH(active_linked_connection_lst, connection_t *, conn,
 396                     event_active(conn->read_event, EV_READ, 1));
 397 }
 398
 399 /** Event that invokes schedule_active_linked_connections_cb. */
 400 static mainloop_event_t *schedule_active_linked_connections_event = NULL;
 401
 402 /** Initialize the global connection list, closeable connection list,
 403  * and active connection list. */
 404 void
 405 tor_init_connection_lists(void)
 406 {
 407   if (!connection_array)
 408     connection_array = smartlist_new();
 409   if (!closeable_connection_lst)
 410     closeable_connection_lst = smartlist_new();
 411   if (!active_linked_connection_lst)
 412     active_linked_connection_lst = smartlist_new();
 413 }
 414
 415 /** Schedule <b>conn</b> to be closed. **/
 416 void
 417 add_connection_to_closeable_list(connection_t *conn)
 418 {
 419   tor_assert(!smartlist_contains(closeable_connection_lst, conn));
 420   tor_assert(conn->marked_for_close);
 421   assert_connection_ok(conn, time(NULL));
 422   smartlist_add(closeable_connection_lst, conn);
 423   mainloop_schedule_postloop_cleanup();
 424 }
 425
 426 /** Return 1 if conn is on the closeable list, else return 0. */
 427 int
 428 connection_is_on_closeable_list(connection_t *conn)
 429 {
 430   return smartlist_contains(closeable_connection_lst, conn);
 431 }
 432
 433 /** Return true iff conn is in the current poll array. */
 434 int
 435 connection_in_array(connection_t *conn)
 436 {
 437   return smartlist_contains(connection_array, conn);
 438 }
 439
 440 /** Set <b>*array</b> to an array of all connections. <b>*array</b> must not
 441  * be modified.
 442  */
 443 MOCK_IMPL(smartlist_t *,
 444 get_connection_array, (void))
 445 {
 446   if (!connection_array)
 447     connection_array = smartlist_new();
 448   return connection_array;
 449 }
 450
 451 /**
 452  * Return the amount of network traffic read, in bytes, over the life of this
 453  * process.
 454  */
 455 MOCK_IMPL(uint64_t,
 456 get_bytes_read,(void))
 457 {
 458   return stats_n_bytes_read;
 459 }
 460
 461 /**
 462  * Return the amount of network traffic read, in bytes, over the life of this
 463  * process.
 464  */
 465 MOCK_IMPL(uint64_t,
 466 get_bytes_written,(void))
 467 {
 468   return stats_n_bytes_written;
 469 }
 470
 471 /**
 472  * Increment the amount of network traffic read and written, over the life of
 473  * this process.
 474  */
 475 void
 476 stats_increment_bytes_read_and_written(uint64_t r, uint64_t w)
 477 {
 478   stats_n_bytes_read += r;
 479   stats_n_bytes_written += w;
 480 }
 481
 482 /** Set the event mask on <b>conn</b> to <b>events</b>.  (The event
 483  * mask is a bitmask whose bits are READ_EVENT and WRITE_EVENT)
 484  */
 485 void
 486 connection_watch_events(connection_t *conn, watchable_events_t events)
 487 {
 488   if (events & READ_EVENT)
 489     connection_start_reading(conn);
 490   else
 491     connection_stop_reading(conn);
 492
 493   if (events & WRITE_EVENT)
 494     connection_start_writing(conn);
 495   else
 496     connection_stop_writing(conn);
 497 }
 498
 499 /** Return true iff <b>conn</b> is listening for read events. */
 500 int
 501 connection_is_reading(connection_t *conn)
 502 {
 503   tor_assert(conn);
 504
 505   return conn->reading_from_linked_conn ||
 506     (conn->read_event && event_pending(conn->read_event, EV_READ, NULL));
 507 }
 508
 509 /** Reset our main loop counters. */
 510 void
 511 reset_main_loop_counters(void)
 512 {
 513   stats_n_main_loop_successes = 0;
 514   stats_n_main_loop_errors = 0;
 515   stats_n_main_loop_idle = 0;
 516 }
 517
 518 /** Increment the main loop success counter. */
 519 static void
 520 increment_main_loop_success_count(void)
 521 {
 522   ++stats_n_main_loop_successes;
 523 }
 524
 525 /** Get the main loop success counter. */
 526 uint64_t
 527 get_main_loop_success_count(void)
 528 {
 529   return stats_n_main_loop_successes;
 530 }
 531
 532 /** Increment the main loop error counter. */
 533 static void
 534 increment_main_loop_error_count(void)
 535 {
 536   ++stats_n_main_loop_errors;
 537 }
 538
 539 /** Get the main loop error counter. */
 540 uint64_t
 541 get_main_loop_error_count(void)
 542 {
 543   return stats_n_main_loop_errors;
 544 }
 545
 546 /** Increment the main loop idle counter. */
 547 static void
 548 increment_main_loop_idle_count(void)
 549 {
 550   ++stats_n_main_loop_idle;
 551 }
 552
 553 /** Get the main loop idle counter. */
 554 uint64_t
 555 get_main_loop_idle_count(void)
 556 {
 557   return stats_n_main_loop_idle;
 558 }
 559
 560 /** Check whether <b>conn</b> is correct in having (or not having) a
 561  * read/write event (passed in <b>ev</b>). On success, return 0. On failure,
 562  * log a warning and return -1. */
 563 static int
 564 connection_check_event(connection_t *conn, struct event *ev)
 565 {
 566   int bad;
 567
 568   if (conn->type == CONN_TYPE_AP && TO_EDGE_CONN(conn)->is_dns_request) {
 569     /* DNS requests which we launch through the dnsserv.c module do not have
 570      * any underlying socket or any underlying linked connection, so they
 571      * shouldn't have any attached events either.
 572      */
 573     bad = ev != NULL;
 574   } else {
 575     /* Everything else should have an underlying socket, or a linked
 576      * connection (which is also tracked with a read_event/write_event pair).
 577      */
 578     bad = ev == NULL;
 579   }
 580
 581   if (bad) {
 582     log_warn(LD_BUG, "Event missing on connection %p [%s;%s]. "
 583              "socket=%d. linked=%d. "
 584              "is_dns_request=%d. Marked_for_close=%s:%d",
 585              conn,
 586              conn_type_to_string(conn->type),
 587              conn_state_to_string(conn->type, conn->state),
 588              (int)conn->s, (int)conn->linked,
 589              (conn->type == CONN_TYPE_AP &&
 590                                TO_EDGE_CONN(conn)->is_dns_request),
 591              conn->marked_for_close_file ? conn->marked_for_close_file : "-",
 592              conn->marked_for_close
 593              );
 594     log_backtrace(LOG_WARN, LD_BUG, "Backtrace attached.");
 595     return -1;
 596   }
 597   return 0;
 598 }
 599
 600 /** Tell the main loop to stop notifying <b>conn</b> of any read events. */
 601 MOCK_IMPL(void,
 602 connection_stop_reading,(connection_t *conn))
 603 {
 604   tor_assert(conn);
 605
 606   if (connection_check_event(conn, conn->read_event) < 0) {
 607     return;
 608   }
 609
 610   if (conn->linked) {
 611     conn->reading_from_linked_conn = 0;
 612     connection_stop_reading_from_linked_conn(conn);
 613   } else {
 614     if (event_del(conn->read_event))
 615       log_warn(LD_NET, "Error from libevent setting read event state for %d "
 616                "to unwatched: %s",
 617                (int)conn->s,
 618                tor_socket_strerror(tor_socket_errno(conn->s)));
 619   }
 620 }
 621
 622 /** Tell the main loop to start notifying <b>conn</b> of any read events. */
 623 MOCK_IMPL(void,
 624 connection_start_reading,(connection_t *conn))
 625 {
 626   tor_assert(conn);
 627
 628   if (connection_check_event(conn, conn->read_event) < 0) {
 629     return;
 630   }
 631
 632   if (conn->linked) {
 633     conn->reading_from_linked_conn = 1;
 634     if (connection_should_read_from_linked_conn(conn))
 635       connection_start_reading_from_linked_conn(conn);
 636   } else {
 637     if (event_add(conn->read_event, NULL))
 638       log_warn(LD_NET, "Error from libevent setting read event state for %d "
 639                "to watched: %s",
 640                (int)conn->s,
 641                tor_socket_strerror(tor_socket_errno(conn->s)));
 642   }
 643 }
 644
 645 /** Return true iff <b>conn</b> is listening for write events. */
 646 int
 647 connection_is_writing(connection_t *conn)
 648 {
 649   tor_assert(conn);
 650
 651   return conn->writing_to_linked_conn ||
 652     (conn->write_event && event_pending(conn->write_event, EV_WRITE, NULL));
 653 }
 654
 655 /** Tell the main loop to stop notifying <b>conn</b> of any write events. */
 656 MOCK_IMPL(void,
 657 connection_stop_writing,(connection_t *conn))
 658 {
 659   tor_assert(conn);
 660
 661   if (connection_check_event(conn, conn->write_event) < 0) {
 662     return;
 663   }
 664
 665   if (conn->linked) {
 666     conn->writing_to_linked_conn = 0;
 667     if (conn->linked_conn)
 668       connection_stop_reading_from_linked_conn(conn->linked_conn);
 669   } else {
 670     if (event_del(conn->write_event))
 671       log_warn(LD_NET, "Error from libevent setting write event state for %d "
 672                "to unwatched: %s",
 673                (int)conn->s,
 674                tor_socket_strerror(tor_socket_errno(conn->s)));
 675   }
 676 }
 677
 678 /** Tell the main loop to start notifying <b>conn</b> of any write events. */
 679 MOCK_IMPL(void,
 680 connection_start_writing,(connection_t *conn))
 681 {
 682   tor_assert(conn);
 683
 684   if (connection_check_event(conn, conn->write_event) < 0) {
 685     return;
 686   }
 687
 688   if (conn->linked) {
 689     conn->writing_to_linked_conn = 1;
 690     if (conn->linked_conn &&
 691         connection_should_read_from_linked_conn(conn->linked_conn))
 692       connection_start_reading_from_linked_conn(conn->linked_conn);
 693   } else {
 694     if (event_add(conn->write_event, NULL))
 695       log_warn(LD_NET, "Error from libevent setting write event state for %d "
 696                "to watched: %s",
 697                (int)conn->s,
 698                tor_socket_strerror(tor_socket_errno(conn->s)));
 699   }
 700 }
 701
 702 /** Return true iff <b>conn</b> is linked conn, and reading from the conn
 703  * linked to it would be good and feasible.  (Reading is "feasible" if the
 704  * other conn exists and has data in its outbuf, and is "good" if we have our
 705  * reading_from_linked_conn flag set and the other conn has its
 706  * writing_to_linked_conn flag set.)*/
 707 static int
 708 connection_should_read_from_linked_conn(connection_t *conn)
 709 {
 710   if (conn->linked && conn->reading_from_linked_conn) {
 711     if (! conn->linked_conn ||
 712         (conn->linked_conn->writing_to_linked_conn &&
 713          buf_datalen(conn->linked_conn->outbuf)))
 714       return 1;
 715   }
 716   return 0;
 717 }
 718
 719 /** Event to run 'shutdown did not work callback'. */
 720 static struct event *shutdown_did_not_work_event = NULL;
 721
 722 /** Failsafe measure that should never actually be necessary: If
 723  * tor_shutdown_event_loop_and_exit() somehow doesn't successfully exit the
 724  * event loop, then this callback will kill Tor with an assertion failure
 725  * seconds later
 726  */
 727 static void
 728 shutdown_did_not_work_callback(evutil_socket_t fd, short event, void *arg)
 729 {
 730   // LCOV_EXCL_START
 731   (void) fd;
 732   (void) event;
 733   (void) arg;
 734   tor_assert_unreached();
 735   // LCOV_EXCL_STOP
 736 }
 737
 738 #ifdef ENABLE_RESTART_DEBUGGING
 739 static struct event *tor_shutdown_event_loop_for_restart_event = NULL;
 740 static void
 741 tor_shutdown_event_loop_for_restart_cb(
 742                       evutil_socket_t fd, short event, void *arg)
 743 {
 744   (void)fd;
 745   (void)event;
 746   (void)arg;
 747   tor_event_free(tor_shutdown_event_loop_for_restart_event);
 748   tor_shutdown_event_loop_and_exit(0);
 749 }
 750 #endif
 751
 752 /**
 753  * After finishing the current callback (if any), shut down the main loop,
 754  * clean up the process, and exit with <b>exitcode</b>.
 755  */
 756 void
 757 tor_shutdown_event_loop_and_exit(int exitcode)
 758 {
 759   if (main_loop_should_exit)
 760     return; /* Ignore multiple calls to this function. */
 761
 762   main_loop_should_exit = 1;
 763   main_loop_exit_value = exitcode;
 764
 765   /* Die with an assertion failure in ten seconds, if for some reason we don't
 766    * exit normally. */
 767   /* XXXX We should consider this code if it's never used. */
 768   struct timeval ten_seconds = { 10, 0 };
 769   shutdown_did_not_work_event = tor_evtimer_new(
 770                   tor_libevent_get_base(),
 771                   shutdown_did_not_work_callback, NULL);
 772   event_add(shutdown_did_not_work_event, &ten_seconds);
 773
 774   /* Unlike exit_loop_after_delay(), exit_loop_after_callback
 775    * prevents other callbacks from running. */
 776   tor_libevent_exit_loop_after_callback(tor_libevent_get_base());
 777 }
 778
 779 /** Return true iff tor_shutdown_event_loop_and_exit() has been called. */
 780 int
 781 tor_event_loop_shutdown_is_pending(void)
 782 {
 783   return main_loop_should_exit;
 784 }
 785
 786 /** Helper: Tell the main loop to begin reading bytes into <b>conn</b> from
 787  * its linked connection, if it is not doing so already.  Called by
 788  * connection_start_reading and connection_start_writing as appropriate. */
 789 static void
 790 connection_start_reading_from_linked_conn(connection_t *conn)
 791 {
 792   tor_assert(conn);
 793   tor_assert(conn->linked == 1);
 794
 795   if (!conn->active_on_link) {
 796     conn->active_on_link = 1;
 797     smartlist_add(active_linked_connection_lst, conn);
 798     mainloop_event_activate(schedule_active_linked_connections_event);
 799   } else {
 800     tor_assert(smartlist_contains(active_linked_connection_lst, conn));
 801   }
 802 }
 803
 804 /** Tell the main loop to stop reading bytes into <b>conn</b> from its linked
 805  * connection, if is currently doing so.  Called by connection_stop_reading,
 806  * connection_stop_writing, and connection_read. */
 807 void
 808 connection_stop_reading_from_linked_conn(connection_t *conn)
 809 {
 810   tor_assert(conn);
 811   tor_assert(conn->linked == 1);
 812
 813   if (conn->active_on_link) {
 814     conn->active_on_link = 0;
 815     /* FFFF We could keep an index here so we can smartlist_del
 816      * cleanly.  On the other hand, this doesn't show up on profiles,
 817      * so let's leave it alone for now. */
 818     smartlist_remove(active_linked_connection_lst, conn);
 819   } else {
 820     tor_assert(!smartlist_contains(active_linked_connection_lst, conn));
 821   }
 822 }
 823
 824 /** Close all connections that have been scheduled to get closed. */
 825 STATIC void
 826 close_closeable_connections(void)
 827 {
 828   int i;
 829   for (i = 0; i < smartlist_len(closeable_connection_lst); ) {
 830     connection_t *conn = smartlist_get(closeable_connection_lst, i);
 831     if (conn->conn_array_index < 0) {
 832       connection_unlink(conn); /* blow it away right now */
 833     } else {
 834       if (!conn_close_if_marked(conn->conn_array_index))
 835         ++i;
 836     }
 837   }
 838 }
 839
 840 /** Count moribund connections for the OOS handler */
 841 MOCK_IMPL(int,
 842 connection_count_moribund, (void))
 843 {
 844   int moribund = 0;
 845
 846   /*
 847    * Count things we'll try to kill when close_closeable_connections()
 848    * runs next.
 849    */
 850   SMARTLIST_FOREACH_BEGIN(closeable_connection_lst, connection_t *, conn) {
 851     if (SOCKET_OK(conn->s) && connection_is_moribund(conn)) ++moribund;
 852   } SMARTLIST_FOREACH_END(conn);
 853
 854   return moribund;
 855 }
 856
 857 /** Libevent callback: this gets invoked when (connection_t*)<b>conn</b> has
 858  * some data to read. */
 859 static void
 860 conn_read_callback(evutil_socket_t fd, short event, void *_conn)
 861 {
 862   connection_t *conn = _conn;
 863   (void)fd;
 864   (void)event;
 865
 866   log_debug(LD_NET,"socket %d wants to read.",(int)conn->s);
 867
 868   /* assert_connection_ok(conn, time(NULL)); */
 869
 870   if (connection_handle_read(conn) < 0) {
 871     if (!conn->marked_for_close) {
 872 #ifndef _WIN32
 873       log_warn(LD_BUG,"Unhandled error on read for %s connection "
 874                "(fd %d); removing",
 875                conn_type_to_string(conn->type), (int)conn->s);
 876       tor_fragile_assert();
 877 #endif /* !defined(_WIN32) */
 878       if (CONN_IS_EDGE(conn))
 879         connection_edge_end_errno(TO_EDGE_CONN(conn));
 880       connection_mark_for_close(conn);
 881     }
 882   }
 883   assert_connection_ok(conn, time(NULL));
 884
 885   if (smartlist_len(closeable_connection_lst))
 886     close_closeable_connections();
 887 }
 888
 889 /** Libevent callback: this gets invoked when (connection_t*)<b>conn</b> has
 890  * some data to write. */
 891 static void
 892 conn_write_callback(evutil_socket_t fd, short events, void *_conn)
 893 {
 894   connection_t *conn = _conn;
 895   (void)fd;
 896   (void)events;
 897
 898   LOG_FN_CONN(conn, (LOG_DEBUG, LD_NET, "socket %d wants to write.",
 899                      (int)conn->s));
 900
 901   /* assert_connection_ok(conn, time(NULL)); */
 902
 903   if (connection_handle_write(conn, 0) < 0) {
 904     if (!conn->marked_for_close) {
 905       /* this connection is broken. remove it. */
 906       log_fn(LOG_WARN,LD_BUG,
 907              "unhandled error on write for %s connection (fd %d); removing",
 908              conn_type_to_string(conn->type), (int)conn->s);
 909       tor_fragile_assert();
 910       if (CONN_IS_EDGE(conn)) {
 911         /* otherwise we cry wolf about duplicate close */
 912         edge_connection_t *edge_conn = TO_EDGE_CONN(conn);
 913         if (!edge_conn->end_reason)
 914           edge_conn->end_reason = END_STREAM_REASON_INTERNAL;
 915         edge_conn->edge_has_sent_end = 1;
 916       }
 917       connection_close_immediate(conn); /* So we don't try to flush. */
 918       connection_mark_for_close(conn);
 919     }
 920   }
 921   assert_connection_ok(conn, time(NULL));
 922
 923   if (smartlist_len(closeable_connection_lst))
 924     close_closeable_connections();
 925 }
 926
/** If the connection at connection_array[i] is marked for close, then:
 *    - If it has data that it wants to flush, try to flush it.
 *    - If it _still_ has data to flush, and conn->hold_open_until_flushed is
 *      true, then leave the connection open and return.
 *    - Otherwise, remove the connection from connection_array and from
 *      all other lists, close it, and free it.
 *
 * The flush attempt depends on the kind of connection: a linked connection
 * has its outbuf moved into the inbuf of its linked peer; a connection that
 * speaks cells is flushed over TLS, but only while it is in
 * OR_CONN_STATE_OPEN; anything else is flushed straight to its socket.
 *
 * <b>i</b> is an index into connection_array.
 *
 * Returns 1 if the connection was closed, 0 otherwise (that is, when it was
 * not marked, or when it is being held open for more flushing).
 */
static int
conn_close_if_marked(int i)
{
  connection_t *conn;
  int retval;
  time_t now;

  conn = smartlist_get(connection_array, i);
  if (!conn->marked_for_close)
    return 0; /* nothing to see here, move along */
  now = time(NULL);
  assert_connection_ok(conn, now);
  /* assert_all_pending_dns_resolves_ok(); */

  log_debug(LD_NET,"Cleaning up connection (fd "TOR_SOCKET_T_FORMAT").",
            conn->s);

  /* If the connection we are about to close was trying to connect to
  a proxy server and failed, the client won't be able to use that
  proxy. We should warn the user about this. */
  if (conn->proxy_state == PROXY_INFANT)
    log_failed_proxy_connection(conn);

  /* Only attempt a final flush when there is somewhere to flush to (a live
   * socket or a linked peer) and something left to flush. */
  if ((SOCKET_OK(conn->s) || conn->linked_conn) &&
      connection_wants_to_flush(conn)) {
    /* s == -1 means it's an incomplete edge connection, or that the socket
     * has already been closed as unflushable. */
    /* sz is how much the write bucket lets us write right now; it is passed
     * to the TLS and socket flush calls below. */
    ssize_t sz = connection_bucket_write_limit(conn, now);
    if (!conn->hold_open_until_flushed)
      log_info(LD_NET,
               "Conn (addr %s, fd %d, type %s, state %d) marked, but wants "
               "to flush %d bytes. (Marked at %s:%d)",
               escaped_safe_str_client(conn->address),
               (int)conn->s, conn_type_to_string(conn->type), conn->state,
               (int)conn->outbuf_flushlen,
                conn->marked_for_close_file, conn->marked_for_close);
    if (conn->linked_conn) {
      /* Linked connection: hand our pending output to the peer's inbuf. */
      retval = buf_move_to_buf(conn->linked_conn->inbuf, conn->outbuf,
                               &conn->outbuf_flushlen);
      if (retval >= 0) {
        /* The linked conn will notice that it has data when it notices that
         * we're gone. */
        connection_start_reading_from_linked_conn(conn->linked_conn);
      }
      log_debug(LD_GENERAL, "Flushed last %d bytes from a linked conn; "
               "%d left; flushlen %d; wants-to-flush==%d", retval,
                (int)connection_get_outbuf_len(conn),
                (int)conn->outbuf_flushlen,
                connection_wants_to_flush(conn));
    } else if (connection_speaks_cells(conn)) {
      if (conn->state == OR_CONN_STATE_OPEN) {
        retval = buf_flush_to_tls(conn->outbuf, TO_OR_CONN(conn)->tls, sz,
                               &conn->outbuf_flushlen);
      } else
        retval = -1; /* never flush non-open broken tls connections */
    } else {
      retval = buf_flush_to_socket(conn->outbuf, conn->s, sz,
                                   &conn->outbuf_flushlen);
    }
    /* The flush did not fail, the connection asked to be held open until it
     * is flushed, and there is still data left: keep it open (return 0). */
    if (retval >= 0 && /* Technically, we could survive things like
                          TLS_WANT_WRITE here. But don't bother for now. */
        conn->hold_open_until_flushed && connection_wants_to_flush(conn)) {
      if (retval > 0) {
        /* We made progress on this pass. */
        LOG_FN_CONN(conn, (LOG_INFO,LD_NET,
                           "Holding conn (fd %d) open for more flushing.",
                           (int)conn->s));
        conn->timestamp_last_write_allowed = now; /* reset so we can flush
                                                   * more */
      } else if (sz == 0) {
        /* Also, retval==0.  If we get here, we didn't want to write anything
         * (because of rate-limiting) and we didn't. */

        /* Connection must flush before closing, but it's being rate-limited.
         * Let's remove from Libevent, and mark it as blocked on bandwidth
         * so it will be re-added on next token bucket refill. Prevents
         * busy Libevent loops where we keep ending up here and returning
         * 0 until we are no longer blocked on bandwidth.
         */
        connection_consider_empty_write_buckets(conn);
        /* Make sure that consider_empty_buckets really disabled the
         * connection: */
        if (BUG(connection_is_writing(conn))) {
          connection_write_bw_exhausted(conn, true);
        }

        /* The connection is being held due to write rate limit and thus will
         * flush its data later. We need to stop reading because this
         * connection is about to be closed once flushed. It should not
         * process anything more coming in at this stage. */
        connection_stop_reading(conn);
      }
      /* (When retval == 0 but sz != 0, nothing extra is done here; the
       * connection is simply left open.) */
      return 0;
    }
    /* We are going to close the connection even though data remains
     * unflushed; leave a note about it in the log. */
    if (connection_wants_to_flush(conn)) {
      log_fn(LOG_INFO, LD_NET, "We stalled too much while trying to write %d "
             "bytes to address %s.  If this happens a lot, either "
             "something is wrong with your network connection, or "
             "something is wrong with theirs. "
             "(fd %d, type %s, state %d, marked at %s:%d).",
             (int)connection_get_outbuf_len(conn),
             escaped_safe_str_client(conn->address),
             (int)conn->s, conn_type_to_string(conn->type), conn->state,
             conn->marked_for_close_file,
             conn->marked_for_close);
    }
  }

  connection_unlink(conn); /* unlink, remove, free */
  return 1;
}
1045
1046 /** Implementation for directory_all_unreachable.  This is done in a callback,
1047  * since otherwise it would complicate Tor's control-flow graph beyond all
1048  * reason.
1049  */
1050 static void
1051 directory_all_unreachable_cb(mainloop_event_t *event, void *arg)
1052 {
1053   (void)event;
1054   (void)arg;
1055
1056   connection_t *conn;
1057
1058   while ((conn = connection_get_by_type_state(CONN_TYPE_AP,
1059                                               AP_CONN_STATE_CIRCUIT_WAIT))) {
1060     entry_connection_t *entry_conn = TO_ENTRY_CONN(conn);
1061     log_notice(LD_NET,
1062                "Is your network connection down? "
1063                "Failing connection to '%s:%d'.",
1064                safe_str_client(entry_conn->socks_request->address),
1065                entry_conn->socks_request->port);
1066     connection_mark_unattached_ap(entry_conn,
1067                                   END_STREAM_REASON_NET_UNREACHABLE);
1068   }
1069   control_event_general_error("DIR_ALL_UNREACHABLE");
1070 }
1071
1072 static mainloop_event_t *directory_all_unreachable_cb_event = NULL;
1073
1074 /** We've just tried every dirserver we know about, and none of
1075  * them were reachable. Assume the network is down. Change state
1076  * so next time an application connection arrives we'll delay it
1077  * and try another directory fetch. Kill off all the circuit_wait
1078  * streams that are waiting now, since they will all timeout anyway.
1079  */
1080 void
1081 directory_all_unreachable(time_t now)
1082 {
1083   (void)now;
1084
1085   reset_uptime(); /* reset it */
1086
1087   if (!directory_all_unreachable_cb_event) {
1088     directory_all_unreachable_cb_event =
1089       mainloop_event_new(directory_all_unreachable_cb, NULL);
1090     tor_assert(directory_all_unreachable_cb_event);
1091   }
1092
1093   mainloop_event_activate(directory_all_unreachable_cb_event);
1094 }
1095
1096 /** This function is called whenever we successfully pull down some new
1097  * network statuses or server descriptors. */
1098 void
1099 directory_info_has_arrived(time_t now, int from_cache, int suppress_logs)
1100 {
1101   const or_options_t *options = get_options();
1102
1103   /* if we have enough dir info, then update our guard status with
1104    * whatever we just learned. */
1105   int invalidate_circs = guards_update_all();
1106
1107   if (invalidate_circs) {
1108     circuit_mark_all_unused_circs();
1109     circuit_mark_all_dirty_circs_as_unusable();
1110   }
1111
1112   if (!router_have_minimum_dir_info()) {
1113     int quiet = suppress_logs || from_cache ||
1114                 directory_too_idle_to_fetch_descriptors(options, now);
1115     tor_log(quiet ? LOG_INFO : LOG_NOTICE, LD_DIR,
1116         "I learned some more directory information, but not enough to "
1117         "build a circuit: %s", get_dir_info_status_string());
1118     update_all_descriptor_downloads(now);
1119     return;
1120   } else {
1121     if (directory_fetches_from_authorities(options)) {
1122       update_all_descriptor_downloads(now);
1123     }
1124
1125     /* Don't even bother trying to get extrainfo until the rest of our
1126      * directory info is up-to-date */
1127     if (options->DownloadExtraInfo)
1128       update_extrainfo_downloads(now);
1129   }
1130
1131   if (server_mode(options) && !net_is_disabled() && !from_cache &&
1132       (have_completed_a_circuit() || !any_predicted_circuits(now)))
1133    router_do_reachability_checks(1, 1);
1134 }
1135
/** Perform regular maintenance tasks for a single connection.  This
 * function gets run once per second per connection by run_scheduled_events.
 *
 * <b>i</b> is the connection's index in connection_array; <b>now</b> is the
 * current time.
 *
 * Connections already marked for close are left alone.  Directory
 * connections that have stalled for too long are expired.  For connections
 * that speak cells, the function then picks exactly one of: closing the
 * connection (too old, never opened, hibernating, idle, or stuck), sending
 * a keepalive padding cell, or letting channelpadding decide whether to pad.
 */
static void
run_connection_housekeeping(int i, time_t now)
{
  cell_t cell;
  connection_t *conn = smartlist_get(connection_array, i);
  const or_options_t *options = get_options();
  or_connection_t *or_conn;
  channel_t *chan = NULL;
  int have_any_circuits;
  /* True once at least KeepalivePeriod has passed since
   * timestamp_last_write_allowed. */
  int past_keepalive =
    now >= conn->timestamp_last_write_allowed + options->KeepalivePeriod;

  /* Remember the last time we saw an OR connection with an empty outbuf;
   * the "stuck connection" check below relies on it. */
  if (conn->outbuf && !connection_get_outbuf_len(conn) &&
      conn->type == CONN_TYPE_OR)
    TO_OR_CONN(conn)->timestamp_lastempty = now;

  if (conn->marked_for_close) {
    /* nothing to do here */
    return;
  }

  /* Expire any directory connections that haven't been active (sent
   * if a server or received if a client) for longer than
   * options->TestingDirConnectionMaxStall.  (This comment used to say
   * "5 min"; presumably that is the option's default -- confirm.) */
  if (conn->type == CONN_TYPE_DIR &&
      ((DIR_CONN_IS_SERVER(conn) &&
        conn->timestamp_last_write_allowed
            + options->TestingDirConnectionMaxStall < now) ||
       (!DIR_CONN_IS_SERVER(conn) &&
        conn->timestamp_last_read_allowed
            + options->TestingDirConnectionMaxStall < now))) {
    log_info(LD_DIR,"Expiring wedged directory conn (fd %d, purpose %d)",
             (int)conn->s, conn->purpose);
    /* This check is temporary; it's to let us know whether we should consider
     * parsing partial serverdesc responses. */
    if (conn->purpose == DIR_PURPOSE_FETCH_SERVERDESC &&
        connection_get_inbuf_len(conn) >= 1024) {
      log_info(LD_DIR,"Trying to extract information from wedged server desc "
               "download.");
      connection_dir_reached_eof(TO_DIR_CONN(conn));
    } else {
      connection_mark_for_close(conn);
    }
    return;
  }

  if (!connection_speaks_cells(conn))
    return; /* we're all done here, the rest is just for OR conns */

  /* If we haven't flushed to an OR connection for a while, then either nuke
     the connection or send a keepalive, depending. */

  or_conn = TO_OR_CONN(conn);
  tor_assert(conn->outbuf);

  chan = TLS_CHAN_TO_BASE(or_conn->chan);
  tor_assert(chan);

  /* Note whether the channel carries any circuits, refreshing the "last had
   * circuits" timestamp that the idle-timeout check below compares with. */
  if (channel_num_circuits(chan) != 0) {
    have_any_circuits = 1;
    chan->timestamp_last_had_circuits = now;
  } else {
    have_any_circuits = 0;
  }

  /* The cases below are tried in order; only the first match is acted on. */
  if (channel_is_bad_for_new_circs(TLS_CHAN_TO_BASE(or_conn->chan)) &&
      ! have_any_circuits) {
    /* It's bad for new circuits, and has no unmarked circuits on it:
     * mark it now. */
    log_info(LD_OR,
             "Expiring non-used OR connection to fd %d (%s:%d) [Too old].",
             (int)conn->s, conn->address, conn->port);
    if (conn->state == OR_CONN_STATE_CONNECTING)
      connection_or_connect_failed(TO_OR_CONN(conn),
                                   END_OR_CONN_REASON_TIMEOUT,
                                   "Tor gave up on the connection");
    connection_or_close_normally(TO_OR_CONN(conn), 1);
  } else if (!connection_state_is_open(conn)) {
    /* Not open yet: give it until the keepalive period has passed. */
    if (past_keepalive) {
      /* We never managed to actually get this connection open and happy. */
      log_info(LD_OR,"Expiring non-open OR connection to fd %d (%s:%d).",
               (int)conn->s,conn->address, conn->port);
      connection_or_close_normally(TO_OR_CONN(conn), 0);
    }
  } else if (we_are_hibernating() &&
             ! have_any_circuits &&
             !connection_get_outbuf_len(conn)) {
    /* We're hibernating or shutting down, there's no circuits, and nothing to
     * flush.*/
    log_info(LD_OR,"Expiring non-used OR connection to fd %d (%s:%d) "
             "[Hibernating or exiting].",
             (int)conn->s,conn->address, conn->port);
    connection_or_close_normally(TO_OR_CONN(conn), 1);
  } else if (!have_any_circuits &&
             now - or_conn->idle_timeout >=
                                         chan->timestamp_last_had_circuits) {
    /* No circuits, and none for at least idle_timeout: expire it. */
    log_info(LD_OR,"Expiring non-used OR connection %"PRIu64" to fd %d "
             "(%s:%d) [no circuits for %d; timeout %d; %scanonical].",
             (chan->global_identifier),
             (int)conn->s, conn->address, conn->port,
             (int)(now - chan->timestamp_last_had_circuits),
             or_conn->idle_timeout,
             or_conn->is_canonical ? "" : "non");
    connection_or_close_normally(TO_OR_CONN(conn), 0);
  } else if (
      now >= or_conn->timestamp_lastempty + options->KeepalivePeriod*10 &&
      now >=
          conn->timestamp_last_write_allowed + options->KeepalivePeriod*10) {
    /* Stuck: the outbuf has not been seen empty, and no write has been
     * allowed, for at least ten keepalive periods. */
    log_fn(LOG_PROTOCOL_WARN,LD_PROTOCOL,
           "Expiring stuck OR connection to fd %d (%s:%d). (%d bytes to "
           "flush; %d seconds since last write)",
           (int)conn->s, conn->address, conn->port,
           (int)connection_get_outbuf_len(conn),
           (int)(now-conn->timestamp_last_write_allowed));
    connection_or_close_normally(TO_OR_CONN(conn), 0);
  } else if (past_keepalive && !connection_get_outbuf_len(conn)) {
    /* send a padding cell */
    log_fn(LOG_DEBUG,LD_OR,"Sending keepalive to (%s:%d)",
           conn->address, conn->port);
    memset(&cell,0,sizeof(cell_t));
    cell.command = CELL_PADDING;
    connection_or_write_cell_to_buf(&cell, or_conn);
  } else {
    /* Nothing to expire and no keepalive due: leave it to channelpadding. */
    channelpadding_decide_to_pad_channel(chan);
  }
}
1264
1265 /** Honor a NEWNYM request: make future requests unlinkable to past
1266  * requests. */
1267 static void
1268 signewnym_impl(time_t now)
1269 {
1270   const or_options_t *options = get_options();
1271   if (!proxy_mode(options)) {
1272     log_info(LD_CONTROL, "Ignoring SIGNAL NEWNYM because client functionality "
1273              "is disabled.");
1274     return;
1275   }
1276
1277   circuit_mark_all_dirty_circs_as_unusable();
1278   addressmap_clear_transient();
1279   hs_client_purge_state();
1280   time_of_last_signewnym = now;
1281   signewnym_is_pending = 0;
1282
1283   ++newnym_epoch;
1284
1285   control_event_signal(SIGNEWNYM);
1286 }
1287
1288 /** Callback: run a deferred signewnym. */
1289 static void
1290 handle_deferred_signewnym_cb(mainloop_event_t *event, void *arg)
1291 {
1292   (void)event;
1293   (void)arg;
1294   log_info(LD_CONTROL, "Honoring delayed NEWNYM request");
1295   do_signewnym(time(NULL));
1296 }
1297
1298 /** Either perform a signewnym or schedule one, depending on rate limiting. */
1299 void
1300 do_signewnym(time_t now)
1301 {
1302   if (time_of_last_signewnym + MAX_SIGNEWNYM_RATE > now) {
1303     const time_t delay_sec =
1304       time_of_last_signewnym + MAX_SIGNEWNYM_RATE - now;
1305     if (! signewnym_is_pending) {
1306       signewnym_is_pending = 1;
1307       if (!handle_deferred_signewnym_ev) {
1308         handle_deferred_signewnym_ev =
1309           mainloop_event_postloop_new(handle_deferred_signewnym_cb, NULL);
1310       }
1311       const struct timeval delay_tv = { delay_sec, 0 };
1312       mainloop_event_schedule(handle_deferred_signewnym_ev, &delay_tv);
1313     }
1314     log_notice(LD_CONTROL,
1315                "Rate limiting NEWNYM request: delaying by %d second(s)",
1316                (int)(delay_sec));
1317   } else {
1318     signewnym_impl(now);
1319   }
1320 }
1321
1322 /** Return the number of times that signewnym has been called. */
1323 unsigned
1324 get_signewnym_epoch(void)
1325 {
1326   return newnym_epoch;
1327 }
1328
1329 /** True iff we have initialized all the members of <b>periodic_events</b>.
1330  * Used to prevent double-initialization. */
1331 static int periodic_events_initialized = 0;
1332
1333 /* Declare all the timer callback functions... */
1334 #undef CALLBACK
1335 #define CALLBACK(name) \
1336   static int name ## _callback(time_t, const or_options_t *)
1337 CALLBACK(add_entropy);
1338 CALLBACK(check_authority_cert);
1339 CALLBACK(check_canonical_channels);
1340 CALLBACK(check_descriptor);
1341 CALLBACK(check_dns_honesty);
1342 CALLBACK(check_ed_keys);
1343 CALLBACK(check_expired_networkstatus);
1344 CALLBACK(check_for_reachability_bw);
1345 CALLBACK(check_onion_keys_expiry_time);
1346 CALLBACK(clean_caches);
1347 CALLBACK(clean_consdiffmgr);
1348 CALLBACK(dirvote);
1349 CALLBACK(downrate_stability);
1350 CALLBACK(expire_old_ciruits_serverside);
1351 CALLBACK(fetch_networkstatus);
1352 CALLBACK(heartbeat);
1353 CALLBACK(hs_service);
1354 CALLBACK(launch_descriptor_fetches);
1355 CALLBACK(launch_reachability_tests);
1356 CALLBACK(reachability_warnings);
1357 CALLBACK(record_bridge_stats);
1358 CALLBACK(rend_cache_failure_clean);
1359 CALLBACK(reset_padding_counts);
1360 CALLBACK(retry_dns);
1361 CALLBACK(retry_listeners);
1362 CALLBACK(rotate_onion_key);
1363 CALLBACK(rotate_x509_certificate);
1364 CALLBACK(save_stability);
1365 CALLBACK(save_state);
1366 CALLBACK(write_bridge_ns);
1367 CALLBACK(write_stats_file);
1368 CALLBACK(control_per_second_events);
1369 CALLBACK(second_elapsed);
1370 CALLBACK(check_network_participation);
1371
1372 #undef CALLBACK
1373
/* Now we declare the table of periodic events: one periodic_event_item_t
 * per callback declared above, terminated by END_OF_PERIODIC_EVENTS.
 *
 * CALLBACK(name, r, f) expands to
 *   PERIODIC_EVENT(name, PERIODIC_EVENT_ROLE_r, f)
 * and FL(x) expands to PERIODIC_EVENT_FLAG_x.
 *
 * rescan_periodic_events() enables an entry when one of its roles matches
 * the roles we currently have, except that an entry flagged NEED_NET is
 * disabled while the network is disabled.  An entry flagged
 * FLUSH_ON_DISABLE is disabled through periodic_event_flush_and_disable()
 * rather than periodic_event_disable(). */
#define CALLBACK(name, r, f) \
  PERIODIC_EVENT(name, PERIODIC_EVENT_ROLE_ ## r, f)
#define FL(name) (PERIODIC_EVENT_FLAG_ ## name)

STATIC periodic_event_item_t periodic_events[] = {
  /* Everyone needs to run these. They need to have very long timeouts for
   * that to be safe. */
  CALLBACK(add_entropy, ALL, 0),
  CALLBACK(heartbeat, ALL, 0),
  CALLBACK(reset_padding_counts, ALL, 0),

  /* This is a legacy catch-all callback that runs once per second if
   * we are online and active. */
  CALLBACK(second_elapsed, NET_PARTICIPANT,
           FL(NEED_NET)|FL(FLUSH_ON_DISABLE)),

  /* XXXX Do we have a reason to do this on a callback? Does it do any good at
   * all?  For now, if we're dormant, we can let our listeners decay. */
  CALLBACK(retry_listeners, NET_PARTICIPANT, FL(NEED_NET)),

  /* We need to do these if we're participating in the Tor network. */
  CALLBACK(check_expired_networkstatus, NET_PARTICIPANT, 0),
  CALLBACK(fetch_networkstatus, NET_PARTICIPANT, 0),
  CALLBACK(launch_descriptor_fetches, NET_PARTICIPANT, FL(NEED_NET)),
  CALLBACK(rotate_x509_certificate, NET_PARTICIPANT, 0),
  CALLBACK(check_network_participation, NET_PARTICIPANT, 0),

  /* We need to do these if we're participating in the Tor network, and
   * immediately before we stop. */
  CALLBACK(clean_caches, NET_PARTICIPANT, FL(FLUSH_ON_DISABLE)),
  CALLBACK(save_state, NET_PARTICIPANT, FL(FLUSH_ON_DISABLE)),
  CALLBACK(write_stats_file, NET_PARTICIPANT, FL(FLUSH_ON_DISABLE)),

  /* Routers (bridge and relay) only. */
  CALLBACK(check_descriptor, ROUTER, FL(NEED_NET)),
  CALLBACK(check_ed_keys, ROUTER, 0),
  CALLBACK(check_for_reachability_bw, ROUTER, FL(NEED_NET)),
  CALLBACK(check_onion_keys_expiry_time, ROUTER, 0),
  CALLBACK(expire_old_ciruits_serverside, ROUTER, FL(NEED_NET)),
  CALLBACK(reachability_warnings, ROUTER, FL(NEED_NET)),
  CALLBACK(retry_dns, ROUTER, 0),
  CALLBACK(rotate_onion_key, ROUTER, 0),

  /* Authorities (bridge and directory) only. */
  CALLBACK(downrate_stability, AUTHORITIES, 0),
  CALLBACK(launch_reachability_tests, AUTHORITIES, FL(NEED_NET)),
  CALLBACK(save_stability, AUTHORITIES, 0),

  /* Directory authority only. */
  CALLBACK(check_authority_cert, DIRAUTH, 0),
  CALLBACK(dirvote, DIRAUTH, FL(NEED_NET)),

  /* Relay only. */
  CALLBACK(check_canonical_channels, RELAY, FL(NEED_NET)),
  CALLBACK(check_dns_honesty, RELAY, FL(NEED_NET)),

  /* Hidden Service service only. */
  CALLBACK(hs_service, HS_SERVICE, FL(NEED_NET)), // XXXX break this down more

  /* Bridge only. */
  CALLBACK(record_bridge_stats, BRIDGE, 0),

  /* Intended for clients only, but currently registered for every network
   * participant. */
  /* XXXX this could be restricted to CLIENT+NET_PARTICIPANT */
  CALLBACK(rend_cache_failure_clean, NET_PARTICIPANT, FL(FLUSH_ON_DISABLE)),

  /* Bridge Authority only. */
  CALLBACK(write_bridge_ns, BRIDGEAUTH, 0),

  /* Directory server only. */
  CALLBACK(clean_consdiffmgr, DIRSERVER, 0),

  /* Controller with per-second events only. */
  CALLBACK(control_per_second_events, CONTROLEV, 0),

  /* Sentinel: the loops over this table stop at the first entry whose name
   * is NULL. */
  END_OF_PERIODIC_EVENTS
};
#undef CALLBACK
#undef FL
1454
1455 /* These are pointers to members of periodic_events[] that are used to
1456  * implement particular callbacks.  We keep them separate here so that we
1457  * can access them by name.  We also keep them inside periodic_events[]
1458  * so that we can implement "reset all timers" in a reasonable way. */
1459 static periodic_event_item_t *check_descriptor_event=NULL;
1460 static periodic_event_item_t *dirvote_event=NULL;
1461 static periodic_event_item_t *fetch_networkstatus_event=NULL;
1462 static periodic_event_item_t *launch_descriptor_fetches_event=NULL;
1463 static periodic_event_item_t *check_dns_honesty_event=NULL;
1464 static periodic_event_item_t *save_state_event=NULL;
1465
1466 /** Reset all the periodic events so we'll do all our actions again as if we
1467  * just started up.
1468  * Useful if our clock just moved back a long time from the future,
1469  * so we don't wait until that future arrives again before acting.
1470  */
1471 void
1472 reset_all_main_loop_timers(void)
1473 {
1474   int i;
1475   for (i = 0; periodic_events[i].name; ++i) {
1476     periodic_event_reschedule(&periodic_events[i]);
1477   }
1478 }
1479
1480 /** Return the member of periodic_events[] whose name is <b>name</b>.
1481  * Return NULL if no such event is found.
1482  */
1483 static periodic_event_item_t *
1484 find_periodic_event(const char *name)
1485 {
1486   int i;
1487   for (i = 0; periodic_events[i].name; ++i) {
1488     if (strcmp(name, periodic_events[i].name) == 0)
1489       return &periodic_events[i];
1490   }
1491   return NULL;
1492 }
1493
1494 /** Return a bitmask of the roles this tor instance is configured for using
1495  * the given options. */
1496 STATIC int
1497 get_my_roles(const or_options_t *options)
1498 {
1499   tor_assert(options);
1500
1501   int roles = PERIODIC_EVENT_ROLE_ALL;
1502   int is_bridge = options->BridgeRelay;
1503   int is_relay = server_mode(options);
1504   int is_dirauth = authdir_mode_v3(options);
1505   int is_bridgeauth = authdir_mode_bridge(options);
1506   int is_hidden_service = !!hs_service_get_num_services() ||
1507                           !!rend_num_services();
1508   int is_dirserver = dir_server_mode(options);
1509   int sending_control_events = control_any_per_second_event_enabled();
1510
1511   /* We also consider tor to have the role of a client if the ControlPort is
1512    * set because a lot of things can be done over the control port which
1513    * requires tor to have basic functionnalities. */
1514   int is_client = options_any_client_port_set(options) ||
1515                   options->ControlPort_set ||
1516                   options->OwningControllerFD != UINT64_MAX;
1517
1518   int is_net_participant = is_participating_on_network();
1519
1520   if (is_bridge) roles |= PERIODIC_EVENT_ROLE_BRIDGE;
1521   if (is_client) roles |= PERIODIC_EVENT_ROLE_CLIENT;
1522   if (is_relay) roles |= PERIODIC_EVENT_ROLE_RELAY;
1523   if (is_dirauth) roles |= PERIODIC_EVENT_ROLE_DIRAUTH;
1524   if (is_bridgeauth) roles |= PERIODIC_EVENT_ROLE_BRIDGEAUTH;
1525   if (is_hidden_service) roles |= PERIODIC_EVENT_ROLE_HS_SERVICE;
1526   if (is_dirserver) roles |= PERIODIC_EVENT_ROLE_DIRSERVER;
1527   if (is_net_participant) roles |= PERIODIC_EVENT_ROLE_NET_PARTICIPANT;
1528   if (sending_control_events) roles |= PERIODIC_EVENT_ROLE_CONTROLEV;
1529
1530   return roles;
1531 }
1532
1533 /** Event to run initialize_periodic_events_cb */
1534 static struct event *initialize_periodic_events_event = NULL;
1535
1536 /** Helper, run one second after setup:
1537  * Initializes all members of periodic_events and starts them running.
1538  *
1539  * (We do this one second after setup for backward-compatibility reasons;
1540  * it might not actually be necessary.) */
1541 static void
1542 initialize_periodic_events_cb(evutil_socket_t fd, short events, void *data)
1543 {
1544   (void) fd;
1545   (void) events;
1546   (void) data;
1547
1548   tor_event_free(initialize_periodic_events_event);
1549
1550   rescan_periodic_events(get_options());
1551 }
1552
1553 /** Set up all the members of periodic_events[], and configure them all to be
1554  * launched from a callback. */
1555 STATIC void
1556 initialize_periodic_events(void)
1557 {
1558   if (periodic_events_initialized)
1559     return;
1560
1561   periodic_events_initialized = 1;
1562
1563   /* Set up all periodic events. We'll launch them by roles. */
1564   int i;
1565   for (i = 0; periodic_events[i].name; ++i) {
1566     periodic_event_setup(&periodic_events[i]);
1567   }
1568
1569 #define NAMED_CALLBACK(name) \
1570   STMT_BEGIN name ## _event = find_periodic_event( #name ); STMT_END
1571
1572   NAMED_CALLBACK(check_descriptor);
1573   NAMED_CALLBACK(dirvote);
1574   NAMED_CALLBACK(fetch_networkstatus);
1575   NAMED_CALLBACK(launch_descriptor_fetches);
1576   NAMED_CALLBACK(check_dns_honesty);
1577   NAMED_CALLBACK(save_state);
1578
1579   struct timeval one_second = { 1, 0 };
1580   initialize_periodic_events_event = tor_evtimer_new(
1581                   tor_libevent_get_base(),
1582                   initialize_periodic_events_cb, NULL);
1583   event_add(initialize_periodic_events_event, &one_second);
1584 }
1585
1586 STATIC void
1587 teardown_periodic_events(void)
1588 {
1589   int i;
1590   for (i = 0; periodic_events[i].name; ++i) {
1591     periodic_event_destroy(&periodic_events[i]);
1592   }
1593   periodic_events_initialized = 0;
1594 }
1595
1596 static mainloop_event_t *rescan_periodic_events_ev = NULL;
1597
1598 /** Callback: rescan the periodic event list. */
1599 static void
1600 rescan_periodic_events_cb(mainloop_event_t *event, void *arg)
1601 {
1602   (void)event;
1603   (void)arg;
1604   rescan_periodic_events(get_options());
1605 }
1606
1607 /**
1608  * Schedule an event that will rescan which periodic events should run.
1609  **/
1610 void
1611 schedule_rescan_periodic_events(void)
1612 {
1613   if (!rescan_periodic_events_ev) {
1614     rescan_periodic_events_ev =
1615       mainloop_event_new(rescan_periodic_events_cb, NULL);
1616   }
1617   mainloop_event_activate(rescan_periodic_events_ev);
1618 }
1619
1620 /** Do a pass at all our periodic events, disable those we don't need anymore
1621  * and enable those we need now using the given options. */
1622 void
1623 rescan_periodic_events(const or_options_t *options)
1624 {
1625   tor_assert(options);
1626
1627   /* Avoid scanning the event list if we haven't initialized it yet. This is
1628    * particularly useful for unit tests in order to avoid initializing main
1629    * loop events everytime. */
1630   if (!periodic_events_initialized) {
1631     return;
1632   }
1633
1634   int roles = get_my_roles(options);
1635
1636   for (int i = 0; periodic_events[i].name; ++i) {
1637     periodic_event_item_t *item = &periodic_events[i];
1638
1639     int enable = !!(item->roles & roles);
1640
1641     /* Handle the event flags. */
1642     if (net_is_disabled() &&
1643         (item->flags & PERIODIC_EVENT_FLAG_NEED_NET)) {
1644       enable = 0;
1645     }
1646
1647     /* Enable the event if needed. It is safe to enable an event that was
1648      * already enabled. Same goes for disabling it. */
1649     if (enable) {
1650       log_debug(LD_GENERAL, "Launching periodic event %s", item->name);
1651       periodic_event_enable(item);
1652     } else {
1653       log_debug(LD_GENERAL, "Disabling periodic event %s", item->name);
1654       if (item->flags & PERIODIC_EVENT_FLAG_FLUSH_ON_DISABLE) {
1655         periodic_event_flush_and_disable(item);
1656       } else {
1657         periodic_event_disable(item);
1658       }
1659     }
1660   }
1661 }
1662
1663 /* We just got new options globally set, see if we need to enabled or disable
1664  * periodic events. */
1665 void
1666 periodic_events_on_new_options(const or_options_t *options)
1667 {
1668   /* Only if we've already initialized the events, rescan the list which will
1669    * enable or disable events depending on our roles. This will be called at
1670    * bootup and we don't want this function to initialize the events because
1671    * they aren't set up at this stage. */
1672   if (periodic_events_initialized) {
1673     rescan_periodic_events(options);
1674   }
1675 }
1676
1677 /**
1678  * Update our schedule so that we'll check whether we need to update our
1679  * descriptor immediately, rather than after up to CHECK_DESCRIPTOR_INTERVAL
1680  * seconds.
1681  */
1682 void
1683 reschedule_descriptor_update_check(void)
1684 {
1685   if (check_descriptor_event) {
1686     periodic_event_reschedule(check_descriptor_event);
1687   }
1688 }
1689
1690 /**
1691  * Update our schedule so that we'll check whether we need to fetch directory
1692  * info immediately.
1693  */
1694 void
1695 reschedule_directory_downloads(void)
1696 {
1697   tor_assert(fetch_networkstatus_event);
1698   tor_assert(launch_descriptor_fetches_event);
1699
1700   periodic_event_reschedule(fetch_networkstatus_event);
1701   periodic_event_reschedule(launch_descriptor_fetches_event);
1702 }
1703
1704 /** Mainloop callback: clean up circuits, channels, and connections
1705  * that are pending close. */
1706 static void
1707 postloop_cleanup_cb(mainloop_event_t *ev, void *arg)
1708 {
1709   (void)ev;
1710   (void)arg;
1711   circuit_close_all_marked();
1712   close_closeable_connections();
1713   channel_run_cleanup();
1714   channel_listener_run_cleanup();
1715 }
1716
1717 /** Event to run postloop_cleanup_cb */
1718 static mainloop_event_t *postloop_cleanup_ev=NULL;
1719
1720 /** Schedule a post-loop event to clean up marked channels, connections, and
1721  * circuits. */
1722 void
1723 mainloop_schedule_postloop_cleanup(void)
1724 {
1725   if (PREDICT_UNLIKELY(postloop_cleanup_ev == NULL)) {
1726     // (It's possible that we can get here if we decide to close a connection
1727     // in the earliest stages of our configuration, before we create events.)
1728     return;
1729   }
1730   mainloop_event_activate(postloop_cleanup_ev);
1731 }
1732
1733 /** Event to run 'scheduled_shutdown_cb' */
1734 static mainloop_event_t *scheduled_shutdown_ev=NULL;
1735
1736 /** Callback: run a scheduled shutdown */
1737 static void
1738 scheduled_shutdown_cb(mainloop_event_t *ev, void *arg)
1739 {
1740   (void)ev;
1741   (void)arg;
1742   log_notice(LD_GENERAL, "Clean shutdown finished. Exiting.");
1743   tor_shutdown_event_loop_and_exit(0);
1744 }
1745
1746 /** Schedule the mainloop to exit after <b>delay_sec</b> seconds. */
1747 void
1748 mainloop_schedule_shutdown(int delay_sec)
1749 {
1750   const struct timeval delay_tv = { delay_sec, 0 };
1751   if (! scheduled_shutdown_ev) {
1752     scheduled_shutdown_ev = mainloop_event_new(scheduled_shutdown_cb, NULL);
1753   }
1754   mainloop_event_schedule(scheduled_shutdown_ev, &delay_tv);
1755 }
1756
1757 #define LONGEST_TIMER_PERIOD (30 * 86400)
1758 /** Helper: Return the number of seconds between <b>now</b> and <b>next</b>,
1759  * clipped to the range [1 second, LONGEST_TIMER_PERIOD]. */
1760 static inline int
1761 safe_timer_diff(time_t now, time_t next)
1762 {
1763   if (next > now) {
1764     /* There were no computers at signed TIME_MIN (1902 on 32-bit systems),
1765      * and nothing that could run Tor. It's a bug if 'next' is around then.
1766      * On 64-bit systems with signed TIME_MIN, TIME_MIN is before the Big
1767      * Bang. We cannot extrapolate past a singularity, but there was probably
1768      * nothing that could run Tor then, either.
1769      **/
1770     tor_assert(next > TIME_MIN + LONGEST_TIMER_PERIOD);
1771
1772     if (next - LONGEST_TIMER_PERIOD > now)
1773       return LONGEST_TIMER_PERIOD;
1774     return (int)(next - now);
1775   } else {
1776     return 1;
1777   }
1778 }
1779
1780 /** Perform regular maintenance tasks.  This function gets run once per
1781  * second.
1782  */
1783 static int
1784 second_elapsed_callback(time_t now, const or_options_t *options)
1785 {
1786   /* 0. See if our bandwidth limits are exhausted and we should hibernate
1787    *
1788    * Note: we have redundant mechanisms to handle the case where it's
1789    * time to wake up from hibernation; or where we have a scheduled
1790    * shutdown and it's time to run it, but this will also handle those.
1791    */
1792   consider_hibernation(now);
1793
1794   /* Maybe enough time elapsed for us to reconsider a circuit. */
1795   circuit_upgrade_circuits_from_guard_wait();
1796
1797   if (options->UseBridges && !net_is_disabled()) {
1798     /* Note: this check uses net_is_disabled(), not should_delay_dir_fetches()
1799      * -- the latter is only for fetching consensus-derived directory info. */
1800     // TODO: client
1801     //     Also, schedule this rather than probing 1x / sec
1802     fetch_bridge_descriptors(options, now);
1803   }
1804
1805   if (accounting_is_enabled(options)) {
1806     // TODO: refactor or rewrite?
1807     accounting_run_housekeeping(now);
1808   }
1809
1810   /* 3a. Every second, we examine pending circuits and prune the
1811    *    ones which have been pending for more than a few seconds.
1812    *    We do this before step 4, so it can try building more if
1813    *    it's not comfortable with the number of available circuits.
1814    */
1815   /* (If our circuit build timeout can ever become lower than a second (which
1816    * it can't, currently), we should do this more often.) */
1817   // TODO: All expire stuff can become NET_PARTICIPANT, FLUSH_ON_DISABLE
1818   circuit_expire_building();
1819   circuit_expire_waiting_for_better_guard();
1820
1821   /* 3b. Also look at pending streams and prune the ones that 'began'
1822    *     a long time ago but haven't gotten a 'connected' yet.
1823    *     Do this before step 4, so we can put them back into pending
1824    *     state to be picked up by the new circuit.
1825    */
1826   connection_ap_expire_beginning();
1827
1828   /* 3c. And expire connections that we've held open for too long.
1829    */
1830   connection_expire_held_open();
1831
1832   /* 4. Every second, we try a new circuit if there are no valid
1833    *    circuits. Every NewCircuitPeriod seconds, we expire circuits
1834    *    that became dirty more than MaxCircuitDirtiness seconds ago,
1835    *    and we make a new circ if there are no clean circuits.
1836    */
1837   const int have_dir_info = router_have_minimum_dir_info();
1838   if (have_dir_info && !net_is_disabled()) {
1839     circuit_build_needed_circs(now);
1840   } else {
1841     circuit_expire_old_circs_as_needed(now);
1842   }
1843
1844   /* 5. We do housekeeping for each connection... */
1845   channel_update_bad_for_new_circs(NULL, 0);
1846   int i;
1847   for (i=0;i<smartlist_len(connection_array);i++) {
1848     run_connection_housekeeping(i, now);
1849   }
1850
1851   /* 11b. check pending unconfigured managed proxies */
1852   if (!net_is_disabled() && pt_proxies_configuration_pending())
1853     pt_configure_remaining_proxies();
1854
1855   /* Run again in a second. */
1856   return 1;
1857 }
1858
1859 /* Periodic callback: rotate the onion keys after the period defined by the
1860  * "onion-key-rotation-days" consensus parameter, shut down and restart all
1861  * cpuworkers, and update our descriptor if necessary.
1862  */
1863 static int
1864 rotate_onion_key_callback(time_t now, const or_options_t *options)
1865 {
1866   if (server_mode(options)) {
1867     int onion_key_lifetime = get_onion_key_lifetime();
1868     time_t rotation_time = get_onion_key_set_at()+onion_key_lifetime;
1869     if (rotation_time > now) {
1870       return ONION_KEY_CONSENSUS_CHECK_INTERVAL;
1871     }
1872
1873     log_info(LD_GENERAL,"Rotating onion key.");
1874     rotate_onion_key();
1875     cpuworkers_rotate_keyinfo();
1876     if (router_rebuild_descriptor(1)<0) {
1877       log_info(LD_CONFIG, "Couldn't rebuild router descriptor");
1878     }
1879     if (advertised_server_mode() && !net_is_disabled())
1880       router_upload_dir_desc_to_dirservers(0);
1881     return ONION_KEY_CONSENSUS_CHECK_INTERVAL;
1882   }
1883   return PERIODIC_EVENT_NO_UPDATE;
1884 }
1885
1886 /* Period callback: Check if our old onion keys are still valid after the
1887  * period of time defined by the consensus parameter
1888  * "onion-key-grace-period-days", otherwise expire them by setting them to
1889  * NULL.
1890  */
1891 static int
1892 check_onion_keys_expiry_time_callback(time_t now, const or_options_t *options)
1893 {
1894   if (server_mode(options)) {
1895     int onion_key_grace_period = get_onion_key_grace_period();
1896     time_t expiry_time = get_onion_key_set_at()+onion_key_grace_period;
1897     if (expiry_time > now) {
1898       return ONION_KEY_CONSENSUS_CHECK_INTERVAL;
1899     }
1900
1901     log_info(LD_GENERAL, "Expiring old onion keys.");
1902     expire_old_onion_keys();
1903     cpuworkers_rotate_keyinfo();
1904     return ONION_KEY_CONSENSUS_CHECK_INTERVAL;
1905   }
1906
1907   return PERIODIC_EVENT_NO_UPDATE;
1908 }
1909
1910 /* Periodic callback: Every 30 seconds, check whether it's time to make new
1911  * Ed25519 subkeys.
1912  */
1913 static int
1914 check_ed_keys_callback(time_t now, const or_options_t *options)
1915 {
1916   if (server_mode(options)) {
1917     if (should_make_new_ed_keys(options, now)) {
1918       int new_signing_key = load_ed_keys(options, now);
1919       if (new_signing_key < 0 ||
1920           generate_ed_link_cert(options, now, new_signing_key > 0)) {
1921         log_err(LD_OR, "Unable to update Ed25519 keys!  Exiting.");
1922         tor_shutdown_event_loop_and_exit(1);
1923       }
1924     }
1925     return 30;
1926   }
1927   return PERIODIC_EVENT_NO_UPDATE;
1928 }
1929
1930 /**
1931  * Periodic callback: Every {LAZY,GREEDY}_DESCRIPTOR_RETRY_INTERVAL,
1932  * see about fetching descriptors, microdescriptors, and extrainfo
1933  * documents.
1934  */
1935 static int
1936 launch_descriptor_fetches_callback(time_t now, const or_options_t *options)
1937 {
1938   if (should_delay_dir_fetches(options, NULL))
1939       return PERIODIC_EVENT_NO_UPDATE;
1940
1941   update_all_descriptor_downloads(now);
1942   update_extrainfo_downloads(now);
1943   if (router_have_minimum_dir_info())
1944     return LAZY_DESCRIPTOR_RETRY_INTERVAL;
1945   else
1946     return GREEDY_DESCRIPTOR_RETRY_INTERVAL;
1947 }
1948
1949 /**
1950  * Periodic event: Rotate our X.509 certificates and TLS keys once every
1951  * MAX_SSL_KEY_LIFETIME_INTERNAL.
1952  */
1953 static int
1954 rotate_x509_certificate_callback(time_t now, const or_options_t *options)
1955 {
1956   static int first = 1;
1957   (void)now;
1958   (void)options;
1959   if (first) {
1960     first = 0;
1961     return MAX_SSL_KEY_LIFETIME_INTERNAL;
1962   }
1963
1964   /* 1b. Every MAX_SSL_KEY_LIFETIME_INTERNAL seconds, we change our
1965    * TLS context. */
1966   log_info(LD_GENERAL,"Rotating tls context.");
1967   if (router_initialize_tls_context() < 0) {
1968     log_err(LD_BUG, "Error reinitializing TLS context");
1969     tor_assert_unreached();
1970   }
1971   if (generate_ed_link_cert(options, now, 1)) {
1972     log_err(LD_OR, "Unable to update Ed25519->TLS link certificate for "
1973             "new TLS context.");
1974     tor_assert_unreached();
1975   }
1976
1977   /* We also make sure to rotate the TLS connections themselves if they've
1978    * been up for too long -- but that's done via is_bad_for_new_circs in
1979    * run_connection_housekeeping() above. */
1980   return MAX_SSL_KEY_LIFETIME_INTERNAL;
1981 }
1982
1983 /**
1984  * Periodic callback: once an hour, grab some more entropy from the
1985  * kernel and feed it to our CSPRNG.
1986  **/
1987 static int
1988 add_entropy_callback(time_t now, const or_options_t *options)
1989 {
1990   (void)now;
1991   (void)options;
1992   /* We already seeded once, so don't die on failure. */
1993   if (crypto_seed_rng() < 0) {
1994     log_warn(LD_GENERAL, "Tried to re-seed RNG, but failed. We already "
1995              "seeded once, though, so we won't exit here.");
1996   }
1997
1998   /** How often do we add more entropy to OpenSSL's RNG pool? */
1999 #define ENTROPY_INTERVAL (60*60)
2000   return ENTROPY_INTERVAL;
2001 }
2002
2003 /** Periodic callback: if there has been no network usage in a while,
2004  * enter a dormant state. */
2005 static int
2006 check_network_participation_callback(time_t now, const or_options_t *options)
2007 {
2008   /* If we're a server, we can't become dormant. */
2009   if (server_mode(options)) {
2010     goto found_activity;
2011   }
2012
2013   /* If we're running an onion service, we can't become dormant. */
2014   /* XXXX this would be nice to change, so that we can be dormant with a
2015    * service. */
2016   if (hs_service_get_num_services() || rend_num_services()) {
2017     goto found_activity;
2018   }
2019
2020   /* XXXX Add an option to never become dormant. */
2021
2022   /* If we have any currently open entry streams other than "linked"
2023    * connections used for directory requests, those count as user activity.
2024    */
2025   /* XXXX make this configurable? */
2026   if (connection_get_by_type_nonlinked(CONN_TYPE_AP) != NULL) {
2027     goto found_activity;
2028   }
2029
2030   /* XXXX Make this configurable? */
2031 /** How often do we check whether we have had network activity? */
2032 #define CHECK_PARTICIPATION_INTERVAL (5*60)
2033
2034   /** Become dormant if there has been no user activity in this long. */
2035   /* XXXX make this configurable! */
2036 #define BECOME_DORMANT_AFTER_INACTIVITY (24*60*60)
2037   if (get_last_user_activity_time() + BECOME_DORMANT_AFTER_INACTIVITY >= now) {
2038     log_notice(LD_GENERAL, "No user activity in a long time: becoming"
2039                " dormant.");
2040     set_network_participation(false);
2041     rescan_periodic_events(options);
2042   }
2043
2044   return CHECK_PARTICIPATION_INTERVAL;
2045
2046  found_activity:
2047   note_user_activity(now);
2048   return CHECK_PARTICIPATION_INTERVAL;
2049 }
2050
2051 /**
2052  * Periodic callback: if we're an authority, make sure we test
2053  * the routers on the network for reachability.
2054  */
2055 static int
2056 launch_reachability_tests_callback(time_t now, const or_options_t *options)
2057 {
2058   if (authdir_mode_tests_reachability(options) &&
2059       !net_is_disabled()) {
2060     /* try to determine reachability of the other Tor relays */
2061     dirserv_test_reachability(now);
2062   }
2063   return REACHABILITY_TEST_INTERVAL;
2064 }
2065
2066 /**
2067  * Periodic callback: if we're an authority, discount the stability
2068  * information (and other rephist information) that's older.
2069  */
2070 static int
2071 downrate_stability_callback(time_t now, const or_options_t *options)
2072 {
2073   (void)options;
2074   /* 1d. Periodically, we discount older stability information so that new
2075    * stability info counts more, and save the stability information to disk as
2076    * appropriate. */
2077   time_t next = rep_hist_downrate_old_runs(now);
2078   return safe_timer_diff(now, next);
2079 }
2080
2081 /**
2082  * Periodic callback: if we're an authority, record our measured stability
2083  * information from rephist in an mtbf file.
2084  */
2085 static int
2086 save_stability_callback(time_t now, const or_options_t *options)
2087 {
2088   if (authdir_mode_tests_reachability(options)) {
2089     if (rep_hist_record_mtbf_data(now, 1)<0) {
2090       log_warn(LD_GENERAL, "Couldn't store mtbf data.");
2091     }
2092   }
2093 #define SAVE_STABILITY_INTERVAL (30*60)
2094   return SAVE_STABILITY_INTERVAL;
2095 }
2096
2097 /**
2098  * Periodic callback: if we're an authority, check on our authority
2099  * certificate (the one that authenticates our authority signing key).
2100  */
2101 static int
2102 check_authority_cert_callback(time_t now, const or_options_t *options)
2103 {
2104   (void)now;
2105   (void)options;
2106   /* 1e. Periodically, if we're a v3 authority, we check whether our cert is
2107    * close to expiring and warn the admin if it is. */
2108   v3_authority_check_key_expiry();
2109 #define CHECK_V3_CERTIFICATE_INTERVAL (5*60)
2110   return CHECK_V3_CERTIFICATE_INTERVAL;
2111 }
2112
2113 /**
2114  * Scheduled callback: Run directory-authority voting functionality.
2115  *
2116  * The schedule is a bit complicated here, so dirvote_act() manages the
2117  * schedule itself.
2118  **/
2119 static int
2120 dirvote_callback(time_t now, const or_options_t *options)
2121 {
2122   if (!authdir_mode_v3(options)) {
2123     tor_assert_nonfatal_unreached();
2124     return 3600;
2125   }
2126
2127   time_t next = dirvote_act(options, now);
2128   if (BUG(next == TIME_MAX)) {
2129     /* This shouldn't be returned unless we called dirvote_act() without
2130      * being an authority.  If it happens, maybe our configuration will
2131      * fix itself in an hour or so? */
2132     return 3600;
2133   }
2134   return safe_timer_diff(now, next);
2135 }
2136
2137 /** Reschedule the directory-authority voting event.  Run this whenever the
2138  * schedule has changed. */
2139 void
2140 reschedule_dirvote(const or_options_t *options)
2141 {
2142   if (periodic_events_initialized && authdir_mode_v3(options)) {
2143     periodic_event_reschedule(dirvote_event);
2144   }
2145 }
2146
2147 /**
2148  * Periodic callback: If our consensus is too old, recalculate whether
2149  * we can actually use it.
2150  */
2151 static int
2152 check_expired_networkstatus_callback(time_t now, const or_options_t *options)
2153 {
2154   (void)options;
2155   /* Check whether our networkstatus has expired. */
2156   networkstatus_t *ns = networkstatus_get_latest_consensus();
2157   /*XXXX RD: This value needs to be the same as REASONABLY_LIVE_TIME in
2158    * networkstatus_get_reasonably_live_consensus(), but that value is way
2159    * way too high.  Arma: is the bridge issue there resolved yet? -NM */
2160 #define NS_EXPIRY_SLOP (24*60*60)
2161   if (ns && ns->valid_until < (now - NS_EXPIRY_SLOP) &&
2162       router_have_minimum_dir_info()) {
2163     router_dir_info_changed();
2164   }
2165 #define CHECK_EXPIRED_NS_INTERVAL (2*60)
2166   return CHECK_EXPIRED_NS_INTERVAL;
2167 }
2168
2169 /**
2170  * Scheduled callback: Save the state file to disk if appropriate.
2171  */
2172 static int
2173 save_state_callback(time_t now, const or_options_t *options)
2174 {
2175   (void) options;
2176   (void) or_state_save(now); // only saves if appropriate
2177   const time_t next_write = get_or_state()->next_write;
2178   if (next_write == TIME_MAX) {
2179     return 86400;
2180   }
2181   return safe_timer_diff(now, next_write);
2182 }
2183
2184 /** Reschedule the event for saving the state file.
2185  *
2186  * Run this when the state becomes dirty. */
2187 void
2188 reschedule_or_state_save(void)
2189 {
2190   if (save_state_event == NULL) {
2191     /* This can happen early on during startup. */
2192     return;
2193   }
2194   periodic_event_reschedule(save_state_event);
2195 }
2196
2197 /**
2198  * Periodic callback: Write statistics to disk if appropriate.
2199  */
2200 static int
2201 write_stats_file_callback(time_t now, const or_options_t *options)
2202 {
2203   /* 1g. Check whether we should write statistics to disk.
2204    */
2205 #define CHECK_WRITE_STATS_INTERVAL (60*60)
2206   time_t next_time_to_write_stats_files = now + CHECK_WRITE_STATS_INTERVAL;
2207   if (options->CellStatistics) {
2208     time_t next_write =
2209       rep_hist_buffer_stats_write(now);
2210     if (next_write && next_write < next_time_to_write_stats_files)
2211       next_time_to_write_stats_files = next_write;
2212   }
2213   if (options->DirReqStatistics) {
2214     time_t next_write = geoip_dirreq_stats_write(now);
2215     if (next_write && next_write < next_time_to_write_stats_files)
2216       next_time_to_write_stats_files = next_write;
2217   }
2218   if (options->EntryStatistics) {
2219     time_t next_write = geoip_entry_stats_write(now);
2220     if (next_write && next_write < next_time_to_write_stats_files)
2221       next_time_to_write_stats_files = next_write;
2222   }
2223   if (options->HiddenServiceStatistics) {
2224     time_t next_write = rep_hist_hs_stats_write(now);
2225     if (next_write && next_write < next_time_to_write_stats_files)
2226       next_time_to_write_stats_files = next_write;
2227   }
2228   if (options->ExitPortStatistics) {
2229     time_t next_write = rep_hist_exit_stats_write(now);
2230     if (next_write && next_write < next_time_to_write_stats_files)
2231       next_time_to_write_stats_files = next_write;
2232   }
2233   if (options->ConnDirectionStatistics) {
2234     time_t next_write = rep_hist_conn_stats_write(now);
2235     if (next_write && next_write < next_time_to_write_stats_files)
2236       next_time_to_write_stats_files = next_write;
2237   }
2238   if (options->BridgeAuthoritativeDir) {
2239     time_t next_write = rep_hist_desc_stats_write(now);
2240     if (next_write && next_write < next_time_to_write_stats_files)
2241       next_time_to_write_stats_files = next_write;
2242   }
2243
2244   return safe_timer_diff(now, next_time_to_write_stats_files);
2245 }
2246
2247 #define CHANNEL_CHECK_INTERVAL (60*60)
2248 static int
2249 check_canonical_channels_callback(time_t now, const or_options_t *options)
2250 {
2251   (void)now;
2252   if (public_server_mode(options))
2253     channel_check_for_duplicates();
2254
2255   return CHANNEL_CHECK_INTERVAL;
2256 }
2257
2258 static int
2259 reset_padding_counts_callback(time_t now, const or_options_t *options)
2260 {
2261   if (options->PaddingStatistics) {
2262     rep_hist_prep_published_padding_counts(now);
2263   }
2264
2265   rep_hist_reset_padding_counts();
2266   return REPHIST_CELL_PADDING_COUNTS_INTERVAL;
2267 }
2268
2269 static int should_init_bridge_stats = 1;
2270
2271 /**
2272  * Periodic callback: Write bridge statistics to disk if appropriate.
2273  */
2274 static int
2275 record_bridge_stats_callback(time_t now, const or_options_t *options)
2276 {
2277   /* 1h. Check whether we should write bridge statistics to disk.
2278    */
2279   if (should_record_bridge_info(options)) {
2280     if (should_init_bridge_stats) {
2281       /* (Re-)initialize bridge statistics. */
2282         geoip_bridge_stats_init(now);
2283         should_init_bridge_stats = 0;
2284         return WRITE_STATS_INTERVAL;
2285     } else {
2286       /* Possibly write bridge statistics to disk and ask when to write
2287        * them next time. */
2288       time_t next = geoip_bridge_stats_write(now);
2289       return safe_timer_diff(now, next);
2290     }
2291   } else if (!should_init_bridge_stats) {
2292     /* Bridge mode was turned off. Ensure that stats are re-initialized
2293      * next time bridge mode is turned on. */
2294     should_init_bridge_stats = 1;
2295   }
2296   return PERIODIC_EVENT_NO_UPDATE;
2297 }
2298
2299 /**
2300  * Periodic callback: Clean in-memory caches every once in a while
2301  */
2302 static int
2303 clean_caches_callback(time_t now, const or_options_t *options)
2304 {
2305   /* Remove old information from rephist and the rend cache. */
2306   rep_history_clean(now - options->RephistTrackTime);
2307   rend_cache_clean(now, REND_CACHE_TYPE_SERVICE);
2308   hs_cache_clean_as_client(now);
2309   hs_cache_clean_as_dir(now);
2310   microdesc_cache_rebuild(NULL, 0);
2311 #define CLEAN_CACHES_INTERVAL (30*60)
2312   return CLEAN_CACHES_INTERVAL;
2313 }
2314
2315 /**
2316  * Periodic callback: Clean the cache of failed hidden service lookups
2317  * frequently.
2318  */
2319 static int
2320 rend_cache_failure_clean_callback(time_t now, const or_options_t *options)
2321 {
2322   (void)options;
2323   /* We don't keep entries that are more than five minutes old so we try to
2324    * clean it as soon as we can since we want to make sure the client waits
2325    * as little as possible for reachability reasons. */
2326   rend_cache_failure_clean(now);
2327   hs_cache_client_intro_state_clean(now);
2328   return 30;
2329 }
2330
2331 /**
2332  * Periodic callback: If we're a server and initializing dns failed, retry.
2333  */
2334 static int
2335 retry_dns_callback(time_t now, const or_options_t *options)
2336 {
2337   (void)now;
2338 #define RETRY_DNS_INTERVAL (10*60)
2339   if (server_mode(options) && has_dns_init_failed())
2340     dns_init();
2341   return RETRY_DNS_INTERVAL;
2342 }
2343
2344 /** Periodic callback: consider rebuilding or and re-uploading our descriptor
2345  * (if we've passed our internal checks). */
2346 static int
2347 check_descriptor_callback(time_t now, const or_options_t *options)
2348 {
2349 /** How often do we check whether part of our router info has changed in a
2350  * way that would require an upload? That includes checking whether our IP
2351  * address has changed. */
2352 #define CHECK_DESCRIPTOR_INTERVAL (60)
2353
2354   (void)options;
2355
2356   /* 2b. Once per minute, regenerate and upload the descriptor if the old
2357    * one is inaccurate. */
2358   if (!net_is_disabled()) {
2359     check_descriptor_bandwidth_changed(now);
2360     check_descriptor_ipaddress_changed(now);
2361     mark_my_descriptor_dirty_if_too_old(now);
2362     consider_publishable_server(0);
2363     /* If any networkstatus documents are no longer recent, we need to
2364      * update all the descriptors' running status. */
2365     /* Remove dead routers. */
2366     /* XXXX This doesn't belong here, but it was here in the pre-
2367      * XXXX refactoring code. */
2368     routerlist_remove_old_routers();
2369   }
2370
2371   return CHECK_DESCRIPTOR_INTERVAL;
2372 }
2373
2374 /**
2375  * Periodic callback: check whether we're reachable (as a relay), and
2376  * whether our bandwidth has changed enough that we need to
2377  * publish a new descriptor.
2378  */
2379 static int
2380 check_for_reachability_bw_callback(time_t now, const or_options_t *options)
2381 {
2382   /* XXXX This whole thing was stuck in the middle of what is now
2383    * XXXX check_descriptor_callback.  I'm not sure it's right. */
2384
2385   static int dirport_reachability_count = 0;
2386   /* also, check religiously for reachability, if it's within the first
2387    * 20 minutes of our uptime. */
2388   if (server_mode(options) &&
2389       (have_completed_a_circuit() || !any_predicted_circuits(now)) &&
2390       !net_is_disabled()) {
2391     if (get_uptime() < TIMEOUT_UNTIL_UNREACHABILITY_COMPLAINT) {
2392       router_do_reachability_checks(1, dirport_reachability_count==0);
2393       if (++dirport_reachability_count > 5)
2394         dirport_reachability_count = 0;
2395       return 1;
2396     } else {
2397       /* If we haven't checked for 12 hours and our bandwidth estimate is
2398        * low, do another bandwidth test. This is especially important for
2399        * bridges, since they might go long periods without much use. */
2400       const routerinfo_t *me = router_get_my_routerinfo();
2401       static int first_time = 1;
2402       if (!first_time && me &&
2403           me->bandwidthcapacity < me->bandwidthrate &&
2404           me->bandwidthcapacity < 51200) {
2405         reset_bandwidth_test();
2406       }
2407       first_time = 0;
2408 #define BANDWIDTH_RECHECK_INTERVAL (12*60*60)
2409       return BANDWIDTH_RECHECK_INTERVAL;
2410     }
2411   }
2412   return CHECK_DESCRIPTOR_INTERVAL;
2413 }
2414
2415 /**
2416  * Periodic event: once a minute, (or every second if TestingTorNetwork, or
2417  * during client bootstrap), check whether we want to download any
2418  * networkstatus documents. */
2419 static int
2420 fetch_networkstatus_callback(time_t now, const or_options_t *options)
2421 {
2422   /* How often do we check whether we should download network status
2423    * documents? */
2424   const int we_are_bootstrapping = networkstatus_consensus_is_bootstrapping(
2425                                                                         now);
2426   const int prefer_mirrors = !directory_fetches_from_authorities(
2427                                                               get_options());
2428   int networkstatus_dl_check_interval = 60;
2429   /* check more often when testing, or when bootstrapping from mirrors
2430    * (connection limits prevent too many connections being made) */
2431   if (options->TestingTorNetwork
2432       || (we_are_bootstrapping && prefer_mirrors)) {
2433     networkstatus_dl_check_interval = 1;
2434   }
2435
2436   if (should_delay_dir_fetches(options, NULL))
2437     return PERIODIC_EVENT_NO_UPDATE;
2438
2439   update_networkstatus_downloads(now);
2440   return networkstatus_dl_check_interval;
2441 }
2442
2443 /**
2444  * Periodic callback: Every 60 seconds, we relaunch listeners if any died. */
2445 static int
2446 retry_listeners_callback(time_t now, const or_options_t *options)
2447 {
2448   (void)now;
2449   (void)options;
2450   if (!net_is_disabled()) {
2451     retry_all_listeners(NULL, 0);
2452     return 60;
2453   }
2454   return PERIODIC_EVENT_NO_UPDATE;
2455 }
2456
2457 /**
2458  * Periodic callback: as a server, see if we have any old unused circuits
2459  * that should be expired */
2460 static int
2461 expire_old_ciruits_serverside_callback(time_t now, const or_options_t *options)
2462 {
2463   (void)options;
2464   /* every 11 seconds, so not usually the same second as other such events */
2465   circuit_expire_old_circuits_serverside(now);
2466   return 11;
2467 }
2468
2469 /**
2470  * Callback: Send warnings if Tor doesn't find its ports reachable.
2471  */
2472 static int
2473 reachability_warnings_callback(time_t now, const or_options_t *options)
2474 {
2475   (void) now;
2476
2477   if (get_uptime() < TIMEOUT_UNTIL_UNREACHABILITY_COMPLAINT) {
2478     return (int)(TIMEOUT_UNTIL_UNREACHABILITY_COMPLAINT - get_uptime());
2479   }
2480
2481   if (server_mode(options) &&
2482       !net_is_disabled() &&
2483       have_completed_a_circuit()) {
2484     /* every 20 minutes, check and complain if necessary */
2485     const routerinfo_t *me = router_get_my_routerinfo();
2486     if (me && !check_whether_orport_reachable(options)) {
2487       char *address = tor_dup_ip(me->addr);
2488       log_warn(LD_CONFIG,"Your server (%s:%d) has not managed to confirm that "
2489                "its ORPort is reachable. Relays do not publish descriptors "
2490                "until their ORPort and DirPort are reachable. Please check "
2491                "your firewalls, ports, address, /etc/hosts file, etc.",
2492                address, me->or_port);
2493       control_event_server_status(LOG_WARN,
2494                                   "REACHABILITY_FAILED ORADDRESS=%s:%d",
2495                                   address, me->or_port);
2496       tor_free(address);
2497     }
2498
2499     if (me && !check_whether_dirport_reachable(options)) {
2500       char *address = tor_dup_ip(me->addr);
2501       log_warn(LD_CONFIG,
2502                "Your server (%s:%d) has not managed to confirm that its "
2503                "DirPort is reachable. Relays do not publish descriptors "
2504                "until their ORPort and DirPort are reachable. Please check "
2505                "your firewalls, ports, address, /etc/hosts file, etc.",
2506                address, me->dir_port);
2507       control_event_server_status(LOG_WARN,
2508                                   "REACHABILITY_FAILED DIRADDRESS=%s:%d",
2509                                   address, me->dir_port);
2510       tor_free(address);
2511     }
2512   }
2513
2514   return TIMEOUT_UNTIL_UNREACHABILITY_COMPLAINT;
2515 }
2516
2517 static int dns_honesty_first_time = 1;
2518
2519 /**
2520  * Periodic event: if we're an exit, see if our DNS server is telling us
2521  * obvious lies.
2522  */
2523 static int
2524 check_dns_honesty_callback(time_t now, const or_options_t *options)
2525 {
2526   (void)now;
2527   /* 9. and if we're an exit node, check whether our DNS is telling stories
2528    * to us. */
2529   if (net_is_disabled() ||
2530       ! public_server_mode(options) ||
2531       router_my_exit_policy_is_reject_star())
2532     return PERIODIC_EVENT_NO_UPDATE;
2533
2534   if (dns_honesty_first_time) {
2535     /* Don't launch right when we start */
2536     dns_honesty_first_time = 0;
2537     return crypto_rand_int_range(60, 180);
2538   }
2539
2540   dns_launch_correctness_checks();
2541   return 12*3600 + crypto_rand_int(12*3600);
2542 }
2543
2544 /**
2545  * Periodic callback: if we're the bridge authority, write a networkstatus
2546  * file to disk.
2547  */
2548 static int
2549 write_bridge_ns_callback(time_t now, const or_options_t *options)
2550 {
2551   /* 10. write bridge networkstatus file to disk */
2552   if (options->BridgeAuthoritativeDir) {
2553     networkstatus_dump_bridge_status_to_file(now);
2554 #define BRIDGE_STATUSFILE_INTERVAL (30*60)
2555      return BRIDGE_STATUSFILE_INTERVAL;
2556   }
2557   return PERIODIC_EVENT_NO_UPDATE;
2558 }
2559
2560 static int heartbeat_callback_first_time = 1;
2561
2562 /**
2563  * Periodic callback: write the heartbeat message in the logs.
2564  *
2565  * If writing the heartbeat message to the logs fails for some reason, retry
2566  * again after <b>MIN_HEARTBEAT_PERIOD</b> seconds.
2567  */
2568 static int
2569 heartbeat_callback(time_t now, const or_options_t *options)
2570 {
2571   /* Check if heartbeat is disabled */
2572   if (!options->HeartbeatPeriod) {
2573     return PERIODIC_EVENT_NO_UPDATE;
2574   }
2575
2576   /* Skip the first one. */
2577   if (heartbeat_callback_first_time) {
2578     heartbeat_callback_first_time = 0;
2579     return options->HeartbeatPeriod;
2580   }
2581
2582   /* Write the heartbeat message */
2583   if (log_heartbeat(now) == 0) {
2584     return options->HeartbeatPeriod;
2585   } else {
2586     /* If we couldn't write the heartbeat log message, try again in the minimum
2587      * interval of time. */
2588     return MIN_HEARTBEAT_PERIOD;
2589   }
2590 }
2591
2592 #define CDM_CLEAN_CALLBACK_INTERVAL 600
2593 static int
2594 clean_consdiffmgr_callback(time_t now, const or_options_t *options)
2595 {
2596   (void)now;
2597   if (dir_server_mode(options)) {
2598     consdiffmgr_cleanup();
2599   }
2600   return CDM_CLEAN_CALLBACK_INTERVAL;
2601 }
2602
2603 /*
2604  * Periodic callback: Run scheduled events for HS service. This is called
2605  * every second.
2606  */
2607 static int
2608 hs_service_callback(time_t now, const or_options_t *options)
2609 {
2610   (void) options;
2611
2612   /* We need to at least be able to build circuits and that we actually have
2613    * a working network. */
2614   if (!have_completed_a_circuit() || net_is_disabled() ||
2615       networkstatus_get_live_consensus(now) == NULL) {
2616     goto end;
2617   }
2618
2619   hs_service_run_scheduled_events(now);
2620
2621  end:
2622   /* Every 1 second. */
2623   return 1;
2624 }
2625
2626 /*
2627  * Periodic callback: Send once-per-second events to the controller(s).
2628  * This is called every second.
2629  */
2630 static int
2631 control_per_second_events_callback(time_t now, const or_options_t *options)
2632 {
2633   (void) options;
2634   (void) now;
2635
2636   control_per_second_events();
2637
2638   return 1;
2639 }
2640
2641 /** Last time that update_current_time was called. */
2642 static time_t current_second = 0;
2643 /** Last time that update_current_time updated current_second. */
2644 static monotime_coarse_t current_second_last_changed;
2645
2646 /**
2647  * Set the current time to "now", which should be the value returned by
2648  * time().  Check for clock jumps and track the total number of seconds we
2649  * have been running.
2650  */
2651 void
2652 update_current_time(time_t now)
2653 {
2654   if (PREDICT_LIKELY(now == current_second)) {
2655     /* We call this function a lot.  Most frequently, the current second
2656      * will not have changed, so we just return. */
2657     return;
2658   }
2659
2660   const time_t seconds_elapsed = current_second ? (now - current_second) : 0;
2661
2662   /* Check the wall clock against the monotonic clock, so we can
2663    * better tell idleness from clock jumps and/or other shenanigans. */
2664   monotime_coarse_t last_updated;
2665   memcpy(&last_updated, &current_second_last_changed, sizeof(last_updated));
2666   monotime_coarse_get(&current_second_last_changed);
2667
2668   /** How much clock jumping do we tolerate? */
2669 #define NUM_JUMPED_SECONDS_BEFORE_WARN 100
2670
2671   /** How much idleness do we tolerate? */
2672 #define NUM_IDLE_SECONDS_BEFORE_WARN 3600
2673
2674   if (seconds_elapsed < -NUM_JUMPED_SECONDS_BEFORE_WARN) {
2675     // moving back in time is always a bad sign.
2676     circuit_note_clock_jumped(seconds_elapsed, false);
2677
2678     /* Don't go dormant just because we jumped in time. */
2679     if (is_participating_on_network()) {
2680       reset_user_activity(now);
2681     }
2682   } else if (seconds_elapsed >= NUM_JUMPED_SECONDS_BEFORE_WARN) {
2683     /* Compare the monotonic clock to the result of time(). */
2684     const int32_t monotime_msec_passed =
2685       monotime_coarse_diff_msec32(&last_updated,
2686                                   &current_second_last_changed);
2687     const int monotime_sec_passed = monotime_msec_passed / 1000;
2688     const int discrepancy = monotime_sec_passed - (int)seconds_elapsed;
2689     /* If the monotonic clock deviates from time(NULL), we have a couple of
2690      * possibilities.  On some systems, this means we have been suspended or
2691      * sleeping.  Everywhere, it can mean that the wall-clock time has
2692      * been changed -- for example, with settimeofday().
2693      *
2694      * On the other hand, if the monotonic time matches with the wall-clock
2695      * time, we've probably just been idle for a while, with no events firing.
2696      * we tolerate much more of that.
2697      */
2698     const bool clock_jumped = abs(discrepancy) > 2;
2699
2700     if (clock_jumped || seconds_elapsed >= NUM_IDLE_SECONDS_BEFORE_WARN) {
2701       circuit_note_clock_jumped(seconds_elapsed, ! clock_jumped);
2702     }
2703
2704     /* Don't go dormant just because we jumped in time. */
2705     if (is_participating_on_network()) {
2706       reset_user_activity(now);
2707     }
2708   } else if (seconds_elapsed > 0) {
2709     stats_n_seconds_working += seconds_elapsed;
2710   }
2711
2712   update_approx_time(now);
2713   current_second = now;
2714 }
2715
#ifdef HAVE_SYSTEMD_209
/** Periodic timer that drives systemd_watchdog_callback(); created in
 * do_main_loop() and released in tor_mainloop_free_all(). */
static periodic_timer_t *systemd_watchdog_timer = NULL;

/** Libevent callback: invoked to reset systemd watchdog.  It does so by
 * sending "WATCHDOG=1" through sd_notify().  Neither <b>timer</b> nor
 * <b>arg</b> is used. */
static void
systemd_watchdog_callback(periodic_timer_t *timer, void *arg)
{
  (void)arg;
  (void)timer;

  sd_notify(0, "WATCHDOG=1");
}
#endif /* defined(HAVE_SYSTEMD_209) */
2728
2729 #define UPTIME_CUTOFF_FOR_NEW_BANDWIDTH_TEST (6*60*60)
2730
2731 /** Called when our IP address seems to have changed. <b>at_interface</b>
2732  * should be true if we detected a change in our interface, and false if we
2733  * detected a change in our published address. */
2734 void
2735 ip_address_changed(int at_interface)
2736 {
2737   const or_options_t *options = get_options();
2738   int server = server_mode(options);
2739   int exit_reject_interfaces = (server && options->ExitRelay
2740                                 && options->ExitPolicyRejectLocalInterfaces);
2741
2742   if (at_interface) {
2743     if (! server) {
2744       /* Okay, change our keys. */
2745       if (init_keys_client() < 0)
2746         log_warn(LD_GENERAL, "Unable to rotate keys after IP change!");
2747     }
2748   } else {
2749     if (server) {
2750       if (get_uptime() > UPTIME_CUTOFF_FOR_NEW_BANDWIDTH_TEST)
2751         reset_bandwidth_test();
2752       reset_uptime();
2753       router_reset_reachability();
2754     }
2755   }
2756
2757   /* Exit relays incorporate interface addresses in their exit policies when
2758    * ExitPolicyRejectLocalInterfaces is set */
2759   if (exit_reject_interfaces || (server && !at_interface)) {
2760     mark_my_descriptor_dirty("IP address changed");
2761   }
2762
2763   dns_servers_relaunch_checks();
2764 }
2765
2766 /** Forget what we've learned about the correctness of our DNS servers, and
2767  * start learning again. */
2768 void
2769 dns_servers_relaunch_checks(void)
2770 {
2771   if (server_mode(get_options())) {
2772     dns_reset_correctness_checks();
2773     if (periodic_events_initialized) {
2774       tor_assert(check_dns_honesty_event);
2775       periodic_event_reschedule(check_dns_honesty_event);
2776     }
2777   }
2778 }
2779
2780 /** Initialize some mainloop_event_t objects that we require. */
2781 void
2782 initialize_mainloop_events(void)
2783 {
2784   initialize_periodic_events();
2785
2786   if (!schedule_active_linked_connections_event) {
2787     schedule_active_linked_connections_event =
2788       mainloop_event_postloop_new(schedule_active_linked_connections_cb, NULL);
2789   }
2790   if (!postloop_cleanup_ev) {
2791     postloop_cleanup_ev =
2792       mainloop_event_postloop_new(postloop_cleanup_cb, NULL);
2793   }
2794 }
2795
2796 /** Tor main loop. */
2797 int
2798 do_main_loop(void)
2799 {
2800   /* For now, starting Tor always counts as user activity. Later, we might
2801    * have an option to control this.
2802    */
2803   reset_user_activity(approx_time());
2804   set_network_participation(true);
2805
2806   /* initialize the periodic events first, so that code that depends on the
2807    * events being present does not assert.
2808    */
2809   initialize_periodic_events();
2810   initialize_mainloop_events();
2811
2812 #ifdef HAVE_SYSTEMD_209
2813   uint64_t watchdog_delay;
2814   /* set up systemd watchdog notification. */
2815   if (sd_watchdog_enabled(1, &watchdog_delay) > 0) {
2816     if (! systemd_watchdog_timer) {
2817       struct timeval watchdog;
2818       /* The manager will "act on" us if we don't send them a notification
2819        * every 'watchdog_delay' microseconds.  So, send notifications twice
2820        * that often.  */
2821       watchdog_delay /= 2;
2822       watchdog.tv_sec = watchdog_delay  / 1000000;
2823       watchdog.tv_usec = watchdog_delay % 1000000;
2824
2825       systemd_watchdog_timer = periodic_timer_new(tor_libevent_get_base(),
2826                                                   &watchdog,
2827                                                   systemd_watchdog_callback,
2828                                                   NULL);
2829       tor_assert(systemd_watchdog_timer);
2830     }
2831   }
2832 #endif /* defined(HAVE_SYSTEMD_209) */
2833
2834   main_loop_should_exit = 0;
2835   main_loop_exit_value = 0;
2836
2837 #ifdef ENABLE_RESTART_DEBUGGING
2838   {
2839     static int first_time = 1;
2840
2841     if (first_time && getenv("TOR_DEBUG_RESTART")) {
2842       first_time = 0;
2843       const char *sec_str = getenv("TOR_DEBUG_RESTART_AFTER_SECONDS");
2844       long sec;
2845       int sec_ok=0;
2846       if (sec_str &&
2847           (sec = tor_parse_long(sec_str, 10, 0, INT_MAX, &sec_ok, NULL)) &&
2848           sec_ok) {
2849         /* Okay, we parsed the seconds. */
2850       } else {
2851         sec = 5;
2852       }
2853       struct timeval restart_after = { (time_t) sec, 0 };
2854       tor_shutdown_event_loop_for_restart_event =
2855         tor_evtimer_new(tor_libevent_get_base(),
2856                         tor_shutdown_event_loop_for_restart_cb, NULL);
2857       event_add(tor_shutdown_event_loop_for_restart_event, &restart_after);
2858     }
2859   }
2860 #endif
2861
2862   return run_main_loop_until_done();
2863 }
2864
2865 #ifndef _WIN32
2866 /** Rate-limiter for EINVAL-type libevent warnings. */
2867 static ratelim_t libevent_error_ratelim = RATELIM_INIT(10);
2868 #endif
2869
2870 /**
2871  * Run the main loop a single time. Return 0 for "exit"; -1 for "exit with
2872  * error", and 1 for "run this again."
2873  */
2874 static int
2875 run_main_loop_once(void)
2876 {
2877   int loop_result;
2878
2879   if (nt_service_is_stopping())
2880     return 0;
2881
2882   if (main_loop_should_exit)
2883     return 0;
2884
2885 #ifndef _WIN32
2886   /* Make it easier to tell whether libevent failure is our fault or not. */
2887   errno = 0;
2888 #endif
2889
2890   if (get_options()->MainloopStats) {
2891     /* We always enforce that EVLOOP_ONCE is passed to event_base_loop() if we
2892      * are collecting main loop statistics. */
2893     called_loop_once = 1;
2894   } else {
2895     called_loop_once = 0;
2896   }
2897
2898   /* Make sure we know (about) what time it is. */
2899   update_approx_time(time(NULL));
2900
2901   /* Here it is: the main loop.  Here we tell Libevent to poll until we have
2902    * an event, or the second ends, or until we have some active linked
2903    * connections to trigger events for.  Libevent will wait till one
2904    * of these happens, then run all the appropriate callbacks. */
2905   loop_result = tor_libevent_run_event_loop(tor_libevent_get_base(),
2906                                             called_loop_once);
2907
2908   if (get_options()->MainloopStats) {
2909     /* Update our main loop counters. */
2910     if (loop_result == 0) {
2911       // The call was successful.
2912       increment_main_loop_success_count();
2913     } else if (loop_result == -1) {
2914       // The call was erroneous.
2915       increment_main_loop_error_count();
2916     } else if (loop_result == 1) {
2917       // The call didn't have any active or pending events
2918       // to handle.
2919       increment_main_loop_idle_count();
2920     }
2921   }
2922
2923   /* Oh, the loop failed.  That might be an error that we need to
2924    * catch, but more likely, it's just an interrupted poll() call or something,
2925    * and we should try again. */
2926   if (loop_result < 0) {
2927     int e = tor_socket_errno(-1);
2928     /* let the program survive things like ^z */
2929     if (e != EINTR && !ERRNO_IS_EINPROGRESS(e)) {
2930       log_err(LD_NET,"libevent call with %s failed: %s [%d]",
2931               tor_libevent_get_method(), tor_socket_strerror(e), e);
2932       return -1;
2933 #ifndef _WIN32
2934     } else if (e == EINVAL) {
2935       log_fn_ratelim(&libevent_error_ratelim, LOG_WARN, LD_NET,
2936                      "EINVAL from libevent: should you upgrade libevent?");
2937       if (libevent_error_ratelim.n_calls_since_last_time > 8) {
2938         log_err(LD_NET, "Too many libevent errors, too fast: dying");
2939         return -1;
2940       }
2941 #endif /* !defined(_WIN32) */
2942     } else {
2943       tor_assert_nonfatal_once(! ERRNO_IS_EINPROGRESS(e));
2944       log_debug(LD_NET,"libevent call interrupted.");
2945       /* You can't trust the results of this poll(). Go back to the
2946        * top of the big for loop. */
2947       return 1;
2948     }
2949   }
2950
2951   if (main_loop_should_exit)
2952     return 0;
2953
2954   return 1;
2955 }
2956
2957 /** Run the run_main_loop_once() function until it declares itself done,
2958  * and return its final return value.
2959  *
2960  * Shadow won't invoke this function, so don't fill it up with things.
2961  */
2962 static int
2963 run_main_loop_until_done(void)
2964 {
2965   int loop_result = 1;
2966   do {
2967     loop_result = run_main_loop_once();
2968   } while (loop_result == 1);
2969
2970   if (main_loop_should_exit)
2971     return main_loop_exit_value;
2972   else
2973     return loop_result;
2974 }
2975
/** Returns Tor's uptime: the current value of stats_n_seconds_working.
 * That counter is advanced by update_current_time() and set back to zero
 * by reset_uptime().  Declared with MOCK_IMPL. */
MOCK_IMPL(long,
get_uptime,(void))
{
  return stats_n_seconds_working;
}
2982
/** Reset Tor's uptime: set stats_n_seconds_working, the counter reported by
 * get_uptime(), back to zero.  Declared with MOCK_IMPL. */
MOCK_IMPL(void,
reset_uptime,(void))
{
  stats_n_seconds_working = 0;
}
2989
2990 void
2991 tor_mainloop_free_all(void)
2992 {
2993   smartlist_free(connection_array);
2994   smartlist_free(closeable_connection_lst);
2995   smartlist_free(active_linked_connection_lst);
2996   teardown_periodic_events();
2997   tor_event_free(shutdown_did_not_work_event);
2998   tor_event_free(initialize_periodic_events_event);
2999   mainloop_event_free(directory_all_unreachable_cb_event);
3000   mainloop_event_free(schedule_active_linked_connections_event);
3001   mainloop_event_free(postloop_cleanup_ev);
3002   mainloop_event_free(handle_deferred_signewnym_ev);
3003   mainloop_event_free(scheduled_shutdown_ev);
3004   mainloop_event_free(rescan_periodic_events_ev);
3005
3006 #ifdef HAVE_SYSTEMD_209
3007   periodic_timer_free(systemd_watchdog_timer);
3008 #endif
3009
3010   stats_n_bytes_read = stats_n_bytes_written = 0;
3011
3012   memset(&global_bucket, 0, sizeof(global_bucket));
3013   memset(&global_relayed_bucket, 0, sizeof(global_relayed_bucket));
3014   time_of_process_start = 0;
3015   time_of_last_signewnym = 0;
3016   signewnym_is_pending = 0;
3017   newnym_epoch = 0;
3018   called_loop_once = 0;
3019   main_loop_should_exit = 0;
3020   main_loop_exit_value = 0;
3021   can_complete_circuits = 0;
3022   quiet_level = 0;
3023   should_init_bridge_stats = 1;
3024   dns_honesty_first_time = 1;
3025   heartbeat_callback_first_time = 1;
3026   current_second = 0;
3027   memset(&current_second_last_changed, 0,
3028          sizeof(current_second_last_changed));
3029 }