src/or/main.c

   1 /* Copyright (c) 2001 Matej Pfajfar.
   2  * Copyright (c) 2001-2004, Roger Dingledine.
   3  * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson.
   4  * Copyright (c) 2007-2010, The Tor Project, Inc. */
   5 /* See LICENSE for licensing information */
   6
   7 /**
   8  * \file main.c
   9  * \brief Toplevel module. Handles signals, multiplexes between
  10  * connections, implements main loop, and drives scheduled events.
  11  **/
  12
  13 #define MAIN_PRIVATE
  14 #include "or.h"
  15 #ifdef USE_DMALLOC
  16 #include <dmalloc.h>
  17 #include <openssl/crypto.h>
  18 #endif
  19 #include "memarea.h"
  20
  21 #ifdef HAVE_EVENT2_EVENT_H
  22 #include <event2/event.h>
  23 #else
  24 #include <event.h>
  25 #endif
  26
  27 void evdns_shutdown(int);
  28
  29 /********* PROTOTYPES **********/
  30
  31 static void dumpmemusage(int severity);
  32 static void dumpstats(int severity); /* log stats */
  33 static void conn_read_callback(int fd, short event, void *_conn);
  34 static void conn_write_callback(int fd, short event, void *_conn);
  35 static void signal_callback(int fd, short events, void *arg);
  36 static void second_elapsed_callback(int fd, short event, void *args);
  37 static int conn_close_if_marked(int i);
  38 static void connection_start_reading_from_linked_conn(connection_t *conn);
  39 static int connection_should_read_from_linked_conn(connection_t *conn);
  40
  41 /********* START VARIABLES **********/
  42
  43 int global_read_bucket; /**< Max number of bytes I can read this second. */
  44 int global_write_bucket; /**< Max number of bytes I can write this second. */
  45
  46 /** Max number of relayed (bandwidth class 1) bytes I can read this second. */
  47 int global_relayed_read_bucket;
  48 /** Max number of relayed (bandwidth class 1) bytes I can write this second. */
  49 int global_relayed_write_bucket;
  50
  51 /** What was the read bucket before the last second_elapsed_callback() call?
  52  * (used to determine how many bytes we've read). */
  53 static int stats_prev_global_read_bucket;
  54 /** What was the write bucket before the last second_elapsed_callback() call?
  55  * (used to determine how many bytes we've written). */
  56 static int stats_prev_global_write_bucket;
  57 /* XXX we might want to keep stats about global_relayed_*_bucket too. Or not.*/
  58 /** How many bytes have we read since we started the process? */
  59 static uint64_t stats_n_bytes_read = 0;
  60 /** How many bytes have we written since we started the process? */
  61 static uint64_t stats_n_bytes_written = 0;
  62 /** What time did this process start up? */
  63 time_t time_of_process_start = 0;
  64 /** How many seconds have we been running? */
  65 long stats_n_seconds_working = 0;
  66 /** When do we next launch DNS wildcarding checks? */
  67 static time_t time_to_check_for_correct_dns = 0;
  68
  69 /** How often will we honor SIGNEWNYM requests? */
  70 #define MAX_SIGNEWNYM_RATE 10
  71 /** When did we last process a SIGNEWNYM request? */
  72 static time_t time_of_last_signewnym = 0;
  73 /** Is there a signewnym request we're currently waiting to handle? */
  74 static int signewnym_is_pending = 0;
  75
  76 /** Smartlist of all open connections. */
  77 static smartlist_t *connection_array = NULL;
  78 /** List of connections that have been marked for close and need to be freed
  79  * and removed from connection_array. */
  80 static smartlist_t *closeable_connection_lst = NULL;
  81 /** List of linked connections that are currently reading data into their
  82  * inbuf from their partner's outbuf. */
  83 static smartlist_t *active_linked_connection_lst = NULL;
  84 /** Flag: Set to true iff we entered the current libevent main loop via
  85  * <b>loop_once</b>. If so, there's no need to trigger a loopexit in order
  86  * to handle linked connections. */
  87 static int called_loop_once = 0;
  88
  89 /** We set this to 1 when we've opened a circuit, so we can print a log
  90  * entry to inform the user that Tor is working. */
  91 int has_completed_circuit=0;
  92
  93 /** How often do we check for router descriptors that we should download
  94  * when we have too little directory info? */
  95 #define GREEDY_DESCRIPTOR_RETRY_INTERVAL (10)
  96 /** How often do we check for router descriptors that we should download
  97  * when we have enough directory info? */
  98 #define LAZY_DESCRIPTOR_RETRY_INTERVAL (60)
  99 /** How often do we 'forgive' undownloadable router descriptors and attempt
 100  * to download them again? */
 101 #define DESCRIPTOR_FAILURE_RESET_INTERVAL (60*60)
 102 /** How long do we let a directory connection stall before expiring it? */
 103 #define DIR_CONN_MAX_STALL (5*60)
 104
 105 /** How long do we let OR connections handshake before we decide that
 106  * they are obsolete? */
 107 #define TLS_HANDSHAKE_TIMEOUT (60)
 108
 109 /********* END VARIABLES ************/
 110
 111 /****************************************************************************
 112 *
 113 * This section contains accessors and other methods on the connection_array
 114 * variables (which are global within this file and unavailable outside it).
 115 *
 116 ****************************************************************************/
 117
 118 /** Add <b>conn</b> to the array of connections that we can poll on.  The
 119  * connection's socket must be set; the connection starts out
 120  * non-reading and non-writing.
 121  */
 122 int
 123 connection_add(connection_t *conn)
 124 {
 125   tor_assert(conn);
 126   tor_assert(conn->s >= 0 ||
 127              conn->linked ||
 128              (conn->type == CONN_TYPE_AP &&
 129               TO_EDGE_CONN(conn)->is_dns_request));
 130
 131   tor_assert(conn->conn_array_index == -1); /* can only connection_add once */
 132   conn->conn_array_index = smartlist_len(connection_array);
 133   smartlist_add(connection_array, conn);
 134
 135   if (conn->s >= 0 || conn->linked) {
 136     conn->read_event = tor_event_new(tor_libevent_get_base(),
 137          conn->s, EV_READ|EV_PERSIST, conn_read_callback, conn);
 138     conn->write_event = tor_event_new(tor_libevent_get_base(),
 139          conn->s, EV_WRITE|EV_PERSIST, conn_write_callback, conn);
 140   }
 141
 142   log_debug(LD_NET,"new conn type %s, socket %d, address %s, n_conns %d.",
 143             conn_type_to_string(conn->type), conn->s, conn->address,
 144             smartlist_len(connection_array));
 145
 146   return 0;
 147 }
 148
 149 /** Tell libevent that we don't care about <b>conn</b> any more. */
 150 void
 151 connection_unregister_events(connection_t *conn)
 152 {
 153   if (conn->read_event) {
 154     if (event_del(conn->read_event))
 155       log_warn(LD_BUG, "Error removing read event for %d", conn->s);
 156     tor_free(conn->read_event);
 157   }
 158   if (conn->write_event) {
 159     if (event_del(conn->write_event))
 160       log_warn(LD_BUG, "Error removing write event for %d", conn->s);
 161     tor_free(conn->write_event);
 162   }
 163   if (conn->dns_server_port) {
 164     dnsserv_close_listener(conn);
 165   }
 166 }
 167
 168 /** Remove the connection from the global list, and remove the
 169  * corresponding poll entry.  Calling this function will shift the last
 170  * connection (if any) into the position occupied by conn.
 171  */
 172 int
 173 connection_remove(connection_t *conn)
 174 {
 175   int current_index;
 176   connection_t *tmp;
 177
 178   tor_assert(conn);
 179
 180   log_debug(LD_NET,"removing socket %d (type %s), n_conns now %d",
 181             conn->s, conn_type_to_string(conn->type),
 182             smartlist_len(connection_array));
 183
 184   tor_assert(conn->conn_array_index >= 0);
 185   current_index = conn->conn_array_index;
 186   connection_unregister_events(conn); /* This is redundant, but cheap. */
 187   if (current_index == smartlist_len(connection_array)-1) { /* at the end */
 188     smartlist_del(connection_array, current_index);
 189     return 0;
 190   }
 191
 192   /* replace this one with the one at the end */
 193   smartlist_del(connection_array, current_index);
 194   tmp = smartlist_get(connection_array, current_index);
 195   tmp->conn_array_index = current_index;
 196
 197   return 0;
 198 }
 199
 200 /** If <b>conn</b> is an edge conn, remove it from the list
 201  * of conn's on this circuit. If it's not on an edge,
 202  * flush and send destroys for all circuits on this conn.
 203  *
 204  * Remove it from connection_array (if applicable) and
 205  * from closeable_connection_list.
 206  *
 207  * Then free it.
 208  */
 209 static void
 210 connection_unlink(connection_t *conn)
 211 {
 212   connection_about_to_close_connection(conn);
 213   if (conn->conn_array_index >= 0) {
 214     connection_remove(conn);
 215   }
 216   if (conn->linked_conn) {
 217     conn->linked_conn->linked_conn = NULL;
 218     if (! conn->linked_conn->marked_for_close &&
 219         conn->linked_conn->reading_from_linked_conn)
 220       connection_start_reading(conn->linked_conn);
 221     conn->linked_conn = NULL;
 222   }
 223   smartlist_remove(closeable_connection_lst, conn);
 224   smartlist_remove(active_linked_connection_lst, conn);
 225   if (conn->type == CONN_TYPE_EXIT) {
 226     assert_connection_edge_not_dns_pending(TO_EDGE_CONN(conn));
 227   }
 228   if (conn->type == CONN_TYPE_OR) {
 229     if (!tor_digest_is_zero(TO_OR_CONN(conn)->identity_digest))
 230       connection_or_remove_from_identity_map(TO_OR_CONN(conn));
 231   }
 232   connection_free(conn);
 233 }
 234
 235 /** Schedule <b>conn</b> to be closed. **/
 236 void
 237 add_connection_to_closeable_list(connection_t *conn)
 238 {
 239   tor_assert(!smartlist_isin(closeable_connection_lst, conn));
 240   tor_assert(conn->marked_for_close);
 241   assert_connection_ok(conn, time(NULL));
 242   smartlist_add(closeable_connection_lst, conn);
 243 }
 244
 245 /** Return 1 if conn is on the closeable list, else return 0. */
 246 int
 247 connection_is_on_closeable_list(connection_t *conn)
 248 {
 249   return smartlist_isin(closeable_connection_lst, conn);
 250 }
 251
 252 /** Return true iff conn is in the current poll array. */
 253 int
 254 connection_in_array(connection_t *conn)
 255 {
 256   return smartlist_isin(connection_array, conn);
 257 }
 258
 259 /** Set <b>*array</b> to an array of all connections, and <b>*n</b>
 260  * to the length of the array. <b>*array</b> and <b>*n</b> must not
 261  * be modified.
 262  */
 263 smartlist_t *
 264 get_connection_array(void)
 265 {
 266   if (!connection_array)
 267     connection_array = smartlist_create();
 268   return connection_array;
 269 }
 270
 271 /** Set the event mask on <b>conn</b> to <b>events</b>.  (The event
 272  * mask is a bitmask whose bits are READ_EVENT and WRITE_EVENT)
 273  */
 274 void
 275 connection_watch_events(connection_t *conn, watchable_events_t events)
 276 {
 277   if (events & READ_EVENT)
 278     connection_start_reading(conn);
 279   else
 280     connection_stop_reading(conn);
 281
 282   if (events & WRITE_EVENT)
 283     connection_start_writing(conn);
 284   else
 285     connection_stop_writing(conn);
 286 }
 287
 288 /** Return true iff <b>conn</b> is listening for read events. */
 289 int
 290 connection_is_reading(connection_t *conn)
 291 {
 292   tor_assert(conn);
 293
 294   return conn->reading_from_linked_conn ||
 295     (conn->read_event && event_pending(conn->read_event, EV_READ, NULL));
 296 }
 297
 298 /** Tell the main loop to stop notifying <b>conn</b> of any read events. */
 299 void
 300 connection_stop_reading(connection_t *conn)
 301 {
 302   tor_assert(conn);
 303   tor_assert(conn->read_event);
 304
 305   if (conn->linked) {
 306     conn->reading_from_linked_conn = 0;
 307     connection_stop_reading_from_linked_conn(conn);
 308   } else {
 309     if (event_del(conn->read_event))
 310       log_warn(LD_NET, "Error from libevent setting read event state for %d "
 311                "to unwatched: %s",
 312                conn->s,
 313                tor_socket_strerror(tor_socket_errno(conn->s)));
 314   }
 315 }
 316
 317 /** Tell the main loop to start notifying <b>conn</b> of any read events. */
 318 void
 319 connection_start_reading(connection_t *conn)
 320 {
 321   tor_assert(conn);
 322   tor_assert(conn->read_event);
 323
 324   if (conn->linked) {
 325     conn->reading_from_linked_conn = 1;
 326     if (connection_should_read_from_linked_conn(conn))
 327       connection_start_reading_from_linked_conn(conn);
 328   } else {
 329     if (event_add(conn->read_event, NULL))
 330       log_warn(LD_NET, "Error from libevent setting read event state for %d "
 331                "to watched: %s",
 332                conn->s,
 333                tor_socket_strerror(tor_socket_errno(conn->s)));
 334   }
 335 }
 336
 337 /** Return true iff <b>conn</b> is listening for write events. */
 338 int
 339 connection_is_writing(connection_t *conn)
 340 {
 341   tor_assert(conn);
 342
 343   return conn->writing_to_linked_conn ||
 344     (conn->write_event && event_pending(conn->write_event, EV_WRITE, NULL));
 345 }
 346
 347 /** Tell the main loop to stop notifying <b>conn</b> of any write events. */
 348 void
 349 connection_stop_writing(connection_t *conn)
 350 {
 351   tor_assert(conn);
 352   tor_assert(conn->write_event);
 353
 354   if (conn->linked) {
 355     conn->writing_to_linked_conn = 0;
 356     if (conn->linked_conn)
 357       connection_stop_reading_from_linked_conn(conn->linked_conn);
 358   } else {
 359     if (event_del(conn->write_event))
 360       log_warn(LD_NET, "Error from libevent setting write event state for %d "
 361                "to unwatched: %s",
 362                conn->s,
 363                tor_socket_strerror(tor_socket_errno(conn->s)));
 364   }
 365 }
 366
 367 /** Tell the main loop to start notifying <b>conn</b> of any write events. */
 368 void
 369 connection_start_writing(connection_t *conn)
 370 {
 371   tor_assert(conn);
 372   tor_assert(conn->write_event);
 373
 374   if (conn->linked) {
 375     conn->writing_to_linked_conn = 1;
 376     if (conn->linked_conn &&
 377         connection_should_read_from_linked_conn(conn->linked_conn))
 378       connection_start_reading_from_linked_conn(conn->linked_conn);
 379   } else {
 380     if (event_add(conn->write_event, NULL))
 381       log_warn(LD_NET, "Error from libevent setting write event state for %d "
 382                "to watched: %s",
 383                conn->s,
 384                tor_socket_strerror(tor_socket_errno(conn->s)));
 385   }
 386 }
 387
 388 /** Return true iff <b>conn</b> is linked conn, and reading from the conn
 389  * linked to it would be good and feasible.  (Reading is "feasible" if the
 390  * other conn exists and has data in its outbuf, and is "good" if we have our
 391  * reading_from_linked_conn flag set and the other conn has its
 392  * writing_to_linked_conn flag set.)*/
 393 static int
 394 connection_should_read_from_linked_conn(connection_t *conn)
 395 {
 396   if (conn->linked && conn->reading_from_linked_conn) {
 397     if (! conn->linked_conn ||
 398         (conn->linked_conn->writing_to_linked_conn &&
 399          buf_datalen(conn->linked_conn->outbuf)))
 400       return 1;
 401   }
 402   return 0;
 403 }
 404
 405 /** Helper: Tell the main loop to begin reading bytes into <b>conn</b> from
 406  * its linked connection, if it is not doing so already.  Called by
 407  * connection_start_reading and connection_start_writing as appropriate. */
 408 static void
 409 connection_start_reading_from_linked_conn(connection_t *conn)
 410 {
 411   tor_assert(conn);
 412   tor_assert(conn->linked == 1);
 413
 414   if (!conn->active_on_link) {
 415     conn->active_on_link = 1;
 416     smartlist_add(active_linked_connection_lst, conn);
 417     if (!called_loop_once) {
 418       /* This is the first event on the list; we won't be in LOOP_ONCE mode,
 419        * so we need to make sure that the event_base_loop() actually exits at
 420        * the end of its run through the current connections and lets us
 421        * activate read events for linked connections. */
 422       struct timeval tv = { 0, 0 };
 423       tor_event_base_loopexit(tor_libevent_get_base(), &tv);
 424     }
 425   } else {
 426     tor_assert(smartlist_isin(active_linked_connection_lst, conn));
 427   }
 428 }
 429
 430 /** Tell the main loop to stop reading bytes into <b>conn</b> from its linked
 431  * connection, if is currently doing so.  Called by connection_stop_reading,
 432  * connection_stop_writing, and connection_read. */
 433 void
 434 connection_stop_reading_from_linked_conn(connection_t *conn)
 435 {
 436   tor_assert(conn);
 437   tor_assert(conn->linked == 1);
 438
 439   if (conn->active_on_link) {
 440     conn->active_on_link = 0;
 441     /* FFFF We could keep an index here so we can smartlist_del
 442      * cleanly.  On the other hand, this doesn't show up on profiles,
 443      * so let's leave it alone for now. */
 444     smartlist_remove(active_linked_connection_lst, conn);
 445   } else {
 446     tor_assert(!smartlist_isin(active_linked_connection_lst, conn));
 447   }
 448 }
 449
 450 /** Close all connections that have been scheduled to get closed. */
 451 static void
 452 close_closeable_connections(void)
 453 {
 454   int i;
 455   for (i = 0; i < smartlist_len(closeable_connection_lst); ) {
 456     connection_t *conn = smartlist_get(closeable_connection_lst, i);
 457     if (conn->conn_array_index < 0) {
 458       connection_unlink(conn); /* blow it away right now */
 459     } else {
 460       if (!conn_close_if_marked(conn->conn_array_index))
 461         ++i;
 462     }
 463   }
 464 }
 465
 466 /** Libevent callback: this gets invoked when (connection_t*)<b>conn</b> has
 467  * some data to read. */
 468 static void
 469 conn_read_callback(int fd, short event, void *_conn)
 470 {
 471   connection_t *conn = _conn;
 472   (void)fd;
 473   (void)event;
 474
 475   log_debug(LD_NET,"socket %d wants to read.",conn->s);
 476
 477   /* assert_connection_ok(conn, time(NULL)); */
 478
 479   if (connection_handle_read(conn) < 0) {
 480     if (!conn->marked_for_close) {
 481 #ifndef MS_WINDOWS
 482       log_warn(LD_BUG,"Unhandled error on read for %s connection "
 483                "(fd %d); removing",
 484                conn_type_to_string(conn->type), conn->s);
 485       tor_fragile_assert();
 486 #endif
 487       if (CONN_IS_EDGE(conn))
 488         connection_edge_end_errno(TO_EDGE_CONN(conn));
 489       connection_mark_for_close(conn);
 490     }
 491   }
 492   assert_connection_ok(conn, time(NULL));
 493
 494   if (smartlist_len(closeable_connection_lst))
 495     close_closeable_connections();
 496 }
 497
 498 /** Libevent callback: this gets invoked when (connection_t*)<b>conn</b> has
 499  * some data to write. */
 500 static void
 501 conn_write_callback(int fd, short events, void *_conn)
 502 {
 503   connection_t *conn = _conn;
 504   (void)fd;
 505   (void)events;
 506
 507   LOG_FN_CONN(conn, (LOG_DEBUG, LD_NET, "socket %d wants to write.",conn->s));
 508
 509   /* assert_connection_ok(conn, time(NULL)); */
 510
 511   if (connection_handle_write(conn, 0) < 0) {
 512     if (!conn->marked_for_close) {
 513       /* this connection is broken. remove it. */
 514       log_fn(LOG_WARN,LD_BUG,
 515              "unhandled error on write for %s connection (fd %d); removing",
 516              conn_type_to_string(conn->type), conn->s);
 517       tor_fragile_assert();
 518       if (CONN_IS_EDGE(conn)) {
 519         /* otherwise we cry wolf about duplicate close */
 520         edge_connection_t *edge_conn = TO_EDGE_CONN(conn);
 521         if (!edge_conn->end_reason)
 522           edge_conn->end_reason = END_STREAM_REASON_INTERNAL;
 523         edge_conn->edge_has_sent_end = 1;
 524       }
 525       connection_close_immediate(conn); /* So we don't try to flush. */
 526       connection_mark_for_close(conn);
 527     }
 528   }
 529   assert_connection_ok(conn, time(NULL));
 530
 531   if (smartlist_len(closeable_connection_lst))
 532     close_closeable_connections();
 533 }
 534
 535 /** If the connection at connection_array[i] is marked for close, then:
 536  *    - If it has data that it wants to flush, try to flush it.
 537  *    - If it _still_ has data to flush, and conn->hold_open_until_flushed is
 538  *      true, then leave the connection open and return.
 539  *    - Otherwise, remove the connection from connection_array and from
 540  *      all other lists, close it, and free it.
 541  * Returns 1 if the connection was closed, 0 otherwise.
 542  */
 543 static int
 544 conn_close_if_marked(int i)
 545 {
 546   connection_t *conn;
 547   int retval;
 548   time_t now;
 549
 550   conn = smartlist_get(connection_array, i);
 551   if (!conn->marked_for_close)
 552     return 0; /* nothing to see here, move along */
 553   now = time(NULL);
 554   assert_connection_ok(conn, now);
 555   /* assert_all_pending_dns_resolves_ok(); */
 556
 557   log_debug(LD_NET,"Cleaning up connection (fd %d).",conn->s);
 558   if ((conn->s >= 0 || conn->linked_conn) && connection_wants_to_flush(conn)) {
 559     /* s == -1 means it's an incomplete edge connection, or that the socket
 560      * has already been closed as unflushable. */
 561     ssize_t sz = connection_bucket_write_limit(conn, now);
 562     if (!conn->hold_open_until_flushed)
 563       log_info(LD_NET,
 564                "Conn (addr %s, fd %d, type %s, state %d) marked, but wants "
 565                "to flush %d bytes. (Marked at %s:%d)",
 566                escaped_safe_str_client(conn->address),
 567                conn->s, conn_type_to_string(conn->type), conn->state,
 568                (int)conn->outbuf_flushlen,
 569                 conn->marked_for_close_file, conn->marked_for_close);
 570     if (conn->linked_conn) {
 571       retval = move_buf_to_buf(conn->linked_conn->inbuf, conn->outbuf,
 572                                &conn->outbuf_flushlen);
 573       if (retval >= 0) {
 574         /* The linked conn will notice that it has data when it notices that
 575          * we're gone. */
 576         connection_start_reading_from_linked_conn(conn->linked_conn);
 577       }
 578       log_debug(LD_GENERAL, "Flushed last %d bytes from a linked conn; "
 579                "%d left; flushlen %d; wants-to-flush==%d", retval,
 580                (int)buf_datalen(conn->outbuf),
 581                (int)conn->outbuf_flushlen,
 582                 connection_wants_to_flush(conn));
 583     } else if (connection_speaks_cells(conn)) {
 584       if (conn->state == OR_CONN_STATE_OPEN) {
 585         retval = flush_buf_tls(TO_OR_CONN(conn)->tls, conn->outbuf, sz,
 586                                &conn->outbuf_flushlen);
 587       } else
 588         retval = -1; /* never flush non-open broken tls connections */
 589     } else {
 590       retval = flush_buf(conn->s, conn->outbuf, sz, &conn->outbuf_flushlen);
 591     }
 592     if (retval >= 0 && /* Technically, we could survive things like
 593                           TLS_WANT_WRITE here. But don't bother for now. */
 594         conn->hold_open_until_flushed && connection_wants_to_flush(conn)) {
 595       if (retval > 0) {
 596         LOG_FN_CONN(conn, (LOG_INFO,LD_NET,
 597                            "Holding conn (fd %d) open for more flushing.",
 598                            conn->s));
 599         conn->timestamp_lastwritten = now; /* reset so we can flush more */
 600       }
 601       return 0;
 602     }
 603     if (connection_wants_to_flush(conn)) {
 604       int severity;
 605       if (conn->type == CONN_TYPE_EXIT ||
 606           (conn->type == CONN_TYPE_OR && server_mode(get_options())) ||
 607           (conn->type == CONN_TYPE_DIR && conn->purpose == DIR_PURPOSE_SERVER))
 608         severity = LOG_INFO;
 609       else
 610         severity = LOG_NOTICE;
 611       /* XXXX Maybe allow this to happen a certain amount per hour; it usually
 612        * is meaningless. */
 613       log_fn(severity, LD_NET, "We stalled too much while trying to write %d "
 614              "bytes to address %s.  If this happens a lot, either "
 615              "something is wrong with your network connection, or "
 616              "something is wrong with theirs. "
 617              "(fd %d, type %s, state %d, marked at %s:%d).",
 618              (int)buf_datalen(conn->outbuf),
 619              escaped_safe_str_client(conn->address),
 620              conn->s, conn_type_to_string(conn->type), conn->state,
 621              conn->marked_for_close_file,
 622              conn->marked_for_close);
 623     }
 624   }
 625   connection_unlink(conn); /* unlink, remove, free */
 626   return 1;
 627 }
 628
 629 /** We've just tried every dirserver we know about, and none of
 630  * them were reachable. Assume the network is down. Change state
 631  * so next time an application connection arrives we'll delay it
 632  * and try another directory fetch. Kill off all the circuit_wait
 633  * streams that are waiting now, since they will all timeout anyway.
 634  */
 635 void
 636 directory_all_unreachable(time_t now)
 637 {
 638   connection_t *conn;
 639   (void)now;
 640
 641   stats_n_seconds_working=0; /* reset it */
 642
 643   while ((conn = connection_get_by_type_state(CONN_TYPE_AP,
 644                                               AP_CONN_STATE_CIRCUIT_WAIT))) {
 645     edge_connection_t *edge_conn = TO_EDGE_CONN(conn);
 646     log_notice(LD_NET,
 647                "Is your network connection down? "
 648                "Failing connection to '%s:%d'.",
 649                safe_str_client(edge_conn->socks_request->address),
 650                edge_conn->socks_request->port);
 651     connection_mark_unattached_ap(edge_conn,
 652                                   END_STREAM_REASON_NET_UNREACHABLE);
 653   }
 654   control_event_general_status(LOG_ERR, "DIR_ALL_UNREACHABLE");
 655 }
 656
 657 /** This function is called whenever we successfully pull down some new
 658  * network statuses or server descriptors. */
 659 void
 660 directory_info_has_arrived(time_t now, int from_cache)
 661 {
 662   or_options_t *options = get_options();
 663
 664   if (!router_have_minimum_dir_info()) {
 665     int quiet = directory_too_idle_to_fetch_descriptors(options, now);
 666     log(quiet ? LOG_INFO : LOG_NOTICE, LD_DIR,
 667         "I learned some more directory information, but not enough to "
 668         "build a circuit: %s", get_dir_info_status_string());
 669     update_router_descriptor_downloads(now);
 670     return;
 671   } else {
 672     if (directory_fetches_from_authorities(options))
 673       update_router_descriptor_downloads(now);
 674
 675     /* if we have enough dir info, then update our guard status with
 676      * whatever we just learned. */
 677     entry_guards_compute_status();
 678     /* Don't even bother trying to get extrainfo until the rest of our
 679      * directory info is up-to-date */
 680     if (options->DownloadExtraInfo)
 681       update_extrainfo_downloads(now);
 682   }
 683
 684   if (server_mode(options) && !we_are_hibernating() && !from_cache &&
 685       (has_completed_circuit || !any_predicted_circuits(now)))
 686     consider_testing_reachability(1, 1);
 687 }
 688
 689 /** Perform regular maintenance tasks for a single connection.  This
 690  * function gets run once per second per connection by run_scheduled_events.
 691  */
 692 static void
 693 run_connection_housekeeping(int i, time_t now)
 694 {
 695   cell_t cell;
 696   connection_t *conn = smartlist_get(connection_array, i);
 697   or_options_t *options = get_options();
 698   or_connection_t *or_conn;
 699
 700   if (conn->outbuf && !buf_datalen(conn->outbuf) && conn->type == CONN_TYPE_OR)
 701     TO_OR_CONN(conn)->timestamp_lastempty = now;
 702
 703   if (conn->marked_for_close) {
 704     /* nothing to do here */
 705     return;
 706   }
 707
 708   /* Expire any directory connections that haven't been active (sent
 709    * if a server or received if a client) for 5 min */
 710   if (conn->type == CONN_TYPE_DIR &&
 711       ((DIR_CONN_IS_SERVER(conn) &&
 712         conn->timestamp_lastwritten + DIR_CONN_MAX_STALL < now) ||
 713        (!DIR_CONN_IS_SERVER(conn) &&
 714         conn->timestamp_lastread + DIR_CONN_MAX_STALL < now))) {
 715     log_info(LD_DIR,"Expiring wedged directory conn (fd %d, purpose %d)",
 716              conn->s, conn->purpose);
 717     /* This check is temporary; it's to let us know whether we should consider
 718      * parsing partial serverdesc responses. */
 719     if (conn->purpose == DIR_PURPOSE_FETCH_SERVERDESC &&
 720         buf_datalen(conn->inbuf)>=1024) {
 721       log_info(LD_DIR,"Trying to extract information from wedged server desc "
 722                "download.");
 723       connection_dir_reached_eof(TO_DIR_CONN(conn));
 724     } else {
 725       connection_mark_for_close(conn);
 726     }
 727     return;
 728   }
 729
 730   if (!connection_speaks_cells(conn))
 731     return; /* we're all done here, the rest is just for OR conns */
 732
 733   or_conn = TO_OR_CONN(conn);
 734   tor_assert(conn->outbuf);
 735
 736   if (or_conn->is_bad_for_new_circs && !or_conn->n_circuits) {
 737     /* It's bad for new circuits, and has no unmarked circuits on it:
 738      * mark it now. */
 739     log_info(LD_OR,
 740              "Expiring non-used OR connection to fd %d (%s:%d) [Too old].",
 741              conn->s, conn->address, conn->port);
 742     if (conn->state == OR_CONN_STATE_CONNECTING)
 743       connection_or_connect_failed(TO_OR_CONN(conn),
 744                                    END_OR_CONN_REASON_TIMEOUT,
 745                                    "Tor gave up on the connection");
 746     connection_mark_for_close(conn);
 747     conn->hold_open_until_flushed = 1;
 748     return;
 749   }
 750
 751   /* If we haven't written to an OR connection for a while, then either nuke
 752      the connection or send a keepalive, depending. */
 753   if (now >= conn->timestamp_lastwritten + options->KeepalivePeriod) {
 754     routerinfo_t *router = router_get_by_digest(or_conn->identity_digest);
 755     int maxCircuitlessPeriod = options->MaxCircuitDirtiness*3/2;
 756     if (!connection_state_is_open(conn)) {
 757       /* We never managed to actually get this connection open and happy. */
 758       log_info(LD_OR,"Expiring non-open OR connection to fd %d (%s:%d).",
 759                conn->s,conn->address, conn->port);
 760       connection_mark_for_close(conn);
 761       conn->hold_open_until_flushed = 1;
 762     } else if (we_are_hibernating() && !or_conn->n_circuits &&
 763                !buf_datalen(conn->outbuf)) {
 764       /* We're hibernating, there's no circuits, and nothing to flush.*/
 765       log_info(LD_OR,"Expiring non-used OR connection to fd %d (%s:%d) "
 766                "[Hibernating or exiting].",
 767                conn->s,conn->address, conn->port);
 768       connection_mark_for_close(conn);
 769       conn->hold_open_until_flushed = 1;
 770     } else if (!clique_mode(options) && !or_conn->n_circuits &&
 771                now >= or_conn->timestamp_last_added_nonpadding +
 772                                            maxCircuitlessPeriod &&
 773                (!router || !server_mode(options) ||
 774                 !router_is_clique_mode(router))) {
 775       log_info(LD_OR,"Expiring non-used OR connection to fd %d (%s:%d) "
 776                "[Not in clique mode].",
 777                conn->s,conn->address, conn->port);
 778       connection_mark_for_close(conn);
 779       conn->hold_open_until_flushed = 1;
 780     } else if (
 781          now >= or_conn->timestamp_lastempty + options->KeepalivePeriod*10 &&
 782          now >= conn->timestamp_lastwritten + options->KeepalivePeriod*10) {
 783       log_fn(LOG_PROTOCOL_WARN,LD_PROTOCOL,
 784              "Expiring stuck OR connection to fd %d (%s:%d). (%d bytes to "
 785              "flush; %d seconds since last write)",
 786              conn->s, conn->address, conn->port,
 787              (int)buf_datalen(conn->outbuf),
 788              (int)(now-conn->timestamp_lastwritten));
 789       connection_mark_for_close(conn);
 790     } else if (!buf_datalen(conn->outbuf)) {
 791       /* either in clique mode, or we've got a circuit. send a padding cell. */
 792       log_fn(LOG_DEBUG,LD_OR,"Sending keepalive to (%s:%d)",
 793              conn->address, conn->port);
 794       memset(&cell,0,sizeof(cell_t));
 795       cell.command = CELL_PADDING;
 796       connection_or_write_cell_to_buf(&cell, or_conn);
 797     }
 798   }
 799 }
 800
 801 /** Honor a NEWNYM request: make future requests unlinkable to past
 802  * requests.
      *
      * Calls circuit_expire_all_dirty_circs() and addressmap_clear_transient(),
      * then records <b>now</b> as the time of the most recent NEWNYM and clears
      * the pending flag that run_scheduled_events() checks when a request has
      * been deferred. */
 803 static void
 804 signewnym_impl(time_t now)
 805 {
 806   circuit_expire_all_dirty_circs();
 807   addressmap_clear_transient();
       /* run_scheduled_events() compares this timestamp (plus
        * MAX_SIGNEWNYM_RATE) against the current time before honoring a
        * deferred request. */
 808   time_of_last_signewnym = now;
 809   signewnym_is_pending = 0;
 810 }
 811
 812 /** Perform regular maintenance tasks.  This function gets run once per
 813  * second by second_elapsed_callback().
      *
      * <b>now</b> is the current time as sampled by the caller.  Most periodic
      * tasks keep their next-due time in a function-static time_t that starts
      * at 0, so the first call enters every such branch; each branch then
      * reschedules itself relative to <b>now</b>.
 814  */
 815 static void
 816 run_scheduled_events(time_t now)
 817 {
 818   static time_t last_rotated_x509_certificate = 0;
 819   static time_t time_to_check_v3_certificate = 0;
 820   static time_t time_to_check_listeners = 0;
 821   static time_t time_to_check_descriptor = 0;
 822   static time_t time_to_check_ipaddress = 0;
 823   static time_t time_to_shrink_memory = 0;
 824   static time_t time_to_try_getting_descriptors = 0;
 825   static time_t time_to_reset_descriptor_failures = 0;
 826   static time_t time_to_add_entropy = 0;
 827   static time_t time_to_write_bridge_status_file = 0;
 828   static time_t time_to_downrate_stability = 0;
 829   static time_t time_to_save_stability = 0;
 830   static time_t time_to_clean_caches = 0;
 831   static time_t time_to_recheck_bandwidth = 0;
 832   static time_t time_to_check_for_expired_networkstatus = 0;
 833   static time_t time_to_write_stats_files = 0;
 834   static time_t time_to_write_bridge_stats = 0;
 835   static int should_init_bridge_stats = 1;
 836   static time_t time_to_retry_dns_init = 0;
 837   or_options_t *options = get_options();
 838   int i;
 839   int have_dir_info;
 840
 841   /** 0. See if we've been asked to shut down and our timeout has
 842    * expired; or if our bandwidth limits are exhausted and we
 843    * should hibernate; or if it's time to wake up from hibernation.
 844    */
 845   consider_hibernation(now);
 846
 847   /* 0b. If we've deferred a signewnym, make sure it gets handled
 848    * eventually. */
 849   if (signewnym_is_pending &&
 850       time_of_last_signewnym + MAX_SIGNEWNYM_RATE <= now) {
 851     log(LOG_INFO, LD_CONTROL, "Honoring delayed NEWNYM request");
 852     signewnym_impl(now);
 853   }
 854
 855   /** 1a. Every MIN_ONION_KEY_LIFETIME seconds, rotate the onion keys,
 856    *  shut down and restart all cpuworkers, and update the directory if
 857    *  necessary.
 858    */
 859   if (server_mode(options) &&
 860       get_onion_key_set_at()+MIN_ONION_KEY_LIFETIME < now) {
 861     log_info(LD_GENERAL,"Rotating onion key.");
 862     rotate_onion_key();
 863     cpuworkers_rotate();
 864     if (router_rebuild_descriptor(1)<0) {
 865       log_info(LD_CONFIG, "Couldn't rebuild router descriptor");
 866     }
 867     if (advertised_server_mode())
 868       router_upload_dir_desc_to_dirservers(0);
 869   }
 870
       /* Launch router-descriptor and extra-info downloads (and bridge
        * descriptor fetches when UseBridges is set).  The retry interval is the
        * LAZY one once router_have_minimum_dir_info() is true, and the GREEDY
        * one until then. */
 871   if (time_to_try_getting_descriptors < now) {
 872     update_router_descriptor_downloads(now);
 873     update_extrainfo_downloads(now);
 874     if (options->UseBridges)
 875       fetch_bridge_descriptors(now);
 876     if (router_have_minimum_dir_info())
 877       time_to_try_getting_descriptors = now + LAZY_DESCRIPTOR_RETRY_INTERVAL;
 878     else
 879       time_to_try_getting_descriptors = now + GREEDY_DESCRIPTOR_RETRY_INTERVAL;
 880   }
 881
       /* Every DESCRIPTOR_FAILURE_RESET_INTERVAL seconds, reset the recorded
        * descriptor download failures. */
 882   if (time_to_reset_descriptor_failures < now) {
 883     router_reset_descriptor_download_failures();
 884     time_to_reset_descriptor_failures =
 885       now + DESCRIPTOR_FAILURE_RESET_INTERVAL;
 886   }
 887
 888   /** 1b. Every MAX_SSL_KEY_LIFETIME seconds, we change our TLS context. */
       /* The first call only records the starting time, so the first rotation
        * happens a full lifetime after startup. */
 889   if (!last_rotated_x509_certificate)
 890     last_rotated_x509_certificate = now;
 891   if (last_rotated_x509_certificate+MAX_SSL_KEY_LIFETIME < now) {
 892     log_info(LD_GENERAL,"Rotating tls context.");
 893     if (tor_tls_context_new(get_identity_key(), MAX_SSL_KEY_LIFETIME) < 0) {
 894       log_warn(LD_BUG, "Error reinitializing TLS context");
 895       /* XXX is it a bug here, that we just keep going? -RD */
 896     }
 897     last_rotated_x509_certificate = now;
 898     /* We also make sure to rotate the TLS connections themselves if they've
 899      * been up for too long -- but that's done via is_bad_for_new_circs in
 900      * run_connection_housekeeping() above. */
 901   }
 902
       /* Every ENTROPY_INTERVAL seconds, reseed the RNG.  On the very first
        * pass time_to_add_entropy is still 0, so crypto_seed_rng() is skipped
        * and only the next due time is set. */
 903   if (time_to_add_entropy < now) {
 904     if (time_to_add_entropy) {
 905       /* We already seeded once, so don't die on failure. */
 906       crypto_seed_rng(0);
 907     }
 908 /** How often do we add more entropy to OpenSSL's RNG pool? */
 909 #define ENTROPY_INTERVAL (60*60)
 910     time_to_add_entropy = now + ENTROPY_INTERVAL;
 911   }
 912
 913   /** 1c. If we have to change the accounting interval or record
 914    * bandwidth used in this accounting interval, do so. */
 915   if (accounting_is_enabled(options))
 916     accounting_run_housekeeping(now);
 917
       /* This test is keyed on the wall-clock second itself (now % 10), not on
        * a next-due timer, so it only fires on calls whose <b>now</b> is a
        * multiple of 10. */
 918   if (now % 10 == 0 && (authdir_mode_tests_reachability(options)) &&
 919       !we_are_hibernating()) {
 920     /* try to determine reachability of the other Tor relays */
 921     dirserv_test_reachability(now);
 922   }
 923
 924   /** 1d. Periodically, we discount older stability information so that new
 925    * stability info counts more, and save the stability information to disk as
 926    * appropriate. */
       /* rep_hist_downrate_old_runs() returns the time at which it next wants
        * to be called. */
 927   if (time_to_downrate_stability < now)
 928     time_to_downrate_stability = rep_hist_downrate_old_runs(now);
 929   if (authdir_mode_tests_reachability(options)) {
 930     if (time_to_save_stability < now) {
           /* Nothing is written on the first pass (timer still 0); we only
            * schedule the first save. */
 931       if (time_to_save_stability && rep_hist_record_mtbf_data(now, 1)<0) {
 932         log_warn(LD_GENERAL, "Couldn't store mtbf data.");
 933       }
 934 #define SAVE_STABILITY_INTERVAL (30*60)
 935       time_to_save_stability = now + SAVE_STABILITY_INTERVAL;
 936     }
 937   }
 938
 939   /* 1e. Periodically, if we're a v3 authority, we check whether our cert is
 940    * close to expiring and warn the admin if it is. */
 941   if (time_to_check_v3_certificate < now) {
 942     v3_authority_check_key_expiry();
 943 #define CHECK_V3_CERTIFICATE_INTERVAL (5*60)
 944     time_to_check_v3_certificate = now + CHECK_V3_CERTIFICATE_INTERVAL;
 945   }
 946
 947   /* 1f. Check whether our networkstatus has expired.
 948    */
 949   if (time_to_check_for_expired_networkstatus < now) {
 950     networkstatus_t *ns = networkstatus_get_latest_consensus();
 951     /*XXXX RD: This value needs to be the same as REASONABLY_LIVE_TIME in
 952      * networkstatus_get_reasonably_live_consensus(), but that value is way
 953      * way too high.  Arma: is the bridge issue there resolved yet? -NM */
 954 #define NS_EXPIRY_SLOP (24*60*60)
 955     if (ns && ns->valid_until < now+NS_EXPIRY_SLOP &&
 956         router_have_minimum_dir_info()) {
 957       router_dir_info_changed();
 958     }
 959 #define CHECK_EXPIRED_NS_INTERVAL (2*60)
 960     time_to_check_for_expired_networkstatus = now + CHECK_EXPIRED_NS_INTERVAL;
 961   }
 962
 963   /* 1g. Check whether we should write statistics to disk.
 964    */
 965   if (time_to_write_stats_files >= 0 && time_to_write_stats_files < now) {
 966 #define WRITE_STATS_INTERVAL (24*60*60)
 967     if (options->CellStatistics || options->DirReqStatistics ||
 968         options->EntryStatistics || options->ExitPortStatistics) {
 969       if (!time_to_write_stats_files) {
 970         /* Initialize stats. We're doing this here and not in options_act,
 971          * so that we know exactly when the 24 hours interval ends. */
 972         if (options->CellStatistics)
 973           rep_hist_buffer_stats_init(now);
 974         if (options->DirReqStatistics)
 975           geoip_dirreq_stats_init(now);
 976         if (options->EntryStatistics)
 977           geoip_entry_stats_init(now);
 978         if (options->ExitPortStatistics)
 979           rep_hist_exit_stats_init(now);
 980         log_notice(LD_CONFIG, "Configured to measure statistics. Look for "
 981                    "the *-stats files that will first be written to the "
 982                    "data directory in %d hours from now.",
 983                    WRITE_STATS_INTERVAL / (60 * 60));
 984         time_to_write_stats_files = now + WRITE_STATS_INTERVAL;
 985       } else {
 986         /* Write stats to disk. */
             /* The writers are handed the scheduled end of the interval, not
              * <b>now</b>, and the timer advances by exactly one interval. */
 987         if (options->CellStatistics)
 988           rep_hist_buffer_stats_write(time_to_write_stats_files);
 989         if (options->DirReqStatistics)
 990           geoip_dirreq_stats_write(time_to_write_stats_files);
 991         if (options->EntryStatistics)
 992           geoip_entry_stats_write(time_to_write_stats_files);
 993         if (options->ExitPortStatistics)
 994           rep_hist_exit_stats_write(time_to_write_stats_files);
 995         time_to_write_stats_files += WRITE_STATS_INTERVAL;
 996       }
 997     } else {
 998       /* Never write stats to disk.  The -1 fails the ">= 0" guard above,
            * and nothing else can reset this function-static variable, so this
            * block is skipped for the rest of the process even if the options
            * change later. */
 999       time_to_write_stats_files = -1;
1000     }
1001   }
1002
1003   /* 1h. Check whether we should write bridge statistics to disk.
1004    */
1005   if (should_record_bridge_info(options)) {
1006     if (time_to_write_bridge_stats < now) {
1007       if (should_init_bridge_stats) {
1008         /* (Re-)initialize bridge statistics. */
1009         geoip_bridge_stats_init(now);
1010         time_to_write_bridge_stats = now + WRITE_STATS_INTERVAL;
1011         should_init_bridge_stats = 0;
1012       } else {
1013         /* Possibly write bridge statistics to disk and ask when to write
1014          * them next time. */
1015         time_to_write_bridge_stats = geoip_bridge_stats_write(
1016                                            time_to_write_bridge_stats);
1017       }
1018     }
1019   } else if (!should_init_bridge_stats) {
1020     /* Bridge mode was turned off. Ensure that stats are re-initialized
1021      * next time bridge mode is turned on. */
1022     should_init_bridge_stats = 1;
1023   }
1024
1025   /* Remove old information from rephist and the rend cache. */
1026   if (time_to_clean_caches < now) {
1027     rep_history_clean(now - options->RephistTrackTime);
1028     rend_cache_clean();
1029     rend_cache_clean_v2_descs_as_dir();
1030 #define CLEAN_CACHES_INTERVAL (30*60)
1031     time_to_clean_caches = now + CLEAN_CACHES_INTERVAL;
1032   }
1033
1034 #define RETRY_DNS_INTERVAL (10*60)
1035   /* If we're a server and initializing dns failed, retry periodically. */
1036   if (time_to_retry_dns_init < now) {
1037     time_to_retry_dns_init = now + RETRY_DNS_INTERVAL;
1038     if (server_mode(options) && has_dns_init_failed())
1039       dns_init();
1040   }
1041
1042   /** 2. Periodically, we consider force-uploading our descriptor
1043    * (if we've passed our internal checks). */
1044
1045 /** How often do we check whether part of our router info has changed in a way
1046  * that would require an upload? */
1047 #define CHECK_DESCRIPTOR_INTERVAL (60)
1048 /** How often do we (as a router) check whether our IP address has changed? */
1049 #define CHECK_IPADDRESS_INTERVAL (15*60)
1050
1051   /* 2b. Once per minute, regenerate and upload the descriptor if the old
1052    * one is inaccurate. */
1053   if (time_to_check_descriptor < now) {
1054     static int dirport_reachability_count = 0;
1055     time_to_check_descriptor = now + CHECK_DESCRIPTOR_INTERVAL;
1056     check_descriptor_bandwidth_changed(now);
         /* The IP-address check is nested in the once-a-minute block but has
          * its own, longer interval. */
1057     if (time_to_check_ipaddress < now) {
1058       time_to_check_ipaddress = now + CHECK_IPADDRESS_INTERVAL;
1059       check_descriptor_ipaddress_changed(now);
1060     }
1061 /** If our router descriptor ever goes this long without being regenerated
1062  * because something changed, we force an immediate regenerate-and-upload. */
1063 #define FORCE_REGENERATE_DESCRIPTOR_INTERVAL (18*60*60)
1064     mark_my_descriptor_dirty_if_older_than(
1065                                   now - FORCE_REGENERATE_DESCRIPTOR_INTERVAL);
1066     consider_publishable_server(0);
1067     /* also, check religiously for reachability, if it's within the first
1068      * 20 minutes of our uptime. */
1069     if (server_mode(options) &&
1070         (has_completed_circuit || !any_predicted_circuits(now)) &&
1071         !we_are_hibernating()) {
1072       if (stats_n_seconds_working < TIMEOUT_UNTIL_UNREACHABILITY_COMPLAINT) {
             /* dirport_reachability_count cycles 0..5, so the second argument
              * is true on one call in six. */
1073         consider_testing_reachability(1, dirport_reachability_count==0);
1074         if (++dirport_reachability_count > 5)
1075           dirport_reachability_count = 0;
1076       } else if (time_to_recheck_bandwidth < now) {
1077         /* If we haven't checked for 12 hours and our bandwidth estimate is
1078          * low, do another bandwidth test. This is especially important for
1079          * bridges, since they might go long periods without much use. */
1080         routerinfo_t *me = router_get_my_routerinfo();
             /* Skipped the first time through (timer still 0): that pass only
              * schedules the first recheck. */
1081         if (time_to_recheck_bandwidth && me &&
1082             me->bandwidthcapacity < me->bandwidthrate &&
1083             me->bandwidthcapacity < 51200) {
1084           reset_bandwidth_test();
1085         }
1086 #define BANDWIDTH_RECHECK_INTERVAL (12*60*60)
1087         time_to_recheck_bandwidth = now + BANDWIDTH_RECHECK_INTERVAL;
1088       }
1089     }
1090
1091     /* If any networkstatus documents are no longer recent, we need to
1092      * update all the descriptors' running status. */
1093     /* purge obsolete entries */
1094     networkstatus_v2_list_clean(now);
1095     /* Remove dead routers. */
1096     routerlist_remove_old_routers();
1097
1098     /* Also, once per minute, check whether we want to download any
1099      * networkstatus documents.
1100      */
1101     update_networkstatus_downloads(now);
1102   }
1103
1104   /** 2c. Let directory voting happen. */
1105   if (authdir_mode_v3(options))
1106     dirvote_act(options, now);
1107
1108   /** 3a. Every second, we examine pending circuits and prune the
1109    *    ones which have been pending for more than a few seconds.
1110    *    We do this before step 4, so it can try building more if
1111    *    it's not comfortable with the number of available circuits.
1112    */
1113   circuit_expire_building(now);
1114
1115   /** 3b. Also look at pending streams and prune the ones that 'began'
1116    *     a long time ago but haven't gotten a 'connected' yet.
1117    *     Do this before step 4, so we can put them back into pending
1118    *     state to be picked up by the new circuit.
1119    */
1120   connection_ap_expire_beginning();
1121
1122   /** 3c. And expire connections that we've held open for too long.
1123    */
1124   connection_expire_held_open();
1125
1126   /** 3d. And every 60 seconds, we relaunch listeners if any died. */
1127   if (!we_are_hibernating() && time_to_check_listeners < now) {
1128     retry_all_listeners(NULL, NULL);
1129     time_to_check_listeners = now+60;
1130   }
1131
1132   /** 4. Every second, we try a new circuit if there are no valid
1133    *    circuits. Every NewCircuitPeriod seconds, we expire circuits
1134    *    that became dirty more than MaxCircuitDirtiness seconds ago,
1135    *    and we make a new circ if there are no clean circuits.
1136    */
1137   have_dir_info = router_have_minimum_dir_info();
1138   if (have_dir_info && !we_are_hibernating())
1139     circuit_build_needed_circs(now);
1140
1141   /** 5. We do housekeeping for each connection... */
1142   connection_or_set_bad_connections();
       /* The length is re-read on every iteration rather than cached. */
1143   for (i=0;i<smartlist_len(connection_array);i++) {
1144     run_connection_housekeeping(i, now);
1145   }
       /* Every MEM_SHRINK_INTERVAL seconds, shrink each connection's buffers,
        * then the cell pool and the buffer freelists. */
1146   if (time_to_shrink_memory < now) {
1147     SMARTLIST_FOREACH(connection_array, connection_t *, conn, {
1148         if (conn->outbuf)
1149           buf_shrink(conn->outbuf);
1150         if (conn->inbuf)
1151           buf_shrink(conn->inbuf);
1152       });
1153     clean_cell_pool();
1154     buf_shrink_freelists(0);
1155 /** How often do we check buffers and pools for empty space that can be
1156  * deallocated? */
1157 #define MEM_SHRINK_INTERVAL (60)
1158     time_to_shrink_memory = now + MEM_SHRINK_INTERVAL;
1159   }
1160
1161   /** 6. And remove any marked circuits... */
1162   circuit_close_all_marked();
1163
1164   /** 7. And upload service descriptors if necessary. */
1165   if (has_completed_circuit && !we_are_hibernating()) {
1166     rend_consider_services_upload(now);
1167     rend_consider_descriptor_republication();
1168   }
1169
1170   /** 8. and blow away any connections that need to die. have to do this now,
1171    * because if we marked a conn for close and left its socket -1, then
1172    * we'll pass it to poll/select and bad things will happen.
1173    */
1174   close_closeable_connections();
1175
1176   /** 8b. And if anything in our state is ready to get flushed to disk, we
1177    * flush it. */
1178   or_state_save(now);
1179
1180   /** 9. and if we're a server, check whether our DNS is telling stories to
1181    * us. */
       /* time_to_check_for_correct_dns is file-scope, not function-static:
        * dns_servers_relaunch_checks() zeroes it to restart this sequence.
        * While it is 0 we only schedule the first check (60 seconds plus a
        * crypto_rand_int(120) offset from now); after each real check the
        * next one is scheduled 12 hours plus a crypto_rand_int(12*3600)
        * offset later. */
1182   if (server_mode(options) && time_to_check_for_correct_dns < now) {
1183     if (!time_to_check_for_correct_dns) {
1184       time_to_check_for_correct_dns = now + 60 + crypto_rand_int(120);
1185     } else {
1186       dns_launch_correctness_checks();
1187       time_to_check_for_correct_dns = now + 12*3600 +
1188         crypto_rand_int(12*3600);
1189     }
1190   }
1191
1192   /** 10b. write bridge networkstatus file to disk.  Only done when
        * BridgeAuthoritativeDir is set, every BRIDGE_STATUSFILE_INTERVAL
        * seconds. */
1193   if (options->BridgeAuthoritativeDir &&
1194       time_to_write_bridge_status_file < now) {
1195     networkstatus_dump_bridge_status_to_file(now);
1196 #define BRIDGE_STATUSFILE_INTERVAL (30*60)
1197     time_to_write_bridge_status_file = now+BRIDGE_STATUSFILE_INTERVAL;
1198   }
1199 }
1200
1201 /** Libevent timer: used to invoke second_elapsed_callback() once per
1202  * second.  Created on the first call to second_elapsed_callback() and
      * re-armed with event_add() at the end of every call. */
1203 static struct event *timeout_event = NULL;
1204 /** Number of libevent errors in the last second: we die if we get too many.
      * Zeroed near the top of second_elapsed_callback() and incremented by
      * got_libevent_error(). */
1205 static int n_libevent_errors = 0;
1206
1207 /** Libevent callback: invoked once every second.
      *
      * All three arguments are ignored.  The first call (made directly by
      * do_main_loop()) creates timeout_event; every call re-arms it for one
      * second at the end.  In between, this function accounts for the bytes
      * moved since the previous call, refills the bandwidth buckets, warns
      * periodically if reachability is still unconfirmed, notes clock jumps,
      * and calls run_scheduled_events(). */
1208 static void
1209 second_elapsed_callback(int fd, short event, void *args)
1210 {
1211   /* XXXX This could be sensibly refactored into multiple callbacks, and we
1212    * could use Libevent's timers for this rather than checking the current
1213    * time against a bunch of timeouts every second. */
1214   static struct timeval one_second;
1215   static time_t current_second = 0;
1216   time_t now;
1217   size_t bytes_written;
1218   size_t bytes_read;
1219   int seconds_elapsed;
1220   or_options_t *options = get_options();
1221   (void)fd;
1222   (void)event;
1223   (void)args;
       /* First call only: create the timer and fill in the interval. */
1224   if (!timeout_event) {
1225     timeout_event = tor_evtimer_new(tor_libevent_get_base(),
1226                                     second_elapsed_callback, NULL);
1227     one_second.tv_sec = 1;
1228     one_second.tv_usec = 0;
1229   }
1230
       /* Start a fresh error budget for got_libevent_error(). */
1231   n_libevent_errors = 0;
1232
1233   /* log_fn(LOG_NOTICE, "Tick."); */
1234   now = time(NULL);
1235   update_approx_time(now);
1236
1237   /* the second has rolled over. check more stuff. */
       /* Bytes moved = how far each global bucket has drained since the value
        * saved at the end of the previous call.
        * NOTE(review): the results are stored in size_t, so a negative
        * difference would wrap to a huge value -- confirm that the buckets
        * cannot be larger now than when they were last saved. */
1238   bytes_written = stats_prev_global_write_bucket - global_write_bucket;
1239   bytes_read = stats_prev_global_read_bucket - global_read_bucket;
       /* 0 on the very first call, while current_second is still unset. */
1240   seconds_elapsed = current_second ? (int)(now - current_second) : 0;
1241   stats_n_bytes_read += bytes_read;
1242   stats_n_bytes_written += bytes_written;
1243   if (accounting_is_enabled(options) && seconds_elapsed >= 0)
1244     accounting_add_bytes(bytes_read, bytes_written, seconds_elapsed);
1245   control_event_bandwidth_used((uint32_t)bytes_read,(uint32_t)bytes_written);
1246   control_event_stream_bandwidth_used();
1247
1248   if (seconds_elapsed > 0)
1249     connection_bucket_refill(seconds_elapsed, now);
       /* Snapshot the (possibly refilled) buckets for the next call's
        * byte-count computation. */
1250   stats_prev_global_read_bucket = global_read_bucket;
1251   stats_prev_global_write_bucket = global_write_bucket;
1252
       /* The two integer divisions differ exactly when adding seconds_elapsed
        * carries stats_n_seconds_working across a multiple of
        * TIMEOUT_UNTIL_UNREACHABILITY_COMPLAINT. */
1253   if (server_mode(options) &&
1254       !we_are_hibernating() &&
1255       seconds_elapsed > 0 &&
1256       has_completed_circuit &&
1257       stats_n_seconds_working / TIMEOUT_UNTIL_UNREACHABILITY_COMPLAINT !=
1258       (stats_n_seconds_working+seconds_elapsed) /
1259         TIMEOUT_UNTIL_UNREACHABILITY_COMPLAINT) {
1260     /* every 20 minutes, check and complain if necessary */
1261     routerinfo_t *me = router_get_my_routerinfo();
1262     if (me && !check_whether_orport_reachable()) {
1263       log_warn(LD_CONFIG,"Your server (%s:%d) has not managed to confirm that "
1264                "its ORPort is reachable. Please check your firewalls, ports, "
1265                "address, /etc/hosts file, etc.",
1266                me->address, me->or_port);
1267       control_event_server_status(LOG_WARN,
1268                                   "REACHABILITY_FAILED ORADDRESS=%s:%d",
1269                                   me->address, me->or_port);
1270     }
1271
1272     if (me && !check_whether_dirport_reachable()) {
1273       log_warn(LD_CONFIG,
1274                "Your server (%s:%d) has not managed to confirm that its "
1275                "DirPort is reachable. Please check your firewalls, ports, "
1276                "address, /etc/hosts file, etc.",
1277                me->address, me->dir_port);
1278       control_event_server_status(LOG_WARN,
1279                                   "REACHABILITY_FAILED DIRADDRESS=%s:%d",
1280                                   me->address, me->dir_port);
1281     }
1282   }
1283
1284 /** If more than this many seconds have elapsed, probably the clock
1285  * jumped: doesn't count. */
1286 #define NUM_JUMPED_SECONDS_BEFORE_WARN 100
       /* A jump (backwards by more than the limit, or forwards by at least
        * the limit) is reported and not added to the working-time counter;
        * smaller positive gaps are added. */
1287   if (seconds_elapsed < -NUM_JUMPED_SECONDS_BEFORE_WARN ||
1288       seconds_elapsed >= NUM_JUMPED_SECONDS_BEFORE_WARN) {
1289     circuit_note_clock_jumped(seconds_elapsed);
1290     /* XXX if the time jumps *back* many months, do our events in
1291      * run_scheduled_events() recover? I don't think they do. -RD */
1292   } else if (seconds_elapsed > 0)
1293     stats_n_seconds_working += seconds_elapsed;
1294
1295   run_scheduled_events(now);
1296
1297   current_second = now; /* remember which second it is, for next time */
1298
       /* Re-arm the timer.  A failure here is only logged. */
1299   if (event_add(timeout_event, &one_second))
1300     log_err(LD_NET,
1301             "Error from libevent when setting one-second timeout event");
1302 }
1303
1304 #ifndef MS_WINDOWS
1305 /** Called when a possibly ignorable libevent error occurs; ensures that we
1306  * don't get into an infinite loop by ignoring too many errors from
1307  * libevent.
      *
      * Increments n_libevent_errors, which second_elapsed_callback() zeroes on
      * every invocation.  Returns -1 (after logging an error) once more than 8
      * errors have been counted since the last reset, and 0 otherwise.  Only
      * compiled when MS_WINDOWS is not defined. */
1308 static int
1309 got_libevent_error(void)
1310 {
1311   if (++n_libevent_errors > 8) {
1312     log_err(LD_NET, "Too many libevent errors in one second; dying");
1313     return -1;
1314   }
1315   return 0;
1316 }
1317 #endif
1318
1319 #define UPTIME_CUTOFF_FOR_NEW_BANDWIDTH_TEST (6*60*60) /* six hours */
1320
1321 /** Called when our IP address seems to have changed. <b>at_interface</b>
1322  * should be true if we detected a change in our interface, and false if we
1323  * detected a change in our published address.
      *
      * Interface change: a non-server calls init_keys(); a server does nothing
      * in this branch.  Published-address change: a server resets
      * stats_n_seconds_working and its reachability state and marks its
      * descriptor dirty, first calling reset_bandwidth_test() if it had been
      * working for more than UPTIME_CUTOFF_FOR_NEW_BANDWIDTH_TEST seconds; a
      * non-server does nothing in this branch.  In every case
      * dns_servers_relaunch_checks() is called last. */
1324 void
1325 ip_address_changed(int at_interface)
1326 {
1327   int server = server_mode(get_options());
1328
1329   if (at_interface) {
1330     if (! server) {
1331       /* Okay, change our keys. */
           /* NOTE(review): the return value of init_keys() is ignored here,
            * although do_main_loop() treats a negative return as fatal --
            * confirm that ignoring it is intended. */
1332       init_keys();
1333     }
1334   } else {
1335     if (server) {
           /* Compare against the old uptime before zeroing it below. */
1336       if (stats_n_seconds_working > UPTIME_CUTOFF_FOR_NEW_BANDWIDTH_TEST)
1337         reset_bandwidth_test();
1338       stats_n_seconds_working = 0;
1339       router_reset_reachability();
1340       mark_my_descriptor_dirty();
1341     }
1342   }
1343
1344   dns_servers_relaunch_checks();
1345 }
1346
1347 /** Forget what we've learned about the correctness of our DNS servers, and
1348  * start learning again.
      *
      * Does nothing unless we are in server mode.  Zeroing
      * time_to_check_for_correct_dns makes step 9 of run_scheduled_events()
      * treat the check as not yet scheduled and pick a new start time. */
1349 void
1350 dns_servers_relaunch_checks(void)
1351 {
1352   if (server_mode(get_options())) {
1353     dns_reset_correctness_checks();
1354     time_to_check_for_correct_dns = 0;
1355   }
1356 }
1357
1358 /** Called when we get a SIGHUP: reload configuration files and keys,
1359  * retry all connections, and so on.
      *
      * Returns 0 on success, or -1 if re-reading the configuration fails; that
      * is the only failure this function reports.  signal_callback() treats a
      * negative return as a reason to exit. */
1360 static int
1361 do_hup(void)
1362 {
1363   or_options_t *options = get_options();
1364
1365 #ifdef USE_DMALLOC
1366   dmalloc_log_stats();
1367   dmalloc_log_changed(0, 1, 0, 0);
1368 #endif
1369
1370   log_notice(LD_GENERAL,"Received reload signal (hup). Reloading config and "
1371              "resetting internal state.");
       /* Record current bandwidth usage before anything below is reset. */
1372   if (accounting_is_enabled(options))
1373     accounting_record_bandwidth_usage(time(NULL), get_or_state());
1374
1375   router_reset_warnings();
1376   routerlist_reset_warnings();
1377   addressmap_clear_transient();
1378   /* first, reload config variables, in case they've changed */
1379   if (options->ReloadTorrcOnSIGHUP) {
1380     /* no need to provide argc/v, they've been cached in init_from_config */
1381     if (options_init_from_torrc(0, NULL) < 0) {
1382       log_err(LD_CONFIG,"Reading config failed--see warnings above. "
1383               "For usage, try -h.");
1384       return -1;
1385     }
1386     options = get_options(); /* they have changed now */
1387   } else {
1388     log_notice(LD_GENERAL, "Not reloading config file: the controller told "
1389                "us not to.");
1390   }
       /* A failure to reload the fingerprint file is not fatal: it is logged
        * and the function carries on. */
1391   if (authdir_mode_handles_descs(options, -1)) {
1392     /* reload the approved-routers file */
1393     if (dirserv_load_fingerprint_file() < 0) {
1394       /* warnings are logged from dirserv_load_fingerprint_file() directly */
1395       log_info(LD_GENERAL, "Error reloading fingerprints. "
1396                "Continuing with old list.");
1397     }
1398   }
1399
1400   /* Rotate away from the old dirty circuits. This has to be done
1401    * after we've read the new options, but before we start using
1402    * circuits for directory fetches. */
1403   circuit_expire_all_dirty_circs();
1404
1405   /* retry appropriate downloads */
1406   router_reset_status_download_failures();
1407   router_reset_descriptor_download_failures();
1408   update_networkstatus_downloads(time(NULL));
1409
1410   /* We'll retry routerstatus downloads in about 10 seconds; no need to
1411    * force a retry there. */
1412
1413   if (server_mode(options)) {
1414     /* Restart cpuworker and dnsworker processes, so they get up-to-date
1415      * configuration options. */
1416     cpuworkers_rotate();
1417     dns_reset();
1418   }
1419   return 0;
1420 }
1421
1422 /** Tor main loop.
      *
      * Performs the remaining start-up work (DNS, signal handlers, keys, cell
      * pool, bandwidth buckets, cached directory data, cpuworkers), starts the
      * once-a-second callback, and then runs the libevent loop until told to
      * stop.
      *
      * Returns 0 when nt_service_is_stopping() reports a stop request, and -1
      * on a fatal start-up or libevent error. */
1423 /* static */ int
1424 do_main_loop(void)
1425 {
1426   int loop_result;
1427   time_t now;
1428
1429   /* initialize dns resolve map, spawn workers if needed */
1430   if (dns_init() < 0) {
1431     if (get_options()->ServerDNSAllowBrokenConfig)
1432       log_warn(LD_GENERAL, "Couldn't set up any working nameservers. "
1433                "Network not up yet?  Will try again soon.");
1434     else {
1435       log_err(LD_GENERAL,"Error initializing dns subsystem; exiting.  To "
1436               "retry instead, set the ServerDNSAllowBrokenResolvConf option.");
           /* Actually exit, as the message says: without this return we used
            * to log "exiting" and then carry on regardless. */
           return -1;
1437     }
1438   }
1439
1440   handle_signals(1);
1441
1442   /* load the private keys, if we're supposed to have them, and set up the
1443    * TLS context. */
1444   if (! identity_key_is_set()) {
1445     if (init_keys() < 0) {
1446       log_err(LD_BUG,"Error initializing keys; exiting");
1447       return -1;
1448     }
1449   }
1450
1451   /* Set up the packed_cell_t memory pool. */
1452   init_cell_pool();
1453
1454   /* Set up our buckets */
1455   connection_bucket_init();
1456   stats_prev_global_read_bucket = global_read_bucket;
1457   stats_prev_global_write_bucket = global_write_bucket;
1458
1459   /* initialize the bootstrap status events to know we're starting up */
1460   control_event_bootstrap(BOOTSTRAP_STATUS_STARTING, 0);
1461
1462   if (trusted_dirs_reload_certs()) {
1463     log_warn(LD_DIR,
1464              "Couldn't load all cached v3 certificates. Starting anyway.");
1465   }
1466   if (router_reload_v2_networkstatus()) {
1467     return -1;
1468   }
1469   if (router_reload_consensus_networkstatus()) {
1470     return -1;
1471   }
1472   /* load the routers file, or assign the defaults. */
1473   if (router_reload_router_list()) {
1474     return -1;
1475   }
1476   /* load the networkstatuses. (This launches a download for new routers as
1477    * appropriate.)
1478    */
1479   now = time(NULL);
1480   directory_info_has_arrived(now, 1);
1481
1482   if (server_mode(get_options())) {
1483     /* launch cpuworkers. Need to do this *after* we've read the onion key. */
1484     cpu_init();
1485   }
1486
1487   /* set up once-a-second callback. */
1488   second_elapsed_callback(0,0,NULL);
1489
1490   for (;;) {
1491     if (nt_service_is_stopping())
1492       return 0;
1493
1494 #ifndef MS_WINDOWS
1495     /* Make it easier to tell whether libevent failure is our fault or not. */
1496     errno = 0;
1497 #endif
1498     /* All active linked conns should get their read events activated. */
1499     SMARTLIST_FOREACH(active_linked_connection_lst, connection_t *, conn,
1500                       event_active(conn->read_event, EV_READ, 1));
1501     called_loop_once = smartlist_len(active_linked_connection_lst) ? 1 : 0;
1502
1503     update_approx_time(time(NULL));
1504
1505     /* poll until we have an event, or the second ends, or until we have
1506      * some active linked connections to trigger events for. */
1507     loop_result = event_base_loop(tor_libevent_get_base(),
1508                                   called_loop_once ? EVLOOP_ONCE : 0);
1509
1510     /* let catch() handle things like ^c, and otherwise don't worry about it */
1511     if (loop_result < 0) {
1512       int e = tor_socket_errno(-1);
     #ifndef MS_WINDOWS
           /* EINVAL is tolerated a limited number of times per second (see
            * got_libevent_error()).  It has to be tested before the generic
            * fatal branch below: EINVAL is neither EINTR nor EINPROGRESS, so
            * when this test came second it could never be reached. */
           if (e == EINVAL) {
             log_warn(LD_NET, "EINVAL from libevent: should you upgrade libevent?");
             if (got_libevent_error())
               return -1;
             continue;
           }
     #endif
1513       /* let the program survive things like ^z */
1514       if (e != EINTR && !ERRNO_IS_EINPROGRESS(e)) {
1515         log_err(LD_NET,"libevent call with %s failed: %s [%d]",
1516                 tor_libevent_get_method(), tor_socket_strerror(e), e);
1517         return -1;
1524       } else {
1525         if (ERRNO_IS_EINPROGRESS(e))
1526           log_warn(LD_BUG,
1527                    "libevent call returned EINPROGRESS? Please report.");
1528         log_debug(LD_NET,"libevent call interrupted.");
1529         /* You can't trust the results of this poll(). Go back to the
1530          * top of the big for loop. */
1531         continue;
1532       }
1533     }
1534   }
1535 }
1536
1537 /** Used to implement the SIGNAL control command: if we accept
1538  * <b>the_signal</b> as a remote pseudo-signal, act on it.
      *
      * Accepted values are forwarded to signal_callback() with the signal
      * number packed into its void* argument; any other value is logged as a
      * warning and otherwise ignored. */
1539 /* We don't re-use catch() here because:
1540  *   1. We handle a different set of signals than those allowed in catch.
1541  *   2. Platforms without signal() are unlikely to define SIGfoo.
1542  *   3. The control spec is defined to use fixed numeric signal values
1543  *      which just happen to match the Unix values.
1544  */
1545 void
1546 control_signal_act(int the_signal)
1547 {
1548   switch (the_signal)
1549     {
         /* Fixed numbers from the control spec (see point 3 above), each
          * translated to this build's SIGfoo constant. */
1550     case 1:
1551       signal_callback(0,0,(void*)(uintptr_t)SIGHUP);
1552       break;
1553     case 2:
1554       signal_callback(0,0,(void*)(uintptr_t)SIGINT);
1555       break;
1556     case 10:
1557       signal_callback(0,0,(void*)(uintptr_t)SIGUSR1);
1558       break;
1559     case 12:
1560       signal_callback(0,0,(void*)(uintptr_t)SIGUSR2);
1561       break;
1562     case 15:
1563       signal_callback(0,0,(void*)(uintptr_t)SIGTERM);
1564       break;
         /* These two are matched by name rather than by a fixed number. */
1565     case SIGNEWNYM:
1566       signal_callback(0,0,(void*)(uintptr_t)SIGNEWNYM);
1567       break;
1568     case SIGCLEARDNSCACHE:
1569       signal_callback(0,0,(void*)(uintptr_t)SIGCLEARDNSCACHE);
1570       break;
1571     default:
1572       log_warn(LD_BUG, "Unrecognized signal number %d.", the_signal);
1573       break;
1574     }
1575 }
1576
1577 /** Libevent callback: invoked when we get a signal.
1578  */
1579 static void
1580 signal_callback(int fd, short events, void *arg)
1581 {
1582   uintptr_t sig = (uintptr_t)arg;
1583   (void)fd;
1584   (void)events;
1585   switch (sig)
1586     {
1587     case SIGTERM:
1588       log_notice(LD_GENERAL,"Catching signal TERM, exiting cleanly.");
1589       tor_cleanup();
1590       exit(0);
1591       break;
1592     case SIGINT:
1593       if (!server_mode(get_options())) { /* do it now */
1594         log_notice(LD_GENERAL,"Interrupt: exiting cleanly.");
1595         tor_cleanup();
1596         exit(0);
1597       }
1598       hibernate_begin_shutdown();
1599       break;
1600 #ifdef SIGPIPE
1601     case SIGPIPE:
1602       log_debug(LD_GENERAL,"Caught SIGPIPE. Ignoring.");
1603       break;
1604 #endif
1605     case SIGUSR1:
1606       /* prefer to log it at INFO, but make sure we always see it */
1607       dumpstats(get_min_log_level()<LOG_INFO ? get_min_log_level() : LOG_INFO);
1608       break;
1609     case SIGUSR2:
1610       switch_logs_debug();
1611       log_debug(LD_GENERAL,"Caught USR2, going to loglevel debug. "
1612                 "Send HUP to change back.");
1613       break;
1614     case SIGHUP:
1615       if (do_hup() < 0) {
1616         log_warn(LD_CONFIG,"Restart failed (config error?). Exiting.");
1617         tor_cleanup();
1618         exit(1);
1619       }
1620       break;
1621 #ifdef SIGCHLD
1622     case SIGCHLD:
1623       while (waitpid(-1,NULL,WNOHANG) > 0) ; /* keep reaping until no more
1624                                                 zombies */
1625       break;
1626 #endif
1627     case SIGNEWNYM: {
1628       time_t now = time(NULL);
1629       if (time_of_last_signewnym + MAX_SIGNEWNYM_RATE > now) {
1630         signewnym_is_pending = 1;
1631         log(LOG_NOTICE, LD_CONTROL,
1632             "Rate limiting NEWNYM request: delaying by %d second(s)",
1633             (int)(MAX_SIGNEWNYM_RATE+time_of_last_signewnym-now));
1634       } else {
1635         signewnym_impl(now);
1636       }
1637       break;
1638     }
1639     case SIGCLEARDNSCACHE:
1640       addressmap_clear_transient();
1641       break;
1642   }
1643 }
1644
1645 extern uint64_t rephist_total_alloc;
1646 extern uint32_t rephist_total_num;
1647
1648 /**
1649  * Write current memory usage information to the log.
1650  */
1651 static void
1652 dumpmemusage(int severity)
1653 {
1654   connection_dump_buffer_mem_stats(severity);
1655   log(severity, LD_GENERAL, "In rephist: "U64_FORMAT" used by %d Tors.",
1656       U64_PRINTF_ARG(rephist_total_alloc), rephist_total_num);
1657   dump_routerlist_mem_usage(severity);
1658   dump_cell_pool_usage(severity);
1659   dump_dns_mem_usage(severity);
1660   buf_dump_freelist_sizes(severity);
1661   tor_log_mallinfo(severity);
1662 }
1663
1664 /** Write all statistics to the log, with log level 'severity'.  Called
1665  * in response to a SIGUSR1. */
1666 static void
1667 dumpstats(int severity)
1668 {
1669   time_t now = time(NULL);
1670   time_t elapsed;
1671   size_t rbuf_cap, wbuf_cap, rbuf_len, wbuf_len;
1672
1673   log(severity, LD_GENERAL, "Dumping stats:");
1674
1675   SMARTLIST_FOREACH(connection_array, connection_t *, conn,
1676   {
1677     int i = conn_sl_idx;
1678     log(severity, LD_GENERAL,
1679         "Conn %d (socket %d) type %d (%s), state %d (%s), created %d secs ago",
1680         i, conn->s, conn->type, conn_type_to_string(conn->type),
1681         conn->state, conn_state_to_string(conn->type, conn->state),
1682         (int)(now - conn->timestamp_created));
1683     if (!connection_is_listener(conn)) {
1684       log(severity,LD_GENERAL,
1685           "Conn %d is to %s:%d.", i,
1686           safe_str_client(conn->address),
1687           conn->port);
1688       log(severity,LD_GENERAL,
1689           "Conn %d: %d bytes waiting on inbuf (len %d, last read %d secs ago)",
1690           i,
1691           (int)buf_datalen(conn->inbuf),
1692           (int)buf_allocation(conn->inbuf),
1693           (int)(now - conn->timestamp_lastread));
1694       log(severity,LD_GENERAL,
1695           "Conn %d: %d bytes waiting on outbuf "
1696           "(len %d, last written %d secs ago)",i,
1697           (int)buf_datalen(conn->outbuf),
1698           (int)buf_allocation(conn->outbuf),
1699           (int)(now - conn->timestamp_lastwritten));
1700       if (conn->type == CONN_TYPE_OR) {
1701         or_connection_t *or_conn = TO_OR_CONN(conn);
1702         if (or_conn->tls) {
1703           tor_tls_get_buffer_sizes(or_conn->tls, &rbuf_cap, &rbuf_len,
1704                                    &wbuf_cap, &wbuf_len);
1705           log(severity, LD_GENERAL,
1706               "Conn %d: %d/%d bytes used on OpenSSL read buffer; "
1707               "%d/%d bytes used on write buffer.",
1708               i, (int)rbuf_len, (int)rbuf_cap, (int)wbuf_len, (int)wbuf_cap);
1709         }
1710       }
1711     }
1712     circuit_dump_by_conn(conn, severity); /* dump info about all the circuits
1713                                            * using this conn */
1714   });
1715   log(severity, LD_NET,
1716       "Cells processed: "U64_FORMAT" padding\n"
1717       "                 "U64_FORMAT" create\n"
1718       "                 "U64_FORMAT" created\n"
1719       "                 "U64_FORMAT" relay\n"
1720       "                        ("U64_FORMAT" relayed)\n"
1721       "                        ("U64_FORMAT" delivered)\n"
1722       "                 "U64_FORMAT" destroy",
1723       U64_PRINTF_ARG(stats_n_padding_cells_processed),
1724       U64_PRINTF_ARG(stats_n_create_cells_processed),
1725       U64_PRINTF_ARG(stats_n_created_cells_processed),
1726       U64_PRINTF_ARG(stats_n_relay_cells_processed),
1727       U64_PRINTF_ARG(stats_n_relay_cells_relayed),
1728       U64_PRINTF_ARG(stats_n_relay_cells_delivered),
1729       U64_PRINTF_ARG(stats_n_destroy_cells_processed));
1730   if (stats_n_data_cells_packaged)
1731     log(severity,LD_NET,"Average packaged cell fullness: %2.3f%%",
1732         100*(U64_TO_DBL(stats_n_data_bytes_packaged) /
1733              U64_TO_DBL(stats_n_data_cells_packaged*RELAY_PAYLOAD_SIZE)) );
1734   if (stats_n_data_cells_received)
1735     log(severity,LD_NET,"Average delivered cell fullness: %2.3f%%",
1736         100*(U64_TO_DBL(stats_n_data_bytes_received) /
1737              U64_TO_DBL(stats_n_data_cells_received*RELAY_PAYLOAD_SIZE)) );
1738
1739   if (now - time_of_process_start >= 0)
1740     elapsed = now - time_of_process_start;
1741   else
1742     elapsed = 0;
1743
1744   if (elapsed) {
1745     log(severity, LD_NET,
1746         "Average bandwidth: "U64_FORMAT"/%d = %d bytes/sec reading",
1747         U64_PRINTF_ARG(stats_n_bytes_read),
1748         (int)elapsed,
1749         (int) (stats_n_bytes_read/elapsed));
1750     log(severity, LD_NET,
1751         "Average bandwidth: "U64_FORMAT"/%d = %d bytes/sec writing",
1752         U64_PRINTF_ARG(stats_n_bytes_written),
1753         (int)elapsed,
1754         (int) (stats_n_bytes_written/elapsed));
1755   }
1756
1757   log(severity, LD_NET, "--------------- Dumping memory information:");
1758   dumpmemusage(severity);
1759
1760   rep_hist_dump_stats(now,severity);
1761   rend_service_dump_stats(severity);
1762   dump_pk_ops(severity);
1763   dump_distinct_digest_count(severity);
1764 }
1765
/** Process-exit hook, registered with atexit() by tor_init().  On Windows
 * builds it calls WSACleanup(); on every other platform it does nothing.
 */
static void
exit_function(void)
{
  /* NOTE: If we ever daemonize, this gets called immediately.  That's
   * okay for now, because we only use this on Windows.  */
#if defined(MS_WINDOWS)
  WSACleanup();
#endif
}
1777
1778 /** Set up the signal handlers for either parent or child. */
1779 void
1780 handle_signals(int is_parent)
1781 {
1782 #ifndef MS_WINDOWS /* do signal stuff only on Unix */
1783   int i;
1784   static const int signals[] = {
1785     SIGINT,  /* do a controlled slow shutdown */
1786     SIGTERM, /* to terminate now */
1787     SIGPIPE, /* otherwise SIGPIPE kills us */
1788     SIGUSR1, /* dump stats */
1789     SIGUSR2, /* go to loglevel debug */
1790     SIGHUP,  /* to reload config, retry conns, etc */
1791 #ifdef SIGXFSZ
1792     SIGXFSZ, /* handle file-too-big resource exhaustion */
1793 #endif
1794     SIGCHLD, /* handle dns/cpu workers that exit */
1795     -1 };
1796   static struct event *signal_events[16]; /* bigger than it has to be. */
1797   if (is_parent) {
1798     for (i = 0; signals[i] >= 0; ++i) {
1799       signal_events[i] = tor_evsignal_new(
1800                        tor_libevent_get_base(), signals[i], signal_callback,
1801                        (void*)(uintptr_t)signals[i]);
1802       if (event_add(signal_events[i], NULL))
1803         log_warn(LD_BUG, "Error from libevent when adding event for signal %d",
1804                  signals[i]);
1805     }
1806   } else {
1807     struct sigaction action;
1808     action.sa_flags = 0;
1809     sigemptyset(&action.sa_mask);
1810     action.sa_handler = SIG_IGN;
1811     sigaction(SIGINT,  &action, NULL);
1812     sigaction(SIGTERM, &action, NULL);
1813     sigaction(SIGPIPE, &action, NULL);
1814     sigaction(SIGUSR1, &action, NULL);
1815     sigaction(SIGUSR2, &action, NULL);
1816     sigaction(SIGHUP,  &action, NULL);
1817 #ifdef SIGXFSZ
1818     sigaction(SIGXFSZ, &action, NULL);
1819 #endif
1820   }
1821 #else /* MS windows */
1822   (void)is_parent;
1823 #endif /* signal stuff */
1824 }
1825
1826 /** Main entry point for the Tor command-line client.
1827  */
1828 /* static */ int
1829 tor_init(int argc, char *argv[])
1830 {
1831   char buf[256];
1832   int i, quiet = 0;
1833   time_of_process_start = time(NULL);
1834   if (!connection_array)
1835     connection_array = smartlist_create();
1836   if (!closeable_connection_lst)
1837     closeable_connection_lst = smartlist_create();
1838   if (!active_linked_connection_lst)
1839     active_linked_connection_lst = smartlist_create();
1840   /* Have the log set up with our application name. */
1841   tor_snprintf(buf, sizeof(buf), "Tor %s", get_version());
1842   log_set_application_name(buf);
1843   /* Initialize the history structures. */
1844   rep_hist_init();
1845   /* Initialize the service cache. */
1846   rend_cache_init();
1847   addressmap_init(); /* Init the client dns cache. Do it always, since it's
1848                       * cheap. */
1849
1850   /* We search for the "quiet" option first, since it decides whether we
1851    * will log anything at all to the command line. */
1852   for (i=1;i<argc;++i) {
1853     if (!strcmp(argv[i], "--hush"))
1854       quiet = 1;
1855     if (!strcmp(argv[i], "--quiet"))
1856       quiet = 2;
1857   }
1858  /* give it somewhere to log to initially */
1859   switch (quiet) {
1860     case 2:
1861       /* no initial logging */
1862       break;
1863     case 1:
1864       add_temp_log(LOG_WARN);
1865       break;
1866     default:
1867       add_temp_log(LOG_NOTICE);
1868   }
1869
1870   log(LOG_NOTICE, LD_GENERAL, "Tor v%s. This is experimental software. "
1871       "Do not rely on it for strong anonymity. (Running on %s)",get_version(),
1872       get_uname());
1873
1874   if (network_init()<0) {
1875     log_err(LD_BUG,"Error initializing network; exiting.");
1876     return -1;
1877   }
1878   atexit(exit_function);
1879
1880   if (options_init_from_torrc(argc,argv) < 0) {
1881     log_err(LD_CONFIG,"Reading config failed--see warnings above.");
1882     return -1;
1883   }
1884
1885 #ifndef MS_WINDOWS
1886   if (geteuid()==0)
1887     log_warn(LD_GENERAL,"You are running Tor as root. You don't need to, "
1888              "and you probably shouldn't.");
1889 #endif
1890
1891   if (crypto_global_init(get_options()->HardwareAccel,
1892                          get_options()->AccelName,
1893                          get_options()->AccelDir)) {
1894     log_err(LD_BUG, "Unable to initialize OpenSSL. Exiting.");
1895     return -1;
1896   }
1897
1898   return 0;
1899 }
1900
1901 /** A lockfile structure, used to prevent two Tors from messing with the
1902  * data directory at once.  If this variable is non-NULL, we're holding
1903  * the lockfile. */
1904 static tor_lockfile_t *lockfile = NULL;
1905
1906 /** Try to grab the lock file described in <b>options</b>, if we do not
1907  * already have it.  If <b>err_if_locked</b> is true, warn if somebody else is
1908  * holding the lock, and exit if we can't get it after waiting.  Otherwise,
1909  * return -1 if we can't get the lockfile.  Return 0 on success.
1910  */
1911 int
1912 try_locking(or_options_t *options, int err_if_locked)
1913 {
1914   if (lockfile)
1915     return 0;
1916   else {
1917     char *fname = options_get_datadir_fname2_suffix(options, "lock",NULL,NULL);
1918     int already_locked = 0;
1919     tor_lockfile_t *lf = tor_lockfile_lock(fname, 0, &already_locked);
1920     tor_free(fname);
1921     if (!lf) {
1922       if (err_if_locked && already_locked) {
1923         int r;
1924         log_warn(LD_GENERAL, "It looks like another Tor process is running "
1925                  "with the same data directory.  Waiting 5 seconds to see "
1926                  "if it goes away.");
1927 #ifndef WIN32
1928         sleep(5);
1929 #else
1930         Sleep(5000);
1931 #endif
1932         r = try_locking(options, 0);
1933         if (r<0) {
1934           log_err(LD_GENERAL, "No, it's still there.  Exiting.");
1935           exit(0);
1936         }
1937         return r;
1938       }
1939       return -1;
1940     }
1941     lockfile = lf;
1942     return 0;
1943   }
1944 }
1945
1946 /** Return true iff we've successfully acquired the lock file. */
1947 int
1948 have_lockfile(void)
1949 {
1950   return lockfile != NULL;
1951 }
1952
1953 /** If we have successfully acquired the lock file, release it. */
1954 void
1955 release_lockfile(void)
1956 {
1957   if (lockfile) {
1958     tor_lockfile_unlock(lockfile);
1959     lockfile = NULL;
1960   }
1961 }
1962
1963 /** Free all memory that we might have allocated somewhere.
1964  * If <b>postfork</b>, we are a worker process and we want to free
1965  * only the parts of memory that we won't touch. If !<b>postfork</b>,
1966  * Tor is shutting down and we should free everything.
1967  *
1968  * Helps us find the real leaks with dmalloc and the like. Also valgrind
1969  * should then report 0 reachable in its leak report (in an ideal world --
1970  * in practice libevent, SSL, libc etc never quite free everything). */
1971 void
1972 tor_free_all(int postfork)
1973 {
1974   if (!postfork) {
1975     evdns_shutdown(1);
1976   }
1977   geoip_free_all();
1978   dirvote_free_all();
1979   routerlist_free_all();
1980   networkstatus_free_all();
1981   addressmap_free_all();
1982   dirserv_free_all();
1983   rend_service_free_all();
1984   rend_cache_free_all();
1985   rend_service_authorization_free_all();
1986   rep_hist_free_all();
1987   dns_free_all();
1988   clear_pending_onions();
1989   circuit_free_all();
1990   entry_guards_free_all();
1991   connection_free_all();
1992   buf_shrink_freelists(1);
1993   memarea_clear_freelist();
1994   microdesc_free_all();
1995   if (!postfork) {
1996     config_free_all();
1997     router_free_all();
1998     policies_free_all();
1999   }
2000   free_cell_pool();
2001   if (!postfork) {
2002     tor_tls_free_all();
2003   }
2004   /* stuff in main.c */
2005
2006   smartlist_free(connection_array);
2007   smartlist_free(closeable_connection_lst);
2008   smartlist_free(active_linked_connection_lst);
2009   tor_free(timeout_event);
2010   if (!postfork) {
2011     release_lockfile();
2012   }
2013   /* Stuff in util.c and address.c*/
2014   if (!postfork) {
2015     escaped(NULL);
2016     esc_router_info(NULL);
2017     logs_free_all(); /* free log strings. do this last so logs keep working. */
2018   }
2019 }
2020
2021 /** Do whatever cleanup is necessary before shutting Tor down. */
2022 void
2023 tor_cleanup(void)
2024 {
2025   or_options_t *options = get_options();
2026   /* Remove our pid file. We don't care if there was an error when we
2027    * unlink, nothing we could do about it anyways. */
2028   if (options->command == CMD_RUN_TOR) {
2029     time_t now = time(NULL);
2030     if (options->PidFile)
2031       unlink(options->PidFile);
2032     if (accounting_is_enabled(options))
2033       accounting_record_bandwidth_usage(now, get_or_state());
2034     or_state_mark_dirty(get_or_state(), 0); /* force an immediate save. */
2035     or_state_save(now);
2036     if (authdir_mode_tests_reachability(options))
2037       rep_hist_record_mtbf_data(now, 0);
2038   }
2039 #ifdef USE_DMALLOC
2040   dmalloc_log_stats();
2041 #endif
2042   tor_free_all(0); /* We could move tor_free_all back into the ifdef below
2043                       later, if it makes shutdown unacceptably slow.  But for
2044                       now, leave it here: it's helped us catch bugs in the
2045                       past. */
2046   crypto_global_cleanup();
2047 #ifdef USE_DMALLOC
2048   dmalloc_log_unfreed();
2049   dmalloc_shutdown();
2050 #endif
2051 }
2052
2053 /** Read/create keys as needed, and echo our fingerprint to stdout. */
2054 /* static */ int
2055 do_list_fingerprint(void)
2056 {
2057   char buf[FINGERPRINT_LEN+1];
2058   crypto_pk_env_t *k;
2059   const char *nickname = get_options()->Nickname;
2060   if (!server_mode(get_options())) {
2061     log_err(LD_GENERAL,
2062             "Clients don't have long-term identity keys. Exiting.\n");
2063     return -1;
2064   }
2065   tor_assert(nickname);
2066   if (init_keys() < 0) {
2067     log_err(LD_BUG,"Error initializing keys; can't display fingerprint");
2068     return -1;
2069   }
2070   if (!(k = get_identity_key())) {
2071     log_err(LD_GENERAL,"Error: missing identity key.");
2072     return -1;
2073   }
2074   if (crypto_pk_get_fingerprint(k, buf, 1)<0) {
2075     log_err(LD_BUG, "Error computing fingerprint");
2076     return -1;
2077   }
2078   printf("%s %s\n", nickname, buf);
2079   return 0;
2080 }
2081
2082 /** Entry point for password hashing: take the desired password from
2083  * the command line, and print its salted hash to stdout. **/
2084 /* static */ void
2085 do_hash_password(void)
2086 {
2087
2088   char output[256];
2089   char key[S2K_SPECIFIER_LEN+DIGEST_LEN];
2090
2091   crypto_rand(key, S2K_SPECIFIER_LEN-1);
2092   key[S2K_SPECIFIER_LEN-1] = (uint8_t)96; /* Hash 64 K of data. */
2093   secret_to_key(key+S2K_SPECIFIER_LEN, DIGEST_LEN,
2094                 get_options()->command_arg, strlen(get_options()->command_arg),
2095                 key);
2096   base16_encode(output, sizeof(output), key, sizeof(key));
2097   printf("16:%s\n",output);
2098 }
2099
2100 /** Main entry point for the Tor process.  Called from main(). */
2101 /* This function is distinct from main() only so we can link main.c into
2102  * the unittest binary without conflicting with the unittests' main. */
2103 int
2104 tor_main(int argc, char *argv[])
2105 {
2106   int result = 0;
2107   update_approx_time(time(NULL));
2108   tor_threads_init();
2109   init_logging();
2110 #ifdef USE_DMALLOC
2111   {
2112     /* Instruct OpenSSL to use our internal wrappers for malloc,
2113        realloc and free. */
2114     int r = CRYPTO_set_mem_ex_functions(_tor_malloc, _tor_realloc, _tor_free);
2115     tor_assert(r);
2116   }
2117 #endif
2118 #ifdef NT_SERVICE
2119   {
2120      int done = 0;
2121      result = nt_service_parse_options(argc, argv, &done);
2122      if (done) return result;
2123   }
2124 #endif
2125   if (tor_init(argc, argv)<0)
2126     return -1;
2127   switch (get_options()->command) {
2128   case CMD_RUN_TOR:
2129 #ifdef NT_SERVICE
2130     nt_service_set_state(SERVICE_RUNNING);
2131 #endif
2132     result = do_main_loop();
2133     break;
2134   case CMD_LIST_FINGERPRINT:
2135     result = do_list_fingerprint();
2136     break;
2137   case CMD_HASH_PASSWORD:
2138     do_hash_password();
2139     result = 0;
2140     break;
2141   case CMD_VERIFY_CONFIG:
2142     printf("Configuration was valid\n");
2143     result = 0;
2144     break;
2145   case CMD_RUN_UNITTESTS: /* only set by test.c */
2146   default:
2147     log_warn(LD_BUG,"Illegal command number %d: internal error.",
2148              get_options()->command);
2149     result = -1;
2150   }
2151   tor_cleanup();
2152   return result;
2153 }
2154