src/or/main.c

   1 /* Copyright (c) 2001 Matej Pfajfar.
   2  * Copyright (c) 2001-2004, Roger Dingledine.
   3  * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson.
   4  * Copyright (c) 2007-2008, The Tor Project, Inc. */
   5 /* See LICENSE for licensing information */
   6
   7 /**
   8  * \file main.c
   9  * \brief Toplevel module. Handles signals, multiplexes between
  10  * connections, implements main loop, and drives scheduled events.
  11  **/
  12
  13 #define MAIN_PRIVATE
  14 #include "or.h"
  15 #ifdef USE_DMALLOC
  16 #include <dmalloc.h>
  17 #include <openssl/crypto.h>
  18 #endif
  19 #include "memarea.h"
  20
  21 void evdns_shutdown(int); /* declared by hand here; not defined in the visible part of this file */
  22
  23 /********* PROTOTYPES **********/
  24 /* Forward declarations of the file-local (static) functions. */
  25 static void dumpmemusage(int severity);
  26 static void dumpstats(int severity); /* log stats */
  27 static void conn_read_callback(int fd, short event, void *_conn); /* libevent callback; _conn is the connection_t */
  28 static void conn_write_callback(int fd, short event, void *_conn); /* libevent callback; _conn is the connection_t */
  29 static void signal_callback(int fd, short events, void *arg);
  30 static void second_elapsed_callback(int fd, short event, void *args);
  31 static int conn_close_if_marked(int i); /* i is an index into connection_array */
  32 static void connection_start_reading_from_linked_conn(connection_t *conn);
  33 static int connection_should_read_from_linked_conn(connection_t *conn);
  34
  35 /********* START VARIABLES **********/
  36
  37 int global_read_bucket; /**< Max number of bytes I can read this second. */
  38 int global_write_bucket; /**< Max number of bytes I can write this second. */
  39
  40 /** Max number of relayed (bandwidth class 1) bytes I can read this second. */
  41 int global_relayed_read_bucket;
  42 /** Max number of relayed (bandwidth class 1) bytes I can write this second. */
  43 int global_relayed_write_bucket;
  44
  45 /** What was the read bucket before the last second_elapsed_callback() call?
  46  * (used to determine how many bytes we've read). */
  47 static int stats_prev_global_read_bucket;
  48 /** What was the write bucket before the last second_elapsed_callback() call?
  49  * (used to determine how many bytes we've written). */
  50 static int stats_prev_global_write_bucket;
  51 /* XXX We might want to keep stats about global_relayed_*_bucket too. Or not. */
  52 /** How many bytes have we read since we started the process? */
  53 static uint64_t stats_n_bytes_read = 0;
  54 /** How many bytes have we written since we started the process? */
  55 static uint64_t stats_n_bytes_written = 0;
  56 /** What time did this process start up? */
  57 time_t time_of_process_start = 0;
  58 /** How many seconds have we been running?  (directory_all_unreachable() resets this to 0.) */
  59 long stats_n_seconds_working = 0;
  60 /** When do we next launch DNS wildcarding checks? */
  61 static time_t time_to_check_for_correct_dns = 0;
  62
  63 /** Minimum interval between two honored SIGNEWNYM requests (added to a time_t). */
  64 #define MAX_SIGNEWNYM_RATE 10
  65 /** When did we last process a SIGNEWNYM request?  (Set by signewnym_impl().) */
  66 static time_t time_of_last_signewnym = 0;
  67 /** Is there a signewnym request we're currently waiting to handle?  (Cleared by signewnym_impl().) */
  68 static int signewnym_is_pending = 0;
  69
  70 /** Smartlist of all open connections. */
  71 static smartlist_t *connection_array = NULL;
  72 /** List of connections that have been marked for close and need to be freed
  73  * and removed from connection_array. */
  74 static smartlist_t *closeable_connection_lst = NULL;
  75 /** List of linked connections that are currently reading data into their
  76  * inbuf from their partner's outbuf. */
  77 static smartlist_t *active_linked_connection_lst = NULL;
  78 /** Flag: Set to true iff we entered the current libevent main loop via
  79  * <b>loop_once</b>. If so, there's no need to trigger a loopexit in order
  80  * to handle linked connections. */
  81 static int called_loop_once = 0;
  82
  83 /** We set this to 1 when we've opened a circuit, so we can print a log
  84  * entry to inform the user that Tor is working. */
  85 int has_completed_circuit=0;
  86
  87 /** How often do we check for router descriptors that we should download
  88  * when we have too little directory info? */
  89 #define GREEDY_DESCRIPTOR_RETRY_INTERVAL (10)
  90 /** How often do we check for router descriptors that we should download
  91  * when we have enough directory info? */
  92 #define LAZY_DESCRIPTOR_RETRY_INTERVAL (60)
  93 /** How often do we 'forgive' undownloadable router descriptors and attempt
  94  * to download them again? */
  95 #define DESCRIPTOR_FAILURE_RESET_INTERVAL (60*60)
  96 /** How long (in seconds; 5 minutes) do we let a directory connection stall before expiring it? */
  97 #define DIR_CONN_MAX_STALL (5*60)
  98
  99 /** How long do we let OR connections handshake before we decide that
 100  * they are obsolete? */
 101 #define TLS_HANDSHAKE_TIMEOUT (60)
 102
 103 /********* END VARIABLES ************/
 104
 105 /****************************************************************************
 106 *
 107 * This section contains accessors and other methods on the connection_array
 108 * variables (which are global within this file and unavailable outside it).
 109 *
 110 ****************************************************************************/
 111
 112 /** Add <b>conn</b> to connection_array and record its slot in
 113  * conn->conn_array_index.  <b>conn</b> must have a socket, be linked, or be
 114  * an AP connection handling a DNS request, and must not have been added
 115  * before.  It starts out non-reading and non-writing.  Always returns 0. */
 116 int
 117 connection_add(connection_t *conn)
 118 {
 119   tor_assert(conn);
 120   tor_assert(conn->s >= 0 ||
 121              conn->linked ||
 122              (conn->type == CONN_TYPE_AP &&
 123               TO_EDGE_CONN(conn)->is_dns_request));
 124   /* An index of -1 marks a connection that is not in connection_array. */
 125   tor_assert(conn->conn_array_index == -1); /* can only connection_add once */
 126   conn->conn_array_index = smartlist_len(connection_array); /* slot the new entry is meant to occupy */
 127   smartlist_add(connection_array, conn);
 128   /* The events are only set up here, never event_add()ed, so nothing is watched yet. */
 129   if (conn->s >= 0 || conn->linked) {
 130     conn->read_event = tor_malloc_zero(sizeof(struct event));
 131     conn->write_event = tor_malloc_zero(sizeof(struct event));
 132     event_set(conn->read_event, conn->s, EV_READ|EV_PERSIST,
 133               conn_read_callback, conn);
 134     event_set(conn->write_event, conn->s, EV_WRITE|EV_PERSIST,
 135               conn_write_callback, conn);
 136   } /* socketless, unlinked conns (the DNS-request case) get no events at all */
 137
 138   log_debug(LD_NET,"new conn type %s, socket %d, address %s, n_conns %d.",
 139             conn_type_to_string(conn->type), conn->s, conn->address,
 140             smartlist_len(connection_array));
 141
 142   return 0;
 143 }
 144
 145 /** Remove <b>conn</b> from connection_array and unregister its events.
 146  * Unless <b>conn</b> was the last entry, the connection that ends up in its
 147  * old slot gets its conn_array_index updated to match.  This function itself
 148  * does not reset conn->conn_array_index.  Always returns 0. */
 149 int
 150 connection_remove(connection_t *conn)
 151 {
 152   int current_index;
 153   connection_t *tmp;
 154
 155   tor_assert(conn);
 156   /* Logged before the removal, so the count printed still includes conn. */
 157   log_debug(LD_NET,"removing socket %d (type %s), n_conns now %d",
 158             conn->s, conn_type_to_string(conn->type),
 159             smartlist_len(connection_array));
 160
 161   tor_assert(conn->conn_array_index >= 0); /* must currently be in the array */
 162   current_index = conn->conn_array_index;
 163   connection_unregister_events(conn); /* This is redundant, but cheap. */
 164   if (current_index == smartlist_len(connection_array)-1) { /* at the end */
 165     smartlist_del(connection_array, current_index);
 166     return 0; /* nothing was moved, so no index needs fixing */
 167   }
 168
 169   /* replace this one with the one at the end */
 170   smartlist_del(connection_array, current_index);
 171   tmp = smartlist_get(connection_array, current_index); /* whoever now occupies the vacated slot */
 172   tmp->conn_array_index = current_index;
 173
 174   return 0;
 175 }
 176
 177 /** Tear <b>conn</b> down completely.  First call
 178  * connection_about_to_close_connection() (per the original note here: an
 179  * edge conn leaves its circuit's list; other conns flush and send destroys).
 180  *
 181  * Then remove it from connection_array (if it has an index), detach it from
 182  * its linked partner, and drop it from closeable_connection_lst and
 183  * active_linked_connection_lst.  An OR conn with a nonzero identity digest
 184  * is also taken out of the identity map.  Finally hand it to
 185  * connection_free(). */
 186 static void
 187 connection_unlink(connection_t *conn)
 188 {
 189   connection_about_to_close_connection(conn);
 190   if (conn->conn_array_index >= 0) {
 191     connection_remove(conn);
 192   }
 193   if (conn->linked_conn) {
 194     conn->linked_conn->linked_conn = NULL; /* sever the partner's pointer back to us first */
 195     if (! conn->linked_conn->marked_for_close &&
 196         conn->linked_conn->reading_from_linked_conn)
 197       connection_start_reading(conn->linked_conn); /* re-arm the partner's read side now that its linked_conn is NULL */
 198     conn->linked_conn = NULL;
 199   }
 200   smartlist_remove(closeable_connection_lst, conn);
 201   smartlist_remove(active_linked_connection_lst, conn);
 202   if (conn->type == CONN_TYPE_EXIT) {
 203     assert_connection_edge_not_dns_pending(TO_EDGE_CONN(conn));
 204   }
 205   if (conn->type == CONN_TYPE_OR) {
 206     if (!tor_digest_is_zero(TO_OR_CONN(conn)->identity_digest))
 207       connection_or_remove_from_identity_map(TO_OR_CONN(conn));
 208   }
 209   connection_free(conn); /* conn must not be used after this point */
 210 }
 211
 212 /** Schedule <b>conn</b> to be closed by appending it to closeable_connection_lst. **/
 213 void
 214 add_connection_to_closeable_list(connection_t *conn)
 215 {
 216   tor_assert(!smartlist_isin(closeable_connection_lst, conn)); /* must not be queued twice */
 217   tor_assert(conn->marked_for_close); /* caller must have marked it already */
 218   assert_connection_ok(conn, time(NULL));
 219   smartlist_add(closeable_connection_lst, conn);
 220 }
 221
 222 /** Return true iff <b>conn</b> is on closeable_connection_lst (the result of smartlist_isin()). */
 223 int
 224 connection_is_on_closeable_list(connection_t *conn)
 225 {
 226   return smartlist_isin(closeable_connection_lst, conn);
 227 }
 228
 229 /** Return true iff <b>conn</b> is currently in connection_array (the result of smartlist_isin()). */
 230 int
 231 connection_in_array(connection_t *conn)
 232 {
 233   return smartlist_isin(connection_array, conn);
 234 }
 235
 236 /** Return the smartlist that holds all connections (connection_array),
 237  * creating it first if it does not exist yet.  The list itself is returned,
 238  * not a copy; it must not be modified by the caller.
 239  */
 240 smartlist_t *
 241 get_connection_array(void)
 242 {
 243   if (!connection_array) /* created on first use */
 244     connection_array = smartlist_create();
 245   return connection_array;
 246 }
 247
 248 /** Set the event mask on <b>conn</b> to <b>events</b>.  (The event
 249  * mask is a bitmask whose bits are EV_READ and EV_WRITE.)  A bit that is
 250  * clear in <b>events</b> actively stops watching that direction. */
 251 void
 252 connection_watch_events(connection_t *conn, short events)
 253 {
 254   if (events & EV_READ)
 255     connection_start_reading(conn);
 256   else
 257     connection_stop_reading(conn);
 258   /* The write side is handled independently of the read side. */
 259   if (events & EV_WRITE)
 260     connection_start_writing(conn);
 261   else
 262     connection_stop_writing(conn);
 263 }
 264
 265 /** Return true iff <b>conn</b> is listening for read events. */
 266 int
 267 connection_is_reading(connection_t *conn)
 268 {
 269   tor_assert(conn);
 270   /* Either the linked-conn read flag is set, or libevent has the read event pending. */
 271   return conn->reading_from_linked_conn ||
 272     (conn->read_event && event_pending(conn->read_event, EV_READ, NULL));
 273 }
 274
 275 /** Tell the main loop to stop notifying <b>conn</b> of any read events.  conn->read_event must be set. */
 276 void
 277 connection_stop_reading(connection_t *conn)
 278 {
 279   tor_assert(conn);
 280   tor_assert(conn->read_event);
 281   /* Linked conns are driven by a flag plus the active list; other conns by event_del(). */
 282   if (conn->linked) {
 283     conn->reading_from_linked_conn = 0;
 284     connection_stop_reading_from_linked_conn(conn);
 285   } else {
 286     if (event_del(conn->read_event)) /* a nonzero result is only logged, not propagated */
 287       log_warn(LD_NET, "Error from libevent setting read event state for %d "
 288                "to unwatched: %s",
 289                conn->s,
 290                tor_socket_strerror(tor_socket_errno(conn->s)));
 291   }
 292 }
 293
 294 /** Tell the main loop to start notifying <b>conn</b> of any read events.  conn->read_event must be set. */
 295 void
 296 connection_start_reading(connection_t *conn)
 297 {
 298   tor_assert(conn);
 299   tor_assert(conn->read_event);
 300   /* Linked conns set a flag and join the active list only if reading is worthwhile now. */
 301   if (conn->linked) {
 302     conn->reading_from_linked_conn = 1;
 303     if (connection_should_read_from_linked_conn(conn))
 304       connection_start_reading_from_linked_conn(conn);
 305   } else {
 306     if (event_add(conn->read_event, NULL)) /* a nonzero result is only logged, not propagated */
 307       log_warn(LD_NET, "Error from libevent setting read event state for %d "
 308                "to watched: %s",
 309                conn->s,
 310                tor_socket_strerror(tor_socket_errno(conn->s)));
 311   }
 312 }
 313
 314 /** Return true iff <b>conn</b> is listening for write events. */
 315 int
 316 connection_is_writing(connection_t *conn)
 317 {
 318   tor_assert(conn);
 319   /* Either the linked-conn write flag is set, or libevent has the write event pending. */
 320   return conn->writing_to_linked_conn ||
 321     (conn->write_event && event_pending(conn->write_event, EV_WRITE, NULL));
 322 }
 323
 324 /** Tell the main loop to stop notifying <b>conn</b> of any write events.  conn->write_event must be set. */
 325 void
 326 connection_stop_writing(connection_t *conn)
 327 {
 328   tor_assert(conn);
 329   tor_assert(conn->write_event);
 330   /* For a linked conn, "writing" means the partner reads from us, so stop the partner. */
 331   if (conn->linked) {
 332     conn->writing_to_linked_conn = 0;
 333     if (conn->linked_conn) /* the partner may already be gone */
 334       connection_stop_reading_from_linked_conn(conn->linked_conn);
 335   } else {
 336     if (event_del(conn->write_event)) /* a nonzero result is only logged, not propagated */
 337       log_warn(LD_NET, "Error from libevent setting write event state for %d "
 338                "to unwatched: %s",
 339                conn->s,
 340                tor_socket_strerror(tor_socket_errno(conn->s)));
 341   }
 342 }
 343
 344 /** Tell the main loop to start notifying <b>conn</b> of any write events.  conn->write_event must be set. */
 345 void
 346 connection_start_writing(connection_t *conn)
 347 {
 348   tor_assert(conn);
 349   tor_assert(conn->write_event);
 350   /* For a linked conn, "writing" means the partner reads from us, so wake the partner if it should read. */
 351   if (conn->linked) {
 352     conn->writing_to_linked_conn = 1;
 353     if (conn->linked_conn &&
 354         connection_should_read_from_linked_conn(conn->linked_conn))
 355       connection_start_reading_from_linked_conn(conn->linked_conn);
 356   } else {
 357     if (event_add(conn->write_event, NULL)) /* a nonzero result is only logged, not propagated */
 358       log_warn(LD_NET, "Error from libevent setting write event state for %d "
 359                "to watched: %s",
 360                conn->s,
 361                tor_socket_strerror(tor_socket_errno(conn->s)));
 362   }
 363 }
 364
 365 /** Return true iff <b>conn</b> is a linked conn with its
 366  * reading_from_linked_conn flag set, and either (a) its partner is gone
 367  * (linked_conn is NULL), or (b) the partner has its writing_to_linked_conn
 368  * flag set and has data in its outbuf.  Case (a) presumably exists so that
 369  * <b>conn</b> gets a read pass in which to notice that the partner vanished. */
 370 static int
 371 connection_should_read_from_linked_conn(connection_t *conn)
 372 {
 373   if (conn->linked && conn->reading_from_linked_conn) {
 374     if (! conn->linked_conn ||
 375         (conn->linked_conn->writing_to_linked_conn &&
 376          buf_datalen(conn->linked_conn->outbuf)))
 377       return 1;
 378   }
 379   return 0;
 380 }
 381
 382 /** Helper: Tell the main loop to begin reading bytes into <b>conn</b> from
 383  * its linked connection, if it is not doing so already.  Called by
 384  * connection_start_reading and connection_start_writing as appropriate. */
 385 static void
 386 connection_start_reading_from_linked_conn(connection_t *conn)
 387 {
 388   tor_assert(conn);
 389   tor_assert(conn->linked == 1);
 390   /* active_on_link mirrors membership in active_linked_connection_lst. */
 391   if (!conn->active_on_link) {
 392     conn->active_on_link = 1;
 393     smartlist_add(active_linked_connection_lst, conn);
 394     if (!called_loop_once) {
 395       /* This is the first event on the list; we won't be in LOOP_ONCE mode,
 396        * so we need to make sure that the event_loop() actually exits at the
 397        * end of its run through the current connections and
 398        * lets us activate read events for linked connections. */
 399       struct timeval tv = { 0, 0 }; /* zero timeout */
 400       event_loopexit(&tv);
 401     }
 402   } else {
 403     tor_assert(smartlist_isin(active_linked_connection_lst, conn)); /* flag and list must agree */
 404   }
 405 }
 406
 407 /** Tell the main loop to stop reading bytes into <b>conn</b> from its linked
 408  * connection, if it is currently doing so.  Called by connection_stop_reading,
 409  * connection_stop_writing, and connection_read. */
 410 void
 411 connection_stop_reading_from_linked_conn(connection_t *conn)
 412 {
 413   tor_assert(conn);
 414   tor_assert(conn->linked == 1);
 415   /* active_on_link mirrors membership in active_linked_connection_lst. */
 416   if (conn->active_on_link) {
 417     conn->active_on_link = 0;
 418     /* FFFF We could keep an index here so we can smartlist_del
 419      * cleanly.  On the other hand, this doesn't show up on profiles,
 420      * so let's leave it alone for now. */
 421     smartlist_remove(active_linked_connection_lst, conn);
 422   } else {
 423     tor_assert(!smartlist_isin(active_linked_connection_lst, conn)); /* flag and list must agree */
 424   }
 425 }
 426
 427 /** Close all connections that have been scheduled to get closed; ones still held open for flushing stay listed. */
 428 static void
 429 close_closeable_connections(void)
 430 {
 431   int i;
 432   for (i = 0; i < smartlist_len(closeable_connection_lst); ) { /* i advances only past entries that survive */
 433     connection_t *conn = smartlist_get(closeable_connection_lst, i);
 434     if (conn->conn_array_index < 0) {
 435       connection_unlink(conn); /* blow it away right now; unlinking also takes it off this list */
 436     } else {
 437       if (!conn_close_if_marked(conn->conn_array_index))
 438         ++i; /* not closed this time, so step over it */
 439     }
 440   }
 441 }
 442
 443 /** Libevent callback: this gets invoked when (connection_t*)<b>_conn</b> has
 444  * some data to read.  <b>fd</b> and <b>event</b> are ignored. */
 445 static void
 446 conn_read_callback(int fd, short event, void *_conn)
 447 {
 448   connection_t *conn = _conn;
 449   (void)fd;
 450   (void)event;
 451
 452   log_debug(LD_NET,"socket %d wants to read.",conn->s);
 453
 454   assert_connection_ok(conn, time(NULL));
 455   /* A negative result that left conn unmarked is an unhandled error: mark it ourselves. */
 456   if (connection_handle_read(conn) < 0) {
 457     if (!conn->marked_for_close) {
 458 #ifndef MS_WINDOWS /* the warning and the fragile assert are skipped when MS_WINDOWS is defined */
 459       log_warn(LD_BUG,"Unhandled error on read for %s connection "
 460                "(fd %d); removing",
 461                conn_type_to_string(conn->type), conn->s);
 462       tor_fragile_assert();
 463 #endif
 464       if (CONN_IS_EDGE(conn))
 465         connection_edge_end_errno(TO_EDGE_CONN(conn));
 466       connection_mark_for_close(conn);
 467     }
 468   }
 469   assert_connection_ok(conn, time(NULL));
 470   /* Free whatever got scheduled for closing while this event was handled. */
 471   if (smartlist_len(closeable_connection_lst))
 472     close_closeable_connections();
 473 }
 474
 475 /** Libevent callback: this gets invoked when (connection_t*)<b>_conn</b> has
 476  * some data to write.  <b>fd</b> and <b>events</b> are ignored. */
 477 static void
 478 conn_write_callback(int fd, short events, void *_conn)
 479 {
 480   connection_t *conn = _conn;
 481   (void)fd;
 482   (void)events;
 483
 484   LOG_FN_CONN(conn, (LOG_DEBUG, LD_NET, "socket %d wants to write.",conn->s));
 485
 486   assert_connection_ok(conn, time(NULL));
 487   /* A negative result that left conn unmarked is an unhandled error: close it without flushing. */
 488   if (connection_handle_write(conn, 0) < 0) {
 489     if (!conn->marked_for_close) {
 490       /* this connection is broken. remove it. */
 491       log_fn(LOG_WARN,LD_BUG,
 492              "unhandled error on write for %s connection (fd %d); removing",
 493              conn_type_to_string(conn->type), conn->s);
 494       tor_fragile_assert();
 495       if (CONN_IS_EDGE(conn)) {
 496         /* otherwise we cry wolf about duplicate close */
 497         edge_connection_t *edge_conn = TO_EDGE_CONN(conn);
 498         if (!edge_conn->end_reason) /* keep any reason that was already recorded */
 499           edge_conn->end_reason = END_STREAM_REASON_INTERNAL;
 500         edge_conn->edge_has_sent_end = 1;
 501       }
 502       connection_close_immediate(conn); /* So we don't try to flush. */
 503       connection_mark_for_close(conn);
 504     }
 505   }
 506   assert_connection_ok(conn, time(NULL));
 507   /* Free whatever got scheduled for closing while this event was handled. */
 508   if (smartlist_len(closeable_connection_lst))
 509     close_closeable_connections();
 510 }
 511
 512 /** If the connection at connection_array[i] is marked for close, then:
 513  *    - If it has data that it wants to flush, try to flush it.
 514  *    - If it _still_ has data to flush, and conn->hold_open_until_flushed is
 515  *      true, then leave the connection open and return.
 516  *    - Otherwise, remove the connection from connection_array and from
 517  *      all other lists, close it, and free it.
 518  * Returns 1 if the connection was closed, 0 otherwise (including when it was
 519  * not marked at all).  After a return of 1 the connection has been freed. */
 520 static int
 521 conn_close_if_marked(int i)
 522 {
 523   connection_t *conn;
 524   int retval;
 525   time_t now;
 526
 527   conn = smartlist_get(connection_array, i);
 528   if (!conn->marked_for_close)
 529     return 0; /* nothing to see here, move along */
 530   now = time(NULL);
 531   assert_connection_ok(conn, now);
 532   assert_all_pending_dns_resolves_ok();
 533
 534   log_debug(LD_NET,"Cleaning up connection (fd %d).",conn->s);
 535   if ((conn->s >= 0 || conn->linked_conn) && connection_wants_to_flush(conn)) {
 536     /* s == -1 means it's an incomplete edge connection, or that the socket
 537      * has already been closed as unflushable. */
 538     ssize_t sz = connection_bucket_write_limit(conn, now); /* used by the two socket paths below, not the linked one */
 539     if (!conn->hold_open_until_flushed)
 540       log_info(LD_NET,
 541                "Conn (addr %s, fd %d, type %s, state %d) marked, but wants "
 542                "to flush %d bytes. (Marked at %s:%d)",
 543                escaped_safe_str(conn->address),
 544                conn->s, conn_type_to_string(conn->type), conn->state,
 545                (int)conn->outbuf_flushlen,
 546                 conn->marked_for_close_file, conn->marked_for_close); /* marked_for_close is printed as the line number */
 547     if (conn->linked_conn) {
 548       retval = move_buf_to_buf(conn->linked_conn->inbuf, conn->outbuf,
 549                                &conn->outbuf_flushlen);
 550       if (retval >= 0) {
 551         /* The linked conn will notice that it has data when it notices that
 552          * we're gone. */
 553         connection_start_reading_from_linked_conn(conn->linked_conn);
 554       }
 555       log_debug(LD_GENERAL, "Flushed last %d bytes from a linked conn; "
 556                "%d left; flushlen %d; wants-to-flush==%d", retval,
 557                (int)buf_datalen(conn->outbuf),
 558                (int)conn->outbuf_flushlen,
 559                 connection_wants_to_flush(conn));
 560     } else if (connection_speaks_cells(conn)) {
 561       if (conn->state == OR_CONN_STATE_OPEN) {
 562         retval = flush_buf_tls(TO_OR_CONN(conn)->tls, conn->outbuf, sz,
 563                                &conn->outbuf_flushlen);
 564       } else
 565         retval = -1; /* never flush non-open broken tls connections */
 566     } else {
 567       retval = flush_buf(conn->s, conn->outbuf, sz, &conn->outbuf_flushlen);
 568     }
 569     if (retval >= 0 && /* Technically, we could survive things like
 570                           TLS_WANT_WRITE here. But don't bother for now. */
 571         conn->hold_open_until_flushed && connection_wants_to_flush(conn)) {
 572       if (retval > 0) { /* a positive result counts as progress */
 573         LOG_FN_CONN(conn, (LOG_INFO,LD_NET,
 574                            "Holding conn (fd %d) open for more flushing.",
 575                            conn->s));
 576         conn->timestamp_lastwritten = now; /* reset so we can flush more */
 577       }
 578       return 0; /* stays open (and stays marked) for a later attempt */
 579     }
 580     if (connection_wants_to_flush(conn)) { /* closing anyway with data still unflushed */
 581       int severity;
 582       if (conn->type == CONN_TYPE_EXIT ||
 583           (conn->type == CONN_TYPE_OR && server_mode(get_options())) ||
 584           (conn->type == CONN_TYPE_DIR && conn->purpose == DIR_PURPOSE_SERVER))
 585         severity = LOG_INFO;
 586       else
 587         severity = LOG_NOTICE;
 588       /* XXXX Maybe allow this to happen a certain amount per hour; it usually
 589        * is meaningless. */
 590       log_fn(severity, LD_NET, "We stalled too much while trying to write %d "
 591              "bytes to address %s.  If this happens a lot, either "
 592              "something is wrong with your network connection, or "
 593              "something is wrong with theirs. "
 594              "(fd %d, type %s, state %d, marked at %s:%d).",
 595              (int)buf_datalen(conn->outbuf),
 596              escaped_safe_str(conn->address), conn->s,
 597              conn_type_to_string(conn->type), conn->state,
 598              conn->marked_for_close_file,
 599              conn->marked_for_close);
 600     }
 601   }
 602   connection_unlink(conn); /* unlink, remove, free */
 603   return 1;
 604 }
 605
 606 /** We've just tried every dirserver we know about, and none of
 607  * them were reachable. Assume the network is down. Reset
 608  * stats_n_seconds_working so next time an application connection arrives
 609  * we'll delay it and try another directory fetch. Kill off all the
 610  * circuit_wait streams that are waiting now, since they will all timeout
 611  * anyway, and report DIR_ALL_UNREACHABLE.  <b>now</b> is unused. */
 612 void
 613 directory_all_unreachable(time_t now)
 614 {
 615   connection_t *conn;
 616   (void)now;
 617
 618   stats_n_seconds_working=0; /* reset it */
 619   /* NOTE(review): this loop ends only if marking a conn stops the lookup from returning it -- confirm. */
 620   while ((conn = connection_get_by_type_state(CONN_TYPE_AP,
 621                                               AP_CONN_STATE_CIRCUIT_WAIT))) {
 622     edge_connection_t *edge_conn = TO_EDGE_CONN(conn);
 623     log_notice(LD_NET,
 624                "Is your network connection down? "
 625                "Failing connection to '%s:%d'.",
 626                safe_str(edge_conn->socks_request->address),
 627                edge_conn->socks_request->port);
 628     connection_mark_unattached_ap(edge_conn,
 629                                   END_STREAM_REASON_NET_UNREACHABLE);
 630   }
 631   control_event_general_status(LOG_ERR, "DIR_ALL_UNREACHABLE");
 632 }
 633
 634 /** This function is called whenever we successfully pull down some new
 635  * network statuses or server descriptors.  <b>now</b> is the current time. */
 636 void
 637 directory_info_has_arrived(time_t now, int from_cache)
 638 {
 639   or_options_t *options = get_options();
 640   /* Without minimum directory info, just log, ask for more descriptors, and stop. */
 641   if (!router_have_minimum_dir_info()) {
 642     int quiet = directory_too_idle_to_fetch_descriptors(options, now); /* nonzero lowers the log level to INFO */
 643     log(quiet ? LOG_INFO : LOG_NOTICE, LD_DIR,
 644         "I learned some more directory information, but not enough to "
 645         "build a circuit: %s", get_dir_info_status_string());
 646     update_router_descriptor_downloads(now);
 647     return;
 648   } else {
 649     if (directory_fetches_from_authorities(options))
 650       update_router_descriptor_downloads(now);
 651
 652     /* if we have enough dir info, then update our guard status with
 653      * whatever we just learned. */
 654     entry_guards_compute_status();
 655     /* Don't even bother trying to get extrainfo until the rest of our
 656      * directory info is up-to-date */
 657     if (options->DownloadExtraInfo)
 658       update_extrainfo_downloads(now);
 659   }
 660   /* A nonzero <b>from_cache</b> suppresses the reachability test below. */
 661   if (server_mode(options) && !we_are_hibernating() && !from_cache &&
 662       (has_completed_circuit || !any_predicted_circuits(now)))
 663     consider_testing_reachability(1, 1);
 664 }
 665
 666 /** Perform regular maintenance tasks for the connection at
 667  * connection_array[<b>i</b>], treating <b>now</b> as the current time.  This
 668  * function gets run once per second per connection by run_scheduled_events. */
 669 static void
 670 run_connection_housekeeping(int i, time_t now)
 671 {
 672   cell_t cell;
 673   connection_t *conn = smartlist_get(connection_array, i);
 674   or_options_t *options = get_options();
 675   or_connection_t *or_conn;
 676   /* Record when this OR conn's outbuf was last seen empty; the "stuck" check below reads it. */
 677   if (conn->outbuf && !buf_datalen(conn->outbuf) && conn->type == CONN_TYPE_OR)
 678     TO_OR_CONN(conn)->timestamp_lastempty = now;
 679
 680   if (conn->marked_for_close) {
 681     /* nothing to do here */
 682     return;
 683   }
 684
 685   /* Expire any directory connections that haven't been active (sent
 686    * if a server or received if a client) for 5 min */
 687   if (conn->type == CONN_TYPE_DIR &&
 688       ((DIR_CONN_IS_SERVER(conn) &&
 689         conn->timestamp_lastwritten + DIR_CONN_MAX_STALL < now) ||
 690        (!DIR_CONN_IS_SERVER(conn) &&
 691         conn->timestamp_lastread + DIR_CONN_MAX_STALL < now))) {
 692     log_info(LD_DIR,"Expiring wedged directory conn (fd %d, purpose %d)",
 693              conn->s, conn->purpose);
 694     /* This check is temporary; it's to let us know whether we should consider
 695      * parsing partial serverdesc responses. */
 696     if (conn->purpose == DIR_PURPOSE_FETCH_SERVERDESC &&
 697         buf_datalen(conn->inbuf)>=1024) {
 698       log_info(LD_DIR,"Trying to extract information from wedged server desc "
 699                "download.");
 700       connection_dir_reached_eof(TO_DIR_CONN(conn)); /* handle the partial data as if EOF had arrived */
 701     } else {
 702       connection_mark_for_close(conn);
 703     }
 704     return;
 705   }
 706
 707   if (!connection_speaks_cells(conn))
 708     return; /* we're all done here, the rest is just for OR conns */
 709
 710   or_conn = TO_OR_CONN(conn);
 711
 712   if (or_conn->is_bad_for_new_circs && !or_conn->n_circuits) {
 713     /* It's bad for new circuits, and has no unmarked circuits on it:
 714      * mark it now. */
 715     log_info(LD_OR,
 716              "Expiring non-used OR connection to fd %d (%s:%d) [Too old].",
 717              conn->s, conn->address, conn->port);
 718     if (conn->state == OR_CONN_STATE_CONNECTING)
 719       connection_or_connect_failed(TO_OR_CONN(conn),
 720                                    END_OR_CONN_REASON_TIMEOUT,
 721                                    "Tor gave up on the connection");
 722     connection_mark_for_close(conn);
 723     conn->hold_open_until_flushed = 1;
 724     return;
 725   }
 726
 727   /* If we haven't written to an OR connection for a while, then either nuke
 728      the connection or send a keepalive, depending. */
 729   if (now >= conn->timestamp_lastwritten + options->KeepalivePeriod) {
 730     routerinfo_t *router = router_get_by_digest(or_conn->identity_digest);
 731     int maxCircuitlessPeriod = options->MaxCircuitDirtiness*3/2; /* 1.5 times MaxCircuitDirtiness */
 732     if (!connection_state_is_open(conn)) {
 733       /* We never managed to actually get this connection open and happy. */
 734       log_info(LD_OR,"Expiring non-open OR connection to fd %d (%s:%d).",
 735                conn->s,conn->address, conn->port);
 736       connection_mark_for_close(conn);
 737       conn->hold_open_until_flushed = 1;
 738     } else if (we_are_hibernating() && !or_conn->n_circuits &&
 739                !buf_datalen(conn->outbuf)) {
 740       /* We're hibernating, there's no circuits, and nothing to flush.*/
 741       log_info(LD_OR,"Expiring non-used OR connection to fd %d (%s:%d) "
 742                "[Hibernating or exiting].",
 743                conn->s,conn->address, conn->port);
 744       connection_mark_for_close(conn);
 745       conn->hold_open_until_flushed = 1;
 746     } else if (!clique_mode(options) && !or_conn->n_circuits &&
 747                now >= or_conn->timestamp_last_added_nonpadding +
 748                                            maxCircuitlessPeriod &&
 749                (!router || !server_mode(options) ||
 750                 !router_is_clique_mode(router))) {
 751       log_info(LD_OR,"Expiring non-used OR connection to fd %d (%s:%d) "
 752                "[Not in clique mode].",
 753                conn->s,conn->address, conn->port);
 754       connection_mark_for_close(conn);
 755       conn->hold_open_until_flushed = 1;
 756     } else if (
 757          now >= or_conn->timestamp_lastempty + options->KeepalivePeriod*10 &&
 758          now >= conn->timestamp_lastwritten + options->KeepalivePeriod*10) {
 759       log_fn(LOG_PROTOCOL_WARN,LD_PROTOCOL,
 760              "Expiring stuck OR connection to fd %d (%s:%d). (%d bytes to "
 761              "flush; %d seconds since last write)",
 762              conn->s, conn->address, conn->port,
 763              (int)buf_datalen(conn->outbuf),
 764              (int)(now-conn->timestamp_lastwritten));
 765       connection_mark_for_close(conn); /* NOTE(review): unlike the branches above, hold_open_until_flushed is not set here */
 766     } else if (!buf_datalen(conn->outbuf)) {
 767       /* either in clique mode, or we've got a circuit. send a padding cell. */
 768       log_fn(LOG_DEBUG,LD_OR,"Sending keepalive to (%s:%d)",
 769              conn->address, conn->port);
 770       memset(&cell,0,sizeof(cell_t));
 771       cell.command = CELL_PADDING;
 772       connection_or_write_cell_to_buf(&cell, or_conn);
 773     }
 774   }
 775 }
 776
 777 /** Honor a NEWNYM request: make future requests unlinkability to past
 778  * requests. */
 779 static void
 780 signewnym_impl(time_t now)
 781 {
 782   circuit_expire_all_dirty_circs();
 783   addressmap_clear_transient();
 784   time_of_last_signewnym = now;
 785   signewnym_is_pending = 0;
 786 }
 787
 788 /** Perform regular maintenance tasks.  This function gets run once per
 789  * second by second_elapsed_callback().
 790  */
 791 static void
 792 run_scheduled_events(time_t now)
 793 {
 794   static time_t last_rotated_x509_certificate = 0;
 795   static time_t time_to_check_v3_certificate = 0;
 796   static time_t time_to_check_listeners = 0;
 797   static time_t time_to_check_descriptor = 0;
 798   static time_t time_to_check_ipaddress = 0;
 799   static time_t time_to_shrink_memory = 0;
 800   static time_t time_to_try_getting_descriptors = 0;
 801   static time_t time_to_reset_descriptor_failures = 0;
 802   static time_t time_to_add_entropy = 0;
 803   static time_t time_to_write_hs_statistics = 0;
 804   static time_t time_to_write_bridge_status_file = 0;
 805   static time_t time_to_downrate_stability = 0;
 806   static time_t time_to_save_stability = 0;
 807   static time_t time_to_clean_caches = 0;
 808   static time_t time_to_recheck_bandwidth = 0;
 809   static time_t time_to_check_for_expired_networkstatus = 0;
 810   static time_t time_to_dump_geoip_stats = 0;
 811   static time_t time_to_retry_dns_init = 0;
 812   or_options_t *options = get_options();
 813   int i;
 814   int have_dir_info;
 815
 816   /** 0. See if we've been asked to shut down and our timeout has
 817    * expired; or if our bandwidth limits are exhausted and we
 818    * should hibernate; or if it's time to wake up from hibernation.
 819    */
 820   consider_hibernation(now);
 821
 822   /* 0b. If we've deferred a signewnym, make sure it gets handled
 823    * eventually. */
 824   if (signewnym_is_pending &&
 825       time_of_last_signewnym + MAX_SIGNEWNYM_RATE <= now) {
 826     log(LOG_INFO, LD_CONTROL, "Honoring delayed NEWNYM request");
 827     signewnym_impl(now);
 828   }
 829
 830   /** 1a. Every MIN_ONION_KEY_LIFETIME seconds, rotate the onion keys,
 831    *  shut down and restart all cpuworkers, and update the directory if
 832    *  necessary.
 833    */
 834   if (server_mode(options) &&
 835       get_onion_key_set_at()+MIN_ONION_KEY_LIFETIME < now) {
 836     log_info(LD_GENERAL,"Rotating onion key.");
 837     rotate_onion_key();
 838     cpuworkers_rotate();
 839     if (router_rebuild_descriptor(1)<0) {
 840       log_info(LD_CONFIG, "Couldn't rebuild router descriptor");
 841     }
 842     if (advertised_server_mode())
 843       router_upload_dir_desc_to_dirservers(0);
 844   }
 845
 846   if (time_to_try_getting_descriptors < now) {
 847     update_router_descriptor_downloads(now);
 848     update_extrainfo_downloads(now);
 849     if (options->UseBridges)
 850       fetch_bridge_descriptors(now);
 851     if (router_have_minimum_dir_info())
 852       time_to_try_getting_descriptors = now + LAZY_DESCRIPTOR_RETRY_INTERVAL;
 853     else
 854       time_to_try_getting_descriptors = now + GREEDY_DESCRIPTOR_RETRY_INTERVAL;
 855   }
 856
 857   if (time_to_reset_descriptor_failures < now) {
 858     router_reset_descriptor_download_failures();
 859     time_to_reset_descriptor_failures =
 860       now + DESCRIPTOR_FAILURE_RESET_INTERVAL;
 861   }
 862
 863   /** 1b. Every MAX_SSL_KEY_LIFETIME seconds, we change our TLS context. */
 864   if (!last_rotated_x509_certificate)
 865     last_rotated_x509_certificate = now;
 866   if (last_rotated_x509_certificate+MAX_SSL_KEY_LIFETIME < now) {
 867     log_info(LD_GENERAL,"Rotating tls context.");
 868     if (tor_tls_context_new(get_identity_key(), MAX_SSL_KEY_LIFETIME) < 0) {
 869       log_warn(LD_BUG, "Error reinitializing TLS context");
 870       /* XXX is it a bug here, that we just keep going? -RD */
 871     }
 872     last_rotated_x509_certificate = now;
 873     /* We also make sure to rotate the TLS connections themselves if they've
 874      * been up for too long -- but that's done via is_bad_for_new_circs in
 875      * connection_run_housekeeping() above. */
 876   }
 877
 878   if (time_to_add_entropy < now) {
 879     if (time_to_add_entropy) {
 880       /* We already seeded once, so don't die on failure. */
 881       crypto_seed_rng(0);
 882     }
 883 /** How often do we add more entropy to OpenSSL's RNG pool? */
 884 #define ENTROPY_INTERVAL (60*60)
 885     time_to_add_entropy = now + ENTROPY_INTERVAL;
 886   }
 887
 888   /** 1c. If we have to change the accounting interval or record
 889    * bandwidth used in this accounting interval, do so. */
 890   if (accounting_is_enabled(options))
 891     accounting_run_housekeeping(now);
 892
 893   if (now % 10 == 0 && (authdir_mode_tests_reachability(options)) &&
 894       !we_are_hibernating()) {
 895     /* try to determine reachability of the other Tor relays */
 896     dirserv_test_reachability(now, 0);
 897   }
 898
 899   /** 1d. Periodically, we discount older stability information so that new
 900    * stability info counts more, and save the stability information to disk as
 901    * appropriate. */
 902   if (time_to_downrate_stability < now)
 903     time_to_downrate_stability = rep_hist_downrate_old_runs(now);
 904   if (authdir_mode_tests_reachability(options)) {
 905     if (time_to_save_stability < now) {
 906       if (time_to_save_stability && rep_hist_record_mtbf_data()<0) {
 907         log_warn(LD_GENERAL, "Couldn't store mtbf data.");
 908       }
 909 #define SAVE_STABILITY_INTERVAL (30*60)
 910       time_to_save_stability = now + SAVE_STABILITY_INTERVAL;
 911     }
 912   }
 913
 914   /* 1e. Periodicaly, if we're a v3 authority, we check whether our cert is
 915    * close to expiring and warn the admin if it is. */
 916   if (time_to_check_v3_certificate < now) {
 917     v3_authority_check_key_expiry();
 918 #define CHECK_V3_CERTIFICATE_INTERVAL (5*60)
 919     time_to_check_v3_certificate = now + CHECK_V3_CERTIFICATE_INTERVAL;
 920   }
 921
 922   /* 1f. Check whether our networkstatus has expired.
 923    */
 924   if (time_to_check_for_expired_networkstatus < now) {
 925     networkstatus_t *ns = networkstatus_get_latest_consensus();
 926     /*XXXX RD: This value needs to be the same as REASONABLY_LIVE_TIME in
 927      * networkstatus_get_reasonably_live_consensus(), but that value is way
 928      * way too high.  Arma: is the bridge issue there resolved yet? -NM */
 929 #define NS_EXPIRY_SLOP (24*60*60)
 930     if (ns && ns->valid_until < now+NS_EXPIRY_SLOP &&
 931         router_have_minimum_dir_info()) {
 932       router_dir_info_changed();
 933     }
 934 #define CHECK_EXPIRED_NS_INTERVAL (2*60)
 935     time_to_check_for_expired_networkstatus = now + CHECK_EXPIRED_NS_INTERVAL;
 936   }
 937
 938   if (time_to_dump_geoip_stats < now) {
 939 #define DUMP_GEOIP_STATS_INTERVAL (60*60);
 940     if (time_to_dump_geoip_stats)
 941       dump_geoip_stats();
 942     time_to_dump_geoip_stats = now + DUMP_GEOIP_STATS_INTERVAL;
 943   }
 944
 945   /* Remove old information from rephist and the rend cache. */
 946   if (time_to_clean_caches < now) {
 947     rep_history_clean(now - options->RephistTrackTime);
 948     rend_cache_clean();
 949     rend_cache_clean_v2_descs_as_dir();
 950 #define CLEAN_CACHES_INTERVAL (30*60)
 951     time_to_clean_caches = now + CLEAN_CACHES_INTERVAL;
 952   }
 953
 954 #define RETRY_DNS_INTERVAL (10*60)
 955   /* If we're a server and initializing dns failed, retry periodically. */
 956   if (time_to_retry_dns_init < now) {
 957     time_to_retry_dns_init = now + RETRY_DNS_INTERVAL;
 958     if (server_mode(options) && has_dns_init_failed())
 959       dns_init();
 960   }
 961
 962   /** 2. Periodically, we consider force-uploading our descriptor
 963    * (if we've passed our internal checks). */
 964
 965 /** How often do we check whether part of our router info has changed in a way
 966  * that would require an upload? */
 967 #define CHECK_DESCRIPTOR_INTERVAL (60)
 968 /** How often do we (as a router) check whether our IP address has changed? */
 969 #define CHECK_IPADDRESS_INTERVAL (15*60)
 970
 971   /* 2b. Once per minute, regenerate and upload the descriptor if the old
 972    * one is inaccurate. */
 973   if (time_to_check_descriptor < now) {
 974     static int dirport_reachability_count = 0;
 975     time_to_check_descriptor = now + CHECK_DESCRIPTOR_INTERVAL;
 976     check_descriptor_bandwidth_changed(now);
 977     if (time_to_check_ipaddress < now) {
 978       time_to_check_ipaddress = now + CHECK_IPADDRESS_INTERVAL;
 979       check_descriptor_ipaddress_changed(now);
 980     }
 981 /** If our router descriptor ever goes this long without being regenerated
 982  * because something changed, we force an immediate regenerate-and-upload. */
 983 #define FORCE_REGENERATE_DESCRIPTOR_INTERVAL (18*60*60)
 984     mark_my_descriptor_dirty_if_older_than(
 985                                   now - FORCE_REGENERATE_DESCRIPTOR_INTERVAL);
 986     consider_publishable_server(0);
 987     /* also, check religiously for reachability, if it's within the first
 988      * 20 minutes of our uptime. */
 989     if (server_mode(options) &&
 990         (has_completed_circuit || !any_predicted_circuits(now)) &&
 991         !we_are_hibernating()) {
 992       if (stats_n_seconds_working < TIMEOUT_UNTIL_UNREACHABILITY_COMPLAINT) {
 993         consider_testing_reachability(1, dirport_reachability_count==0);
 994         if (++dirport_reachability_count > 5)
 995           dirport_reachability_count = 0;
 996       } else if (time_to_recheck_bandwidth < now) {
 997         /* If we haven't checked for 12 hours and our bandwidth estimate is
 998          * low, do another bandwidth test. This is especially important for
 999          * bridges, since they might go long periods without much use. */
1000         routerinfo_t *me = router_get_my_routerinfo();
1001         if (time_to_recheck_bandwidth && me &&
1002             me->bandwidthcapacity < me->bandwidthrate &&
1003             me->bandwidthcapacity < 51200) {
1004           reset_bandwidth_test();
1005         }
1006 #define BANDWIDTH_RECHECK_INTERVAL (12*60*60)
1007         time_to_recheck_bandwidth = now + BANDWIDTH_RECHECK_INTERVAL;
1008       }
1009     }
1010
1011     /* If any networkstatus documents are no longer recent, we need to
1012      * update all the descriptors' running status. */
1013     /* purge obsolete entries */
1014     networkstatus_v2_list_clean(now);
1015     /* Remove dead routers. */
1016     routerlist_remove_old_routers();
1017
1018     /* Also, once per minute, check whether we want to download any
1019      * networkstatus documents.
1020      */
1021     update_networkstatus_downloads(now);
1022   }
1023
1024   /** 2c. Let directory voting happen. */
1025   if (authdir_mode_v3(options))
1026     dirvote_act(options, now);
1027
1028   /** 3a. Every second, we examine pending circuits and prune the
1029    *    ones which have been pending for more than a few seconds.
1030    *    We do this before step 4, so it can try building more if
1031    *    it's not comfortable with the number of available circuits.
1032    */
1033   circuit_expire_building(now);
1034
1035   /** 3b. Also look at pending streams and prune the ones that 'began'
1036    *     a long time ago but haven't gotten a 'connected' yet.
1037    *     Do this before step 4, so we can put them back into pending
1038    *     state to be picked up by the new circuit.
1039    */
1040   connection_ap_expire_beginning();
1041
1042   /** 3c. And expire connections that we've held open for too long.
1043    */
1044   connection_expire_held_open();
1045
1046   /** 3d. And every 60 seconds, we relaunch listeners if any died. */
1047   if (!we_are_hibernating() && time_to_check_listeners < now) {
1048     retry_all_listeners(NULL, NULL);
1049     time_to_check_listeners = now+60;
1050   }
1051
1052   /** 4. Every second, we try a new circuit if there are no valid
1053    *    circuits. Every NewCircuitPeriod seconds, we expire circuits
1054    *    that became dirty more than MaxCircuitDirtiness seconds ago,
1055    *    and we make a new circ if there are no clean circuits.
1056    */
1057   have_dir_info = router_have_minimum_dir_info();
1058   if (have_dir_info && !we_are_hibernating())
1059     circuit_build_needed_circs(now);
1060
1061   /** 5. We do housekeeping for each connection... */
1062   connection_or_set_bad_connections();
1063   for (i=0;i<smartlist_len(connection_array);i++) {
1064     run_connection_housekeeping(i, now);
1065   }
1066   if (time_to_shrink_memory < now) {
1067     SMARTLIST_FOREACH(connection_array, connection_t *, conn, {
1068         if (conn->outbuf)
1069           buf_shrink(conn->outbuf);
1070         if (conn->inbuf)
1071           buf_shrink(conn->inbuf);
1072       });
1073     clean_cell_pool();
1074     buf_shrink_freelists(0);
1075 /** How often do we check buffers and pools for empty space that can be
1076  * deallocated? */
1077 #define MEM_SHRINK_INTERVAL (60)
1078     time_to_shrink_memory = now + MEM_SHRINK_INTERVAL;
1079   }
1080
1081   /** 6. And remove any marked circuits... */
1082   circuit_close_all_marked();
1083
1084   /** 7. And upload service descriptors if necessary. */
1085   if (has_completed_circuit && !we_are_hibernating()) {
1086     rend_consider_services_upload(now);
1087     rend_consider_descriptor_republication();
1088   }
1089
1090   /** 8. and blow away any connections that need to die. have to do this now,
1091    * because if we marked a conn for close and left its socket -1, then
1092    * we'll pass it to poll/select and bad things will happen.
1093    */
1094   close_closeable_connections();
1095
1096   /** 8b. And if anything in our state is ready to get flushed to disk, we
1097    * flush it. */
1098   or_state_save(now);
1099
1100   /** 9. and if we're a server, check whether our DNS is telling stories to
1101    * us. */
1102   if (server_mode(options) && time_to_check_for_correct_dns < now) {
1103     if (!time_to_check_for_correct_dns) {
1104       time_to_check_for_correct_dns = now + 60 + crypto_rand_int(120);
1105     } else {
1106       dns_launch_correctness_checks();
1107       time_to_check_for_correct_dns = now + 12*3600 +
1108         crypto_rand_int(12*3600);
1109     }
1110   }
1111
1112   /** 10. write hidden service usage statistic to disk */
1113   if (options->HSAuthorityRecordStats && time_to_write_hs_statistics < now) {
1114     hs_usage_write_statistics_to_file(now);
1115 #define WRITE_HSUSAGE_INTERVAL (30*60)
1116     time_to_write_hs_statistics = now+WRITE_HSUSAGE_INTERVAL;
1117   }
1118   /** 10b. write bridge networkstatus file to disk */
1119   if (options->BridgeAuthoritativeDir &&
1120       time_to_write_bridge_status_file < now) {
1121     networkstatus_dump_bridge_status_to_file(now);
1122 #define BRIDGE_STATUSFILE_INTERVAL (30*60)
1123     time_to_write_bridge_status_file = now+BRIDGE_STATUSFILE_INTERVAL;
1124   }
1125 }
1126
/** Libevent timer: used to invoke second_elapsed_callback() once per
 * second.  It starts out NULL; second_elapsed_callback() allocates it the
 * first time it runs and re-adds it with a one-second timeout at the end of
 * every call. */
static struct event *timeout_event = NULL;
/** Number of libevent errors in the last second: we die if we get too many.
 * second_elapsed_callback() resets this to zero on every call, and
 * got_libevent_error() increments it. */
static int n_libevent_errors = 0;
1132
1133 /** Libevent callback: invoked once every second. */
1134 static void
1135 second_elapsed_callback(int fd, short event, void *args)
1136 {
1137   /* XXXX This could be sensibly refactored into multiple callbacks, and we
1138    * could use libevent's timers for this rather than checking the current
1139    * time against a bunch of timeouts every second. */
1140   static struct timeval one_second;
1141   static long current_second = 0;
1142   time_t now;
1143   size_t bytes_written;
1144   size_t bytes_read;
1145   int seconds_elapsed;
1146   or_options_t *options = get_options();
1147   (void)fd;
1148   (void)event;
1149   (void)args;
1150   if (!timeout_event) {
1151     timeout_event = tor_malloc_zero(sizeof(struct event));
1152     evtimer_set(timeout_event, second_elapsed_callback, NULL);
1153     one_second.tv_sec = 1;
1154     one_second.tv_usec = 0;
1155   }
1156
1157   n_libevent_errors = 0;
1158
1159   /* log_fn(LOG_NOTICE, "Tick."); */
1160   now = time(NULL);
1161   update_approx_time(now);
1162
1163   /* the second has rolled over. check more stuff. */
1164   bytes_written = stats_prev_global_write_bucket - global_write_bucket;
1165   bytes_read = stats_prev_global_read_bucket - global_read_bucket;
1166   seconds_elapsed = current_second ? (int)(now - current_second) : 0;
1167   stats_n_bytes_read += bytes_read;
1168   stats_n_bytes_written += bytes_written;
1169   if (accounting_is_enabled(options) && seconds_elapsed >= 0)
1170     accounting_add_bytes(bytes_read, bytes_written, seconds_elapsed);
1171   control_event_bandwidth_used((uint32_t)bytes_read,(uint32_t)bytes_written);
1172   control_event_stream_bandwidth_used();
1173
1174   if (seconds_elapsed > 0)
1175     connection_bucket_refill(seconds_elapsed, now);
1176   stats_prev_global_read_bucket = global_read_bucket;
1177   stats_prev_global_write_bucket = global_write_bucket;
1178
1179   if (server_mode(options) &&
1180       !we_are_hibernating() &&
1181       seconds_elapsed > 0 &&
1182       has_completed_circuit &&
1183       stats_n_seconds_working / TIMEOUT_UNTIL_UNREACHABILITY_COMPLAINT !=
1184       (stats_n_seconds_working+seconds_elapsed) /
1185         TIMEOUT_UNTIL_UNREACHABILITY_COMPLAINT) {
1186     /* every 20 minutes, check and complain if necessary */
1187     routerinfo_t *me = router_get_my_routerinfo();
1188     if (me && !check_whether_orport_reachable())
1189       log_warn(LD_CONFIG,"Your server (%s:%d) has not managed to confirm that "
1190                "its ORPort is reachable. Please check your firewalls, ports, "
1191                "address, /etc/hosts file, etc.",
1192                me->address, me->or_port);
1193     if (me && !check_whether_dirport_reachable())
1194       log_warn(LD_CONFIG,
1195                "Your server (%s:%d) has not managed to confirm that its "
1196                "DirPort is reachable. Please check your firewalls, ports, "
1197                "address, /etc/hosts file, etc.",
1198                me->address, me->dir_port);
1199   }
1200
1201 /** If more than this many seconds have elapsed, probably the clock
1202  * jumped: doesn't count. */
1203 #define NUM_JUMPED_SECONDS_BEFORE_WARN 100
1204   if (seconds_elapsed < -NUM_JUMPED_SECONDS_BEFORE_WARN ||
1205       seconds_elapsed >= NUM_JUMPED_SECONDS_BEFORE_WARN) {
1206     circuit_note_clock_jumped(seconds_elapsed);
1207     /* XXX if the time jumps *back* many months, do our events in
1208      * run_scheduled_events() recover? I don't think they do. -RD */
1209   } else if (seconds_elapsed > 0)
1210     stats_n_seconds_working += seconds_elapsed;
1211
1212   run_scheduled_events(now);
1213
1214   current_second = now; /* remember which second it is, for next time */
1215
1216 #if 0
1217   if (current_second % 300 == 0) {
1218     rep_history_clean(current_second - options->RephistTrackTime);
1219     dumpmemusage(get_min_log_level()<LOG_INFO ?
1220                  get_min_log_level() : LOG_INFO);
1221   }
1222 #endif
1223
1224   if (evtimer_add(timeout_event, &one_second))
1225     log_err(LD_NET,
1226             "Error from libevent when setting one-second timeout event");
1227 }
1228
1229 #ifndef MS_WINDOWS
1230 /** Called when a possibly ignorable libevent error occurs; ensures that we
1231  * don't get into an infinite loop by ignoring too many errors from
1232  * libevent. */
1233 static int
1234 got_libevent_error(void)
1235 {
1236   if (++n_libevent_errors > 8) {
1237     log_err(LD_NET, "Too many libevent errors in one second; dying");
1238     return -1;
1239   }
1240   return 0;
1241 }
1242 #endif
1243
1244 #define UPTIME_CUTOFF_FOR_NEW_BANDWIDTH_TEST (6*60*60)
1245
1246 /** Called when our IP address seems to have changed. <b>at_interface</b>
1247  * should be true if we detected a change in our interface, and false if we
1248  * detected a change in our published address. */
1249 void
1250 ip_address_changed(int at_interface)
1251 {
1252   int server = server_mode(get_options());
1253
1254   if (at_interface) {
1255     if (! server) {
1256       /* Okay, change our keys. */
1257       init_keys();
1258     }
1259   } else {
1260     if (server) {
1261       if (stats_n_seconds_working > UPTIME_CUTOFF_FOR_NEW_BANDWIDTH_TEST)
1262         reset_bandwidth_test();
1263       stats_n_seconds_working = 0;
1264       router_reset_reachability();
1265       mark_my_descriptor_dirty();
1266     }
1267   }
1268
1269   dns_servers_relaunch_checks();
1270 }
1271
1272 /** Forget what we've learned about the correctness of our DNS servers, and
1273  * start learning again. */
1274 void
1275 dns_servers_relaunch_checks(void)
1276 {
1277   if (server_mode(get_options())) {
1278     dns_reset_correctness_checks();
1279     time_to_check_for_correct_dns = 0;
1280   }
1281 }
1282
1283 /** Called when we get a SIGHUP: reload configuration files and keys,
1284  * retry all connections, and so on. */
1285 static int
1286 do_hup(void)
1287 {
1288   or_options_t *options = get_options();
1289
1290 #ifdef USE_DMALLOC
1291   dmalloc_log_stats();
1292   dmalloc_log_changed(0, 1, 0, 0);
1293 #endif
1294
1295   log_notice(LD_GENERAL,"Received reload signal (hup). Reloading config and "
1296              "resetting internal state.");
1297   if (accounting_is_enabled(options))
1298     accounting_record_bandwidth_usage(time(NULL), get_or_state());
1299
1300   router_reset_warnings();
1301   routerlist_reset_warnings();
1302   addressmap_clear_transient();
1303   /* first, reload config variables, in case they've changed */
1304   if (options->ReloadTorrcOnSIGHUP) {
1305     /* no need to provide argc/v, they've been cached in init_from_config */
1306     if (options_init_from_torrc(0, NULL) < 0) {
1307       log_err(LD_CONFIG,"Reading config failed--see warnings above. "
1308               "For usage, try -h.");
1309       return -1;
1310     }
1311     options = get_options(); /* they have changed now */
1312   } else {
1313     log_notice(LD_GENERAL, "Not reloading config file: the controller told "
1314                "us not to.");
1315   }
1316   if (authdir_mode_handles_descs(options, -1)) {
1317     /* reload the approved-routers file */
1318     if (dirserv_load_fingerprint_file() < 0) {
1319       /* warnings are logged from dirserv_load_fingerprint_file() directly */
1320       log_info(LD_GENERAL, "Error reloading fingerprints. "
1321                "Continuing with old list.");
1322     }
1323   }
1324
1325   /* Rotate away from the old dirty circuits. This has to be done
1326    * after we've read the new options, but before we start using
1327    * circuits for directory fetches. */
1328   circuit_expire_all_dirty_circs();
1329
1330   /* retry appropriate downloads */
1331   router_reset_status_download_failures();
1332   router_reset_descriptor_download_failures();
1333   update_networkstatus_downloads(time(NULL));
1334
1335   /* We'll retry routerstatus downloads in about 10 seconds; no need to
1336    * force a retry there. */
1337
1338   if (server_mode(options)) {
1339     /* Restart cpuworker and dnsworker processes, so they get up-to-date
1340      * configuration options. */
1341     cpuworkers_rotate();
1342     dns_reset();
1343   }
1344   return 0;
1345 }
1346
1347 /** Tor main loop. */
1348 /* static */ int
1349 do_main_loop(void)
1350 {
1351   int loop_result;
1352   time_t now;
1353
1354   /* initialize dns resolve map, spawn workers if needed */
1355   if (dns_init() < 0) {
1356     if (get_options()->ServerDNSAllowBrokenConfig)
1357       log_warn(LD_GENERAL, "Couldn't set up any working nameservers. "
1358                "Network not up yet?  Will try again soon.");
1359     else {
1360       log_err(LD_GENERAL,"Error initializing dns subsystem; exiting.  To "
1361               "retry instead, set the ServerDNSAllowBrokenResolvConf option.");
1362     }
1363   }
1364
1365   handle_signals(1);
1366
1367   /* load the private keys, if we're supposed to have them, and set up the
1368    * TLS context. */
1369   if (! identity_key_is_set()) {
1370     if (init_keys() < 0) {
1371       log_err(LD_BUG,"Error initializing keys; exiting");
1372       return -1;
1373     }
1374   }
1375
1376   /* Set up the packed_cell_t memory pool. */
1377   init_cell_pool();
1378
1379   /* Set up our buckets */
1380   connection_bucket_init();
1381   stats_prev_global_read_bucket = global_read_bucket;
1382   stats_prev_global_write_bucket = global_write_bucket;
1383
1384   /* initialize the bootstrap status events to know we're starting up */
1385   control_event_bootstrap(BOOTSTRAP_STATUS_STARTING, 0);
1386
1387   if (trusted_dirs_reload_certs())
1388     return -1;
1389   if (router_reload_v2_networkstatus()) {
1390     return -1;
1391   }
1392   if (router_reload_consensus_networkstatus()) {
1393     return -1;
1394   }
1395   /* load the routers file, or assign the defaults. */
1396   if (router_reload_router_list()) {
1397     return -1;
1398   }
1399   /* load the networkstatuses. (This launches a download for new routers as
1400    * appropriate.)
1401    */
1402   now = time(NULL);
1403   directory_info_has_arrived(now, 1);
1404
1405   if (authdir_mode_tests_reachability(get_options())) {
1406     /* the directory is already here, run startup things */
1407     dirserv_test_reachability(now, 1);
1408   }
1409
1410   if (server_mode(get_options())) {
1411     /* launch cpuworkers. Need to do this *after* we've read the onion key. */
1412     cpu_init();
1413   }
1414
1415   /* set up once-a-second callback. */
1416   second_elapsed_callback(0,0,NULL);
1417
1418   for (;;) {
1419     if (nt_service_is_stopping())
1420       return 0;
1421
1422 #ifndef MS_WINDOWS
1423     /* Make it easier to tell whether libevent failure is our fault or not. */
1424     errno = 0;
1425 #endif
1426     /* All active linked conns should get their read events activated. */
1427     SMARTLIST_FOREACH(active_linked_connection_lst, connection_t *, conn,
1428                       event_active(conn->read_event, EV_READ, 1));
1429     called_loop_once = smartlist_len(active_linked_connection_lst) ? 1 : 0;
1430
1431     update_approx_time(time(NULL));
1432
1433     /* poll until we have an event, or the second ends, or until we have
1434      * some active linked connections to trigger events for. */
1435     loop_result = event_loop(called_loop_once ? EVLOOP_ONCE : 0);
1436
1437     /* let catch() handle things like ^c, and otherwise don't worry about it */
1438     if (loop_result < 0) {
1439       int e = tor_socket_errno(-1);
1440       /* let the program survive things like ^z */
1441       if (e != EINTR && !ERRNO_IS_EINPROGRESS(e)) {
1442 #ifdef HAVE_EVENT_GET_METHOD
1443         log_err(LD_NET,"libevent call with %s failed: %s [%d]",
1444                 event_get_method(), tor_socket_strerror(e), e);
1445 #else
1446         log_err(LD_NET,"libevent call failed: %s [%d]",
1447                 tor_socket_strerror(e), e);
1448 #endif
1449         return -1;
1450 #ifndef MS_WINDOWS
1451       } else if (e == EINVAL) {
1452         log_warn(LD_NET, "EINVAL from libevent: should you upgrade libevent?");
1453         if (got_libevent_error())
1454           return -1;
1455 #endif
1456       } else {
1457         if (ERRNO_IS_EINPROGRESS(e))
1458           log_warn(LD_BUG,
1459                    "libevent call returned EINPROGRESS? Please report.");
1460         log_debug(LD_NET,"libevent call interrupted.");
1461         /* You can't trust the results of this poll(). Go back to the
1462          * top of the big for loop. */
1463         continue;
1464       }
1465     }
1466   }
1467 }
1468
1469 /** Used to implement the SIGNAL control command: if we accept
1470  * <b>the_signal</b> as a remote pseudo-signal, act on it. */
1471 /* We don't re-use catch() here because:
1472  *   1. We handle a different set of signals than those allowed in catch.
1473  *   2. Platforms without signal() are unlikely to define SIGfoo.
1474  *   3. The control spec is defined to use fixed numeric signal values
1475  *      which just happen to match the unix values.
1476  */
1477 void
1478 control_signal_act(int the_signal)
1479 {
1480   switch (the_signal)
1481     {
1482     case 1:
1483       signal_callback(0,0,(void*)(uintptr_t)SIGHUP);
1484       break;
1485     case 2:
1486       signal_callback(0,0,(void*)(uintptr_t)SIGINT);
1487       break;
1488     case 10:
1489       signal_callback(0,0,(void*)(uintptr_t)SIGUSR1);
1490       break;
1491     case 12:
1492       signal_callback(0,0,(void*)(uintptr_t)SIGUSR2);
1493       break;
1494     case 15:
1495       signal_callback(0,0,(void*)(uintptr_t)SIGTERM);
1496       break;
1497     case SIGNEWNYM:
1498       signal_callback(0,0,(void*)(uintptr_t)SIGNEWNYM);
1499       break;
1500     case SIGCLEARDNSCACHE:
1501       signal_callback(0,0,(void*)(uintptr_t)SIGCLEARDNSCACHE);
1502       break;
1503     default:
1504       log_warn(LD_BUG, "Unrecognized signal number %d.", the_signal);
1505       break;
1506     }
1507 }
1508
1509 /** Libevent callback: invoked when we get a signal.
1510  */
1511 static void
1512 signal_callback(int fd, short events, void *arg)
1513 {
1514   uintptr_t sig = (uintptr_t)arg;
1515   (void)fd;
1516   (void)events;
1517   switch (sig)
1518     {
1519     case SIGTERM:
1520       log_notice(LD_GENERAL,"Catching signal TERM, exiting cleanly.");
1521       tor_cleanup();
1522       exit(0);
1523       break;
1524     case SIGINT:
1525       if (!server_mode(get_options())) { /* do it now */
1526         log_notice(LD_GENERAL,"Interrupt: exiting cleanly.");
1527         tor_cleanup();
1528         exit(0);
1529       }
1530       hibernate_begin_shutdown();
1531       break;
1532 #ifdef SIGPIPE
1533     case SIGPIPE:
1534       log_debug(LD_GENERAL,"Caught sigpipe. Ignoring.");
1535       break;
1536 #endif
1537     case SIGUSR1:
1538       /* prefer to log it at INFO, but make sure we always see it */
1539       dumpstats(get_min_log_level()<LOG_INFO ? get_min_log_level() : LOG_INFO);
1540       break;
1541     case SIGUSR2:
1542       switch_logs_debug();
1543       log_debug(LD_GENERAL,"Caught USR2, going to loglevel debug. "
1544                 "Send HUP to change back.");
1545       break;
1546     case SIGHUP:
1547       if (do_hup() < 0) {
1548         log_warn(LD_CONFIG,"Restart failed (config error?). Exiting.");
1549         tor_cleanup();
1550         exit(1);
1551       }
1552       break;
1553 #ifdef SIGCHLD
1554     case SIGCHLD:
1555       while (waitpid(-1,NULL,WNOHANG) > 0) ; /* keep reaping until no more
1556                                                 zombies */
1557       break;
1558 #endif
1559     case SIGNEWNYM: {
1560       time_t now = time(NULL);
1561       if (time_of_last_signewnym + MAX_SIGNEWNYM_RATE > now) {
1562         signewnym_is_pending = 1;
1563         log(LOG_NOTICE, LD_CONTROL,
1564             "Rate limiting NEWNYM request: delaying by %d second(s)",
1565             (int)(MAX_SIGNEWNYM_RATE+time_of_last_signewnym-now));
1566       } else {
1567         signewnym_impl(now);
1568       }
1569       break;
1570     }
1571     case SIGCLEARDNSCACHE:
1572       addressmap_clear_transient();
1573       break;
1574   }
1575 }
1576
1577 extern uint64_t rephist_total_alloc;
1578 extern uint32_t rephist_total_num;
1579
1580 /**
1581  * Write current memory usage information to the log.
1582  */
1583 static void
1584 dumpmemusage(int severity)
1585 {
1586   connection_dump_buffer_mem_stats(severity);
1587   log(severity, LD_GENERAL, "In rephist: "U64_FORMAT" used by %d Tors.",
1588       U64_PRINTF_ARG(rephist_total_alloc), rephist_total_num);
1589   dump_routerlist_mem_usage(severity);
1590   dump_cell_pool_usage(severity);
1591   buf_dump_freelist_sizes(severity);
1592   tor_log_mallinfo(severity);
1593 }
1594
1595 /** Write all statistics to the log, with log level 'severity'.  Called
1596  * in response to a SIGUSR1. */
1597 static void
1598 dumpstats(int severity)
1599 {
1600   time_t now = time(NULL);
1601   time_t elapsed;
1602   int rbuf_cap, wbuf_cap, rbuf_len, wbuf_len;
1603
1604   log(severity, LD_GENERAL, "Dumping stats:");
1605
1606   SMARTLIST_FOREACH(connection_array, connection_t *, conn,
1607   {
1608     int i = conn_sl_idx;
1609     log(severity, LD_GENERAL,
1610         "Conn %d (socket %d) type %d (%s), state %d (%s), created %d secs ago",
1611         i, conn->s, conn->type, conn_type_to_string(conn->type),
1612         conn->state, conn_state_to_string(conn->type, conn->state),
1613         (int)(now - conn->timestamp_created));
1614     if (!connection_is_listener(conn)) {
1615       log(severity,LD_GENERAL,
1616           "Conn %d is to %s:%d.", i,
1617           safe_str(conn->address), conn->port);
1618       log(severity,LD_GENERAL,
1619           "Conn %d: %d bytes waiting on inbuf (len %d, last read %d secs ago)",
1620           i,
1621           (int)buf_datalen(conn->inbuf),
1622           (int)buf_allocation(conn->inbuf),
1623           (int)(now - conn->timestamp_lastread));
1624       log(severity,LD_GENERAL,
1625           "Conn %d: %d bytes waiting on outbuf "
1626           "(len %d, last written %d secs ago)",i,
1627           (int)buf_datalen(conn->outbuf),
1628           (int)buf_allocation(conn->outbuf),
1629           (int)(now - conn->timestamp_lastwritten));
1630       if (conn->type == CONN_TYPE_OR) {
1631         or_connection_t *or_conn = TO_OR_CONN(conn);
1632         if (or_conn->tls) {
1633           tor_tls_get_buffer_sizes(or_conn->tls, &rbuf_cap, &rbuf_len,
1634                                    &wbuf_cap, &wbuf_len);
1635           log(severity, LD_GENERAL,
1636               "Conn %d: %d/%d bytes used on openssl read buffer; "
1637               "%d/%d bytes used on write buffer.",
1638               i, rbuf_len, rbuf_cap, wbuf_len, wbuf_cap);
1639         }
1640       }
1641     }
1642     circuit_dump_by_conn(conn, severity); /* dump info about all the circuits
1643                                            * using this conn */
1644   });
1645   log(severity, LD_NET,
1646       "Cells processed: "U64_FORMAT" padding\n"
1647       "                 "U64_FORMAT" create\n"
1648       "                 "U64_FORMAT" created\n"
1649       "                 "U64_FORMAT" relay\n"
1650       "                        ("U64_FORMAT" relayed)\n"
1651       "                        ("U64_FORMAT" delivered)\n"
1652       "                 "U64_FORMAT" destroy",
1653       U64_PRINTF_ARG(stats_n_padding_cells_processed),
1654       U64_PRINTF_ARG(stats_n_create_cells_processed),
1655       U64_PRINTF_ARG(stats_n_created_cells_processed),
1656       U64_PRINTF_ARG(stats_n_relay_cells_processed),
1657       U64_PRINTF_ARG(stats_n_relay_cells_relayed),
1658       U64_PRINTF_ARG(stats_n_relay_cells_delivered),
1659       U64_PRINTF_ARG(stats_n_destroy_cells_processed));
1660   if (stats_n_data_cells_packaged)
1661     log(severity,LD_NET,"Average packaged cell fullness: %2.3f%%",
1662         100*(U64_TO_DBL(stats_n_data_bytes_packaged) /
1663              U64_TO_DBL(stats_n_data_cells_packaged*RELAY_PAYLOAD_SIZE)) );
1664   if (stats_n_data_cells_received)
1665     log(severity,LD_NET,"Average delivered cell fullness: %2.3f%%",
1666         100*(U64_TO_DBL(stats_n_data_bytes_received) /
1667              U64_TO_DBL(stats_n_data_cells_received*RELAY_PAYLOAD_SIZE)) );
1668
1669   if (now - time_of_process_start >= 0)
1670     elapsed = now - time_of_process_start;
1671   else
1672     elapsed = 0;
1673
1674   if (elapsed) {
1675     log(severity, LD_NET,
1676         "Average bandwidth: "U64_FORMAT"/%d = %d bytes/sec reading",
1677         U64_PRINTF_ARG(stats_n_bytes_read),
1678         (int)elapsed,
1679         (int) (stats_n_bytes_read/elapsed));
1680     log(severity, LD_NET,
1681         "Average bandwidth: "U64_FORMAT"/%d = %d bytes/sec writing",
1682         U64_PRINTF_ARG(stats_n_bytes_written),
1683         (int)elapsed,
1684         (int) (stats_n_bytes_written/elapsed));
1685   }
1686
1687   log(severity, LD_NET, "--------------- Dumping memory information:");
1688   dumpmemusage(severity);
1689
1690   rep_hist_dump_stats(now,severity);
1691   rend_service_dump_stats(severity);
1692   dump_pk_ops(severity);
1693   dump_distinct_digest_count(severity);
1694 }
1695
/** Called by exit() as we shut down the process; tor_init() registers it
 * with atexit().  When MS_WINDOWS is defined this calls WSACleanup();
 * on every other build the body is empty.
 */
static void
exit_function(void)
{
  /* NOTE: If we ever daemonize, this gets called immediately.  That's
   * okay for now, because we only use this on Windows.  */
#ifdef MS_WINDOWS
  WSACleanup();
#endif
}
1707
1708 /** Set up the signal handlers for either parent or child. */
1709 void
1710 handle_signals(int is_parent)
1711 {
1712 #ifndef MS_WINDOWS /* do signal stuff only on unix */
1713   int i;
1714   static int signals[] = {
1715     SIGINT,  /* do a controlled slow shutdown */
1716     SIGTERM, /* to terminate now */
1717     SIGPIPE, /* otherwise sigpipe kills us */
1718     SIGUSR1, /* dump stats */
1719     SIGUSR2, /* go to loglevel debug */
1720     SIGHUP,  /* to reload config, retry conns, etc */
1721 #ifdef SIGXFSZ
1722     SIGXFSZ, /* handle file-too-big resource exhaustion */
1723 #endif
1724     SIGCHLD, /* handle dns/cpu workers that exit */
1725     -1 };
1726   static struct event signal_events[16]; /* bigger than it has to be. */
1727   if (is_parent) {
1728     for (i = 0; signals[i] >= 0; ++i) {
1729       signal_set(&signal_events[i], signals[i], signal_callback,
1730                  (void*)(uintptr_t)signals[i]);
1731       if (signal_add(&signal_events[i], NULL))
1732         log_warn(LD_BUG, "Error from libevent when adding event for signal %d",
1733                  signals[i]);
1734     }
1735   } else {
1736     struct sigaction action;
1737     action.sa_flags = 0;
1738     sigemptyset(&action.sa_mask);
1739     action.sa_handler = SIG_IGN;
1740     sigaction(SIGINT,  &action, NULL);
1741     sigaction(SIGTERM, &action, NULL);
1742     sigaction(SIGPIPE, &action, NULL);
1743     sigaction(SIGUSR1, &action, NULL);
1744     sigaction(SIGUSR2, &action, NULL);
1745     sigaction(SIGHUP,  &action, NULL);
1746 #ifdef SIGXFSZ
1747     sigaction(SIGXFSZ, &action, NULL);
1748 #endif
1749   }
1750 #else /* MS windows */
1751   (void)is_parent;
1752 #endif /* signal stuff */
1753 }
1754
1755 /** Main entry point for the Tor command-line client.
1756  */
1757 /* static */ int
1758 tor_init(int argc, char *argv[])
1759 {
1760   char buf[256];
1761   int i, quiet = 0;
1762   time_of_process_start = time(NULL);
1763   if (!connection_array)
1764     connection_array = smartlist_create();
1765   if (!closeable_connection_lst)
1766     closeable_connection_lst = smartlist_create();
1767   if (!active_linked_connection_lst)
1768     active_linked_connection_lst = smartlist_create();
1769   /* Have the log set up with our application name. */
1770   tor_snprintf(buf, sizeof(buf), "Tor %s", get_version());
1771   log_set_application_name(buf);
1772   /* Initialize the history structures. */
1773   rep_hist_init();
1774   /* Initialize the service cache. */
1775   rend_cache_init();
1776   addressmap_init(); /* Init the client dns cache. Do it always, since it's
1777                       * cheap. */
1778
1779   /* We search for the "quiet" option first, since it decides whether we
1780    * will log anything at all to the command line. */
1781   for (i=1;i<argc;++i) {
1782     if (!strcmp(argv[i], "--hush"))
1783       quiet = 1;
1784     if (!strcmp(argv[i], "--quiet"))
1785       quiet = 2;
1786   }
1787  /* give it somewhere to log to initially */
1788   switch (quiet) {
1789     case 2:
1790       /* no initial logging */
1791       break;
1792     case 1:
1793       add_temp_log(LOG_WARN);
1794       break;
1795     default:
1796       add_temp_log(LOG_NOTICE);
1797   }
1798
1799   log(LOG_NOTICE, LD_GENERAL, "Tor v%s. This is experimental software. "
1800       "Do not rely on it for strong anonymity. (Running on %s)",get_version(),
1801       get_uname());
1802
1803   if (network_init()<0) {
1804     log_err(LD_BUG,"Error initializing network; exiting.");
1805     return -1;
1806   }
1807   atexit(exit_function);
1808
1809   if (options_init_from_torrc(argc,argv) < 0) {
1810     log_err(LD_CONFIG,"Reading config failed--see warnings above.");
1811     return -1;
1812   }
1813
1814 #ifndef MS_WINDOWS
1815   if (geteuid()==0)
1816     log_warn(LD_GENERAL,"You are running Tor as root. You don't need to, "
1817              "and you probably shouldn't.");
1818 #endif
1819
1820   crypto_global_init(get_options()->HardwareAccel);
1821   if (crypto_seed_rng(1)) {
1822     log_err(LD_BUG, "Unable to seed random number generator. Exiting.");
1823     return -1;
1824   }
1825
1826   return 0;
1827 }
1828
1829 /** A lockfile structure, used to prevent two Tors from messing with the
1830  * data directory at once.  If this variable is non-NULL, we're holding
1831  * the lockfile. */
1832 static tor_lockfile_t *lockfile = NULL;
1833
1834 /** Try to grab the lock file described in <b>options</b>, if we do not
1835  * already have it.  If <b>err_if_locked</b> is true, warn if somebody else is
1836  * holding the lock, and exit if we can't get it after waiting.  Otherwise,
1837  * return -1 if we can't get the lockfile.  Return 0 on success.
1838  */
1839 int
1840 try_locking(or_options_t *options, int err_if_locked)
1841 {
1842   if (lockfile)
1843     return 0;
1844   else {
1845     char *fname = options_get_datadir_fname2_suffix(options, "lock",NULL,NULL);
1846     int already_locked = 0;
1847     tor_lockfile_t *lf = tor_lockfile_lock(fname, 0, &already_locked);
1848     tor_free(fname);
1849     if (!lf) {
1850       if (err_if_locked && already_locked) {
1851         int r;
1852         log_warn(LD_GENERAL, "It looks like another Tor process is running "
1853                  "with the same data directory.  Waiting 5 seconds to see "
1854                  "if it goes away.");
1855 #ifndef WIN32
1856         sleep(5);
1857 #else
1858         Sleep(5000);
1859 #endif
1860         r = try_locking(options, 0);
1861         if (r<0) {
1862           log_err(LD_GENERAL, "No, it's still there.  Exiting.");
1863           exit(0);
1864         }
1865         return r;
1866       }
1867       return -1;
1868     }
1869     lockfile = lf;
1870     return 0;
1871   }
1872 }
1873
1874 /** Return true iff we've successfully acquired the lock file. */
1875 int
1876 have_lockfile(void)
1877 {
1878   return lockfile != NULL;
1879 }
1880
1881 /** If we have successfully acquired the lock file, release it. */
1882 void
1883 release_lockfile(void)
1884 {
1885   if (lockfile) {
1886     tor_lockfile_unlock(lockfile);
1887     lockfile = NULL;
1888   }
1889 }
1890
1891 /** Free all memory that we might have allocated somewhere.
1892  * If <b>postfork</b>, we are a worker process and we want to free
1893  * only the parts of memory that we won't touch. If !<b>postfork</b>,
1894  * Tor is shutting down and we should free everything.
1895  *
1896  * Helps us find the real leaks with dmalloc and the like. Also valgrind
1897  * should then report 0 reachable in its leak report (in an ideal world --
1898  * in practice libevent, ssl, libc etc never quite free everything). */
1899 void
1900 tor_free_all(int postfork)
1901 {
1902   if (!postfork) {
1903     evdns_shutdown(1);
1904   }
1905   geoip_free_all();
1906   dirvote_free_all();
1907   routerlist_free_all();
1908   networkstatus_free_all();
1909   addressmap_free_all();
1910   dirserv_free_all();
1911   rend_service_free_all();
1912   rend_cache_free_all();
1913   rend_service_authorization_free_all();
1914   rep_hist_free_all();
1915   hs_usage_free_all();
1916   dns_free_all();
1917   clear_pending_onions();
1918   circuit_free_all();
1919   entry_guards_free_all();
1920   connection_free_all();
1921   buf_shrink_freelists(1);
1922   memarea_clear_freelist();
1923   if (!postfork) {
1924     config_free_all();
1925     router_free_all();
1926     policies_free_all();
1927   }
1928   free_cell_pool();
1929   if (!postfork) {
1930     tor_tls_free_all();
1931   }
1932   /* stuff in main.c */
1933   if (connection_array)
1934     smartlist_free(connection_array);
1935   if (closeable_connection_lst)
1936     smartlist_free(closeable_connection_lst);
1937   if (active_linked_connection_lst)
1938     smartlist_free(active_linked_connection_lst);
1939   tor_free(timeout_event);
1940   if (!postfork) {
1941     release_lockfile();
1942   }
1943   /* Stuff in util.c and address.c*/
1944   if (!postfork) {
1945     escaped(NULL);
1946     esc_router_info(NULL);
1947     logs_free_all(); /* free log strings. do this last so logs keep working. */
1948   }
1949 }
1950
1951 /** Do whatever cleanup is necessary before shutting Tor down. */
1952 void
1953 tor_cleanup(void)
1954 {
1955   or_options_t *options = get_options();
1956   /* Remove our pid file. We don't care if there was an error when we
1957    * unlink, nothing we could do about it anyways. */
1958   if (options->command == CMD_RUN_TOR) {
1959     if (options->PidFile)
1960       unlink(options->PidFile);
1961     if (accounting_is_enabled(options))
1962       accounting_record_bandwidth_usage(time(NULL), get_or_state());
1963     or_state_mark_dirty(get_or_state(), 0); /* force an immediate save. */
1964     or_state_save(time(NULL));
1965     if (authdir_mode_tests_reachability(options))
1966       rep_hist_record_mtbf_data();
1967   }
1968 #ifdef USE_DMALLOC
1969   dmalloc_log_stats();
1970 #endif
1971   tor_free_all(0); /* We could move tor_free_all back into the ifdef below
1972                       later, if it makes shutdown unacceptably slow.  But for
1973                       now, leave it here: it's helped us catch bugs in the
1974                       past. */
1975   crypto_global_cleanup();
1976 #ifdef USE_DMALLOC
1977   dmalloc_log_unfreed();
1978   dmalloc_shutdown();
1979 #endif
1980 }
1981
1982 /** Read/create keys as needed, and echo our fingerprint to stdout. */
1983 /* static */ int
1984 do_list_fingerprint(void)
1985 {
1986   char buf[FINGERPRINT_LEN+1];
1987   crypto_pk_env_t *k;
1988   const char *nickname = get_options()->Nickname;
1989   if (!server_mode(get_options())) {
1990     log_err(LD_GENERAL,
1991             "Clients don't have long-term identity keys. Exiting.\n");
1992     return -1;
1993   }
1994   tor_assert(nickname);
1995   if (init_keys() < 0) {
1996     log_err(LD_BUG,"Error initializing keys; can't display fingerprint");
1997     return -1;
1998   }
1999   if (!(k = get_identity_key())) {
2000     log_err(LD_GENERAL,"Error: missing identity key.");
2001     return -1;
2002   }
2003   if (crypto_pk_get_fingerprint(k, buf, 1)<0) {
2004     log_err(LD_BUG, "Error computing fingerprint");
2005     return -1;
2006   }
2007   printf("%s %s\n", nickname, buf);
2008   return 0;
2009 }
2010
2011 /** Entry point for password hashing: take the desired password from
2012  * the command line, and print its salted hash to stdout. **/
2013 /* static */ void
2014 do_hash_password(void)
2015 {
2016
2017   char output[256];
2018   char key[S2K_SPECIFIER_LEN+DIGEST_LEN];
2019
2020   crypto_rand(key, S2K_SPECIFIER_LEN-1);
2021   key[S2K_SPECIFIER_LEN-1] = (uint8_t)96; /* Hash 64 K of data. */
2022   secret_to_key(key+S2K_SPECIFIER_LEN, DIGEST_LEN,
2023                 get_options()->command_arg, strlen(get_options()->command_arg),
2024                 key);
2025   base16_encode(output, sizeof(output), key, sizeof(key));
2026   printf("16:%s\n",output);
2027 }
2028
2029 /** Main entry point for the Tor process.  Called from main(). */
2030 /* This function is distinct from main() only so we can link main.c into
2031  * the unittest binary without conflicting with the unittests' main. */
2032 int
2033 tor_main(int argc, char *argv[])
2034 {
2035   int result = 0;
2036   update_approx_time(time(NULL));
2037   tor_threads_init();
2038   init_logging();
2039 #ifdef USE_DMALLOC
2040   {
2041     /* Instruct OpenSSL to use our internal wrappers for malloc,
2042        realloc and free. */
2043     int r = CRYPTO_set_mem_ex_functions(_tor_malloc, _tor_realloc, _tor_free);
2044     tor_assert(r);
2045   }
2046 #endif
2047 #ifdef NT_SERVICE
2048   {
2049      int done = 0;
2050      result = nt_service_parse_options(argc, argv, &done);
2051      if (done) return result;
2052   }
2053 #endif
2054   if (tor_init(argc, argv)<0)
2055     return -1;
2056   switch (get_options()->command) {
2057   case CMD_RUN_TOR:
2058 #ifdef NT_SERVICE
2059     nt_service_set_state(SERVICE_RUNNING);
2060 #endif
2061     result = do_main_loop();
2062     break;
2063   case CMD_LIST_FINGERPRINT:
2064     result = do_list_fingerprint();
2065     break;
2066   case CMD_HASH_PASSWORD:
2067     do_hash_password();
2068     result = 0;
2069     break;
2070   case CMD_VERIFY_CONFIG:
2071     printf("Configuration was valid\n");
2072     result = 0;
2073     break;
2074   case CMD_RUN_UNITTESTS: /* only set by test.c */
2075   default:
2076     log_warn(LD_BUG,"Illegal command number %d: internal error.",
2077              get_options()->command);
2078     result = -1;
2079   }
2080   tor_cleanup();
2081   return result;
2082 }
2083