src/or/buffers.c

   1 /* Copyright (c) 2001 Matej Pfajfar.
   2  * Copyright (c) 2001-2004, Roger Dingledine.
   3  * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson.
   4  * Copyright (c) 2007-2011, The Tor Project, Inc. */
   5 /* See LICENSE for licensing information */
   6
   7 /**
   8  * \file buffers.c
   9  * \brief Implements a generic interface buffer.  Buffers are
  10  * fairly opaque string holders that can read to or flush from:
  11  * memory, file descriptors, or TLS connections.
  12  **/
  13 #define BUFFERS_PRIVATE
  14 #include "or.h"
  15 #ifdef HAVE_UNISTD_H
  16 #include <unistd.h>
  17 #endif
  18 #ifdef HAVE_SYS_UIO_H
  19 #include <sys/uio.h>
  20 #endif
  21
  22 //#define PARANOIA
  23
  24 #ifdef PARANOIA
  25 /** Helper: If PARANOIA is defined, assert that the buffer in local variable
  26  * <b>buf</b> is well-formed. */
  27 #define check() STMT_BEGIN assert_buf_ok(buf); STMT_END
  28 #else
  29 #define check() STMT_NIL
  30 #endif
  31
  32 /* Implementation notes:
  33  *
  34  * After flirting with memmove, and dallying with ring-buffers, we're finally
  35  * getting up to speed with the 1970s and implementing buffers as a linked
  36  * list of small chunks.  Each buffer has such a list; data is removed from
  37  * the head of the list, and added at the tail.  The list is singly linked,
  38  * and the buffer keeps a pointer to the head and the tail.
  39  *
  40  * Every chunk, except the tail, contains at least one byte of data.  Data in
  41  * each chunk is contiguous.
  42  *
  43  * When you need to treat the first N characters on a buffer as a contiguous
  44  * string, use the buf_pullup function to make them so.  Don't do this more
  45  * than necessary.
  46  *
  47  * The major free Unix kernels have handled buffers like this since, like,
  48  * forever.
  49  */
  50
  51 /* Chunk manipulation functions */
  52
  53 /** A single chunk on a buffer or in a freelist. */
  54 typedef struct chunk_t {
  55   struct chunk_t *next; /**< The next chunk on the buffer or freelist. */
  56   size_t datalen; /**< The number of bytes stored in this chunk */
  57   size_t memlen; /**< The number of usable bytes of storage in <b>mem</b>. */
  58   char *data; /**< A pointer to the first byte of data stored in <b>mem</b>. */
  59   char mem[1]; /**< The actual memory used for storage in this chunk. May be
  60                 * more than one byte long. */
  61 } chunk_t;
  62
  63 #define CHUNK_HEADER_LEN STRUCT_OFFSET(chunk_t, mem[0])
  64
  65 /** Return the number of bytes needed to allocate a chunk to hold
  66  * <b>memlen</b> bytes. */
  67 #define CHUNK_ALLOC_SIZE(memlen) (CHUNK_HEADER_LEN + (memlen))
  68 /** Return the number of usable bytes in a chunk allocated with
  69  * malloc(<b>memlen</b>). */
  70 #define CHUNK_SIZE_WITH_ALLOC(memlen) ((memlen) - CHUNK_HEADER_LEN)
  71
  72 /** Return the next character in <b>chunk</b> onto which data can be appended.
  73  * If the chunk is full, this might be off the end of chunk->mem. */
  74 static INLINE char *
  75 CHUNK_WRITE_PTR(chunk_t *chunk)
  76 {
  77   return chunk->data + chunk->datalen;
  78 }
  79
  80 /** Return the number of bytes that can be written onto <b>chunk</b> without
  81  * running out of space. */
  82 static INLINE size_t
  83 CHUNK_REMAINING_CAPACITY(const chunk_t *chunk)
  84 {
  85   return (chunk->mem + chunk->memlen) - (chunk->data + chunk->datalen);
  86 }
  87
  88 /** Move all bytes stored in <b>chunk</b> to the front of <b>chunk</b>->mem,
  89  * to free up space at the end. */
  90 static INLINE void
  91 chunk_repack(chunk_t *chunk)
  92 {
  93   if (chunk->datalen && chunk->data != &chunk->mem[0]) {
  94     memmove(chunk->mem, chunk->data, chunk->datalen);
  95   }
  96   chunk->data = &chunk->mem[0];
  97 }
  98
  99 #ifdef ENABLE_BUF_FREELISTS
 100 /** A freelist of chunks. */
 101 typedef struct chunk_freelist_t {
 102   size_t alloc_size; /**< What size chunks does this freelist hold? */
 103   int max_length; /**< Never allow more than this number of chunks in the
 104                    * freelist. */
 105   int slack; /**< When trimming the freelist, leave this number of extra
 106               * chunks beyond lowest_length.*/
 107   int cur_length; /**< How many chunks on the freelist now? */
 108   int lowest_length; /**< What's the smallest value of cur_length since the
 109                       * last time we cleaned this freelist? */
 110   uint64_t n_alloc;
 111   uint64_t n_free;
 112   uint64_t n_hit;
 113   chunk_t *head; /**< First chunk on the freelist. */
 114 } chunk_freelist_t;
 115
 116 /** Macro to help define freelists. */
 117 #define FL(a,m,s) { a, m, s, 0, 0, 0, 0, 0, NULL }
 118
 119 /** Static array of freelists, sorted by alloc_len, terminated by an entry
 120  * with alloc_size of 0. */
 121 static chunk_freelist_t freelists[] = {
 122   FL(4096, 256, 8), FL(8192, 128, 4), FL(16384, 64, 4), FL(32768, 32, 2),
 123   FL(0, 0, 0)
 124 };
 125 #undef FL
 126 /** How many times have we looked for a chunk of a size that no freelist
 127  * could help with? */
 128 static uint64_t n_freelist_miss = 0;
 129
 130 static void assert_freelist_ok(chunk_freelist_t *fl);
 131
 132 /** Return the freelist to hold chunks of size <b>alloc</b>, or NULL if
 133  * no freelist exists for that size. */
 134 static INLINE chunk_freelist_t *
 135 get_freelist(size_t alloc)
 136 {
 137   int i;
 138   for (i=0; freelists[i].alloc_size <= alloc; ++i) {
 139     if (freelists[i].alloc_size == alloc) {
 140       return &freelists[i];
 141     }
 142   }
 143   return NULL;
 144 }
 145
 146 /** Deallocate a chunk or put it on a freelist */
 147 static void
 148 chunk_free(chunk_t *chunk)
 149 {
 150   size_t alloc = CHUNK_ALLOC_SIZE(chunk->memlen);
 151   chunk_freelist_t *freelist = get_freelist(alloc);
 152   if (freelist && freelist->cur_length < freelist->max_length) {
 153     chunk->next = freelist->head;
 154     freelist->head = chunk;
 155     ++freelist->cur_length;
 156   } else {
 157     if (freelist)
 158       ++freelist->n_free;
 159     tor_free(chunk);
 160   }
 161 }
 162
 163 /** Allocate a new chunk with a given allocation size, or get one from the
 164  * freelist.  Note that a chunk with allocation size A can actually hold only
 165  * CHUNK_SIZE_WITH_ALLOC(A) bytes in its mem field. */
 166 static INLINE chunk_t *
 167 chunk_new_with_alloc_size(size_t alloc)
 168 {
 169   chunk_t *ch;
 170   chunk_freelist_t *freelist;
 171   tor_assert(alloc >= sizeof(chunk_t));
 172   freelist = get_freelist(alloc);
 173   if (freelist && freelist->head) {
 174     ch = freelist->head;
 175     freelist->head = ch->next;
 176     if (--freelist->cur_length < freelist->lowest_length)
 177       freelist->lowest_length = freelist->cur_length;
 178     ++freelist->n_hit;
 179   } else {
 180     /* XXXX take advantage of tor_malloc_roundup, once we know how that
 181      * affects freelists. */
 182     if (freelist)
 183       ++freelist->n_alloc;
 184     else
 185       ++n_freelist_miss;
 186     ch = tor_malloc(alloc);
 187   }
 188   ch->next = NULL;
 189   ch->datalen = 0;
 190   ch->memlen = CHUNK_SIZE_WITH_ALLOC(alloc);
 191   ch->data = &ch->mem[0];
 192   return ch;
 193 }
 194 #else
 195 static void
 196 chunk_free(chunk_t *chunk)
 197 {
 198   tor_free(chunk);
 199 }
 200 static INLINE chunk_t *
 201 chunk_new_with_alloc_size(size_t alloc)
 202 {
 203   chunk_t *ch;
 204   ch = tor_malloc_roundup(&alloc);
 205   ch->next = NULL;
 206   ch->datalen = 0;
 207   ch->memlen = CHUNK_SIZE_WITH_ALLOC(alloc);
 208   ch->data = &ch->mem[0];
 209   return ch;
 210 }
 211 #endif
 212
 213 /** Expand <b>chunk</b> until it can hold <b>sz</b> bytes, and return a
 214  * new pointer to <b>chunk</b>.  Old pointers are no longer valid. */
 215 static INLINE chunk_t *
 216 chunk_grow(chunk_t *chunk, size_t sz)
 217 {
 218   off_t offset;
 219   tor_assert(sz > chunk->memlen);
 220   offset = chunk->data - chunk->mem;
 221   chunk = tor_realloc(chunk, CHUNK_ALLOC_SIZE(sz));
 222   chunk->memlen = sz;
 223   chunk->data = chunk->mem + offset;
 224   return chunk;
 225 }
 226
 227 /** If a read onto the end of a chunk would be smaller than this number, then
 228  * just start a new chunk. */
 229 #define MIN_READ_LEN 8
 230 /** Every chunk should take up at least this many bytes. */
 231 #define MIN_CHUNK_ALLOC 256
 232 /** No chunk should take up more than this many bytes. */
 233 #define MAX_CHUNK_ALLOC 65536
 234
 235 /** Return the allocation size we'd like to use to hold <b>target</b>
 236  * bytes. */
 237 static INLINE size_t
 238 preferred_chunk_size(size_t target)
 239 {
 240   size_t sz = MIN_CHUNK_ALLOC;
 241   while (CHUNK_SIZE_WITH_ALLOC(sz) < target) {
 242     sz <<= 1;
 243   }
 244   return sz;
 245 }
 246
 247 /** Remove from the freelists most chunks that have not been used since the
 248  * last call to buf_shrink_freelists(). */
 249 void
 250 buf_shrink_freelists(int free_all)
 251 {
 252 #ifdef ENABLE_BUF_FREELISTS
 253   int i;
 254   disable_control_logging();
 255   for (i = 0; freelists[i].alloc_size; ++i) {
 256     int slack = freelists[i].slack;
 257     assert_freelist_ok(&freelists[i]);
 258     if (free_all || freelists[i].lowest_length > slack) {
 259       int n_to_free = free_all ? freelists[i].cur_length :
 260         (freelists[i].lowest_length - slack);
 261       int n_to_skip = freelists[i].cur_length - n_to_free;
 262       int orig_n_to_free = n_to_free, n_freed=0;
 263       int orig_n_to_skip = n_to_skip;
 264       int new_length = n_to_skip;
 265       chunk_t **chp = &freelists[i].head;
 266       chunk_t *chunk;
 267       while (n_to_skip) {
 268         tor_assert((*chp)->next);
 269         chp = &(*chp)->next;
 270         --n_to_skip;
 271       }
 272       chunk = *chp;
 273       *chp = NULL;
 274       while (chunk) {
 275         chunk_t *next = chunk->next;
 276         tor_free(chunk);
 277         chunk = next;
 278         --n_to_free;
 279         ++n_freed;
 280         ++freelists[i].n_free;
 281       }
 282       if (n_to_free) {
 283         log_warn(LD_BUG, "Freelist length for %d-byte chunks may have been "
 284                  "messed up somehow.", (int)freelists[i].alloc_size);
 285         log_warn(LD_BUG, "There were %d chunks at the start.  I decided to "
 286                  "keep %d. I wanted to free %d.  I freed %d.  I somehow think "
 287                  "I have %d left to free.",
 288                  freelists[i].cur_length, n_to_skip, orig_n_to_free,
 289                  n_freed, n_to_free);
 290       }
 291       // tor_assert(!n_to_free);
 292       freelists[i].cur_length = new_length;
 293       log_info(LD_MM, "Cleaned freelist for %d-byte chunks: kept %d, "
 294                "dropped %d.",
 295                (int)freelists[i].alloc_size, orig_n_to_skip, orig_n_to_free);
 296     }
 297     freelists[i].lowest_length = freelists[i].cur_length;
 298     assert_freelist_ok(&freelists[i]);
 299   }
 300   enable_control_logging();
 301 #else
 302   (void) free_all;
 303 #endif
 304 }
 305
 306 /** Describe the current status of the freelists at log level <b>severity</b>.
 307  */
 308 void
 309 buf_dump_freelist_sizes(int severity)
 310 {
 311 #ifdef ENABLE_BUF_FREELISTS
 312   int i;
 313   log(severity, LD_MM, "====== Buffer freelists:");
 314   for (i = 0; freelists[i].alloc_size; ++i) {
 315     uint64_t total = ((uint64_t)freelists[i].cur_length) *
 316       freelists[i].alloc_size;
 317     log(severity, LD_MM,
 318         U64_FORMAT" bytes in %d %d-byte chunks ["U64_FORMAT
 319         " misses; "U64_FORMAT" frees; "U64_FORMAT" hits]",
 320         U64_PRINTF_ARG(total),
 321         freelists[i].cur_length, (int)freelists[i].alloc_size,
 322         U64_PRINTF_ARG(freelists[i].n_alloc),
 323         U64_PRINTF_ARG(freelists[i].n_free),
 324         U64_PRINTF_ARG(freelists[i].n_hit));
 325   }
 326   log(severity, LD_MM, U64_FORMAT" allocations in non-freelist sizes",
 327       U64_PRINTF_ARG(n_freelist_miss));
 328 #else
 329   (void)severity;
 330 #endif
 331 }
 332
 333 /** Magic value for buf_t.magic, to catch pointer errors. */
 334 #define BUFFER_MAGIC 0xB0FFF312u
 335 /** A resizeable buffer, optimized for reading and writing. */
 336 struct buf_t {
 337   uint32_t magic; /**< Magic cookie for debugging: Must be set to
 338                    *   BUFFER_MAGIC. */
 339   size_t datalen; /**< How many bytes is this buffer holding right now? */
 340   size_t default_chunk_size; /**< Don't allocate any chunks smaller than
 341                               * this for this buffer. */
 342   chunk_t *head; /**< First chunk in the list, or NULL for none. */
 343   chunk_t *tail; /**< Last chunk in the list, or NULL for none. */
 344 };
 345
 346 /** Collapse data from the first N chunks from <b>buf</b> into buf->head,
 347  * growing it as necessary, until buf->head has the first <b>bytes</b> bytes
 348  * of data from the buffer, or until buf->head has all the data in <b>buf</b>.
 349  *
 350  * If <b>nulterminate</b> is true, ensure that there is a 0 byte in
 351  * buf->head->mem right after all the data. */
 352 static void
 353 buf_pullup(buf_t *buf, size_t bytes, int nulterminate)
 354 {
 355   chunk_t *dest, *src;
 356   size_t capacity;
 357   if (!buf->head)
 358     return;
 359
 360   check();
 361   if (buf->datalen < bytes)
 362     bytes = buf->datalen;
 363
 364   if (nulterminate) {
 365     capacity = bytes + 1;
 366     if (buf->head->datalen >= bytes && CHUNK_REMAINING_CAPACITY(buf->head)) {
 367       *CHUNK_WRITE_PTR(buf->head) = '\0';
 368       return;
 369     }
 370   } else {
 371     capacity = bytes;
 372     if (buf->head->datalen >= bytes)
 373       return;
 374   }
 375
 376   if (buf->head->memlen >= capacity) {
 377     /* We don't need to grow the first chunk, but we might need to repack it.*/
 378     if (CHUNK_REMAINING_CAPACITY(buf->head) < capacity-buf->datalen)
 379       chunk_repack(buf->head);
 380     tor_assert(CHUNK_REMAINING_CAPACITY(buf->head) >= capacity-buf->datalen);
 381   } else {
 382     chunk_t *newhead;
 383     size_t newsize;
 384     /* We need to grow the chunk. */
 385     chunk_repack(buf->head);
 386     newsize = CHUNK_SIZE_WITH_ALLOC(preferred_chunk_size(capacity));
 387     newhead = chunk_grow(buf->head, newsize);
 388     tor_assert(newhead->memlen >= capacity);
 389     if (newhead != buf->head) {
 390       if (buf->tail == buf->head)
 391         buf->tail = newhead;
 392       buf->head = newhead;
 393     }
 394   }
 395
 396   dest = buf->head;
 397   while (dest->datalen < bytes) {
 398     size_t n = bytes - dest->datalen;
 399     src = dest->next;
 400     tor_assert(src);
 401     if (n > src->datalen) {
 402       memcpy(CHUNK_WRITE_PTR(dest), src->data, src->datalen);
 403       dest->datalen += src->datalen;
 404       dest->next = src->next;
 405       if (buf->tail == src)
 406         buf->tail = dest;
 407       chunk_free(src);
 408     } else {
 409       memcpy(CHUNK_WRITE_PTR(dest), src->data, n);
 410       dest->datalen += n;
 411       src->data += n;
 412       src->datalen -= n;
 413       tor_assert(dest->datalen == bytes);
 414     }
 415   }
 416
 417   if (nulterminate) {
 418     tor_assert(CHUNK_REMAINING_CAPACITY(buf->head));
 419     *CHUNK_WRITE_PTR(buf->head) = '\0';
 420   }
 421
 422   check();
 423 }
 424
 425 /** Resize buf so it won't hold extra memory that we haven't been
 426  * using lately.
 427  */
 428 void
 429 buf_shrink(buf_t *buf)
 430 {
 431   (void)buf;
 432 }
 433
 434 /** Remove the first <b>n</b> bytes from buf. */
 435 static INLINE void
 436 buf_remove_from_front(buf_t *buf, size_t n)
 437 {
 438   tor_assert(buf->datalen >= n);
 439   while (n) {
 440     tor_assert(buf->head);
 441     if (buf->head->datalen > n) {
 442       buf->head->datalen -= n;
 443       buf->head->data += n;
 444       buf->datalen -= n;
 445       return;
 446     } else {
 447       chunk_t *victim = buf->head;
 448       n -= victim->datalen;
 449       buf->datalen -= victim->datalen;
 450       buf->head = victim->next;
 451       if (buf->tail == victim)
 452         buf->tail = NULL;
 453       chunk_free(victim);
 454     }
 455   }
 456   check();
 457 }
 458
 459 /** Create and return a new buf with default chunk capacity <b>size</b>.
 460  */
 461 buf_t *
 462 buf_new_with_capacity(size_t size)
 463 {
 464   buf_t *b = buf_new();
 465   b->default_chunk_size = preferred_chunk_size(size);
 466   return b;
 467 }
 468
 469 /** Allocate and return a new buffer with default capacity. */
 470 buf_t *
 471 buf_new(void)
 472 {
 473   buf_t *buf = tor_malloc_zero(sizeof(buf_t));
 474   buf->magic = BUFFER_MAGIC;
 475   buf->default_chunk_size = 4096;
 476   return buf;
 477 }
 478
 479 /** Remove all data from <b>buf</b>. */
 480 void
 481 buf_clear(buf_t *buf)
 482 {
 483   chunk_t *chunk, *next;
 484   buf->datalen = 0;
 485   for (chunk = buf->head; chunk; chunk = next) {
 486     next = chunk->next;
 487     chunk_free(chunk);
 488   }
 489   buf->head = buf->tail = NULL;
 490 }
 491
 492 /** Return the number of bytes stored in <b>buf</b> */
 493 size_t
 494 buf_datalen(const buf_t *buf)
 495 {
 496   return buf->datalen;
 497 }
 498
 499 /** Return the total length of all chunks used in <b>buf</b>. */
 500 size_t
 501 buf_allocation(const buf_t *buf)
 502 {
 503   size_t total = 0;
 504   const chunk_t *chunk;
 505   for (chunk = buf->head; chunk; chunk = chunk->next) {
 506     total += chunk->memlen;
 507   }
 508   return total;
 509 }
 510
 511 /** Return the number of bytes that can be added to <b>buf</b> without
 512  * performing any additional allocation. */
 513 size_t
 514 buf_slack(const buf_t *buf)
 515 {
 516   if (!buf->tail)
 517     return 0;
 518   else
 519     return CHUNK_REMAINING_CAPACITY(buf->tail);
 520 }
 521
 522 /** Release storage held by <b>buf</b>. */
 523 void
 524 buf_free(buf_t *buf)
 525 {
 526   buf_clear(buf);
 527   buf->magic = 0xdeadbeef;
 528   tor_free(buf);
 529 }
 530
 531 /** Append a new chunk with enough capacity to hold <b>capacity</b> bytes to
 532  * the tail of <b>buf</b>.  If <b>capped</b>, don't allocate a chunk bigger
 533  * than MAX_CHUNK_ALLOC. */
 534 static chunk_t *
 535 buf_add_chunk_with_capacity(buf_t *buf, size_t capacity, int capped)
 536 {
 537   chunk_t *chunk;
 538   if (CHUNK_ALLOC_SIZE(capacity) < buf->default_chunk_size) {
 539     chunk = chunk_new_with_alloc_size(buf->default_chunk_size);
 540   } else if (capped && CHUNK_ALLOC_SIZE(capacity) > MAX_CHUNK_ALLOC) {
 541     chunk = chunk_new_with_alloc_size(MAX_CHUNK_ALLOC);
 542   } else {
 543     chunk = chunk_new_with_alloc_size(preferred_chunk_size(capacity));
 544   }
 545   if (buf->tail) {
 546     tor_assert(buf->head);
 547     buf->tail->next = chunk;
 548     buf->tail = chunk;
 549   } else {
 550     tor_assert(!buf->head);
 551     buf->head = buf->tail = chunk;
 552   }
 553   check();
 554   return chunk;
 555 }
 556
 557 /** If we're using readv and writev, how many chunks are we willing to
 558  * read/write at a time? */
 559 #define N_IOV 3
 560
 561 /** Read up to <b>at_most</b> bytes from the socket <b>fd</b> into
 562  * <b>chunk</b> (which must be on <b>buf</b>). If we get an EOF, set
 563  * *<b>reached_eof</b> to 1.  Return -1 on error, 0 on eof or blocking,
 564  * and the number of bytes read otherwise. */
 565 static INLINE int
 566 read_to_chunk(buf_t *buf, chunk_t *chunk, int fd, size_t at_most,
 567               int *reached_eof, int *socket_error)
 568 {
 569   ssize_t read_result;
 570 #if 0 && defined(HAVE_READV) && !defined(WIN32)
 571   struct iovec iov[N_IOV];
 572   int i;
 573   size_t remaining = at_most;
 574   for (i=0; chunk && i < N_IOV && remaining; ++i) {
 575     iov[i].iov_base = CHUNK_WRITE_PTR(chunk);
 576     if (remaining > CHUNK_REMAINING_CAPACITY(chunk))
 577       iov[i].iov_len = CHUNK_REMAINING_CAPACITY(chunk);
 578     else
 579       iov[i].iov_len = remaining;
 580     remaining -= iov[i].iov_len;
 581     chunk = chunk->next;
 582   }
 583   read_result = readv(fd, iov, i);
 584 #else
 585   if (at_most > CHUNK_REMAINING_CAPACITY(chunk))
 586     at_most = CHUNK_REMAINING_CAPACITY(chunk);
 587   read_result = tor_socket_recv(fd, CHUNK_WRITE_PTR(chunk), at_most, 0);
 588 #endif
 589
 590   if (read_result < 0) {
 591     int e = tor_socket_errno(fd);
 592     if (!ERRNO_IS_EAGAIN(e)) { /* it's a real error */
 593 #ifdef MS_WINDOWS
 594       if (e == WSAENOBUFS)
 595         log_warn(LD_NET,"recv() failed: WSAENOBUFS. Not enough ram?");
 596 #endif
 597       *socket_error = e;
 598       return -1;
 599     }
 600     return 0; /* would block. */
 601   } else if (read_result == 0) {
 602     log_debug(LD_NET,"Encountered eof on fd %d", (int)fd);
 603     *reached_eof = 1;
 604     return 0;
 605   } else { /* actually got bytes. */
 606     buf->datalen += read_result;
 607 #if 0 && defined(HAVE_READV) && !defined(WIN32)
 608     while ((size_t)read_result > CHUNK_REMAINING_CAPACITY(chunk)) {
 609       chunk->datalen += CHUNK_REMAINING_CAPACITY(chunk);
 610       read_result -= CHUNK_REMAINING_CAPACITY(chunk);
 611       chunk = chunk->next;
 612       tor_assert(chunk);
 613     }
 614 #endif
 615     chunk->datalen += read_result;
 616     log_debug(LD_NET,"Read %ld bytes. %d on inbuf.", (long)read_result,
 617               (int)buf->datalen);
 618     tor_assert(read_result < INT_MAX);
 619     return (int)read_result;
 620   }
 621 }
 622
 623 /** As read_to_chunk(), but return (negative) error code on error, blocking,
 624  * or TLS, and the number of bytes read otherwise. */
 625 static INLINE int
 626 read_to_chunk_tls(buf_t *buf, chunk_t *chunk, tor_tls_t *tls,
 627                   size_t at_most)
 628 {
 629   int read_result;
 630
 631   tor_assert(CHUNK_REMAINING_CAPACITY(chunk) >= at_most);
 632   read_result = tor_tls_read(tls, CHUNK_WRITE_PTR(chunk), at_most);
 633   if (read_result < 0)
 634     return read_result;
 635   buf->datalen += read_result;
 636   chunk->datalen += read_result;
 637   return read_result;
 638 }
 639
 640 /** Read from socket <b>s</b>, writing onto end of <b>buf</b>.  Read at most
 641  * <b>at_most</b> bytes, growing the buffer as necessary.  If recv() returns 0
 642  * (because of EOF), set *<b>reached_eof</b> to 1 and return 0. Return -1 on
 643  * error; else return the number of bytes read.
 644  */
 645 /* XXXX021 indicate "read blocked" somehow? */
 646 int
 647 read_to_buf(int s, size_t at_most, buf_t *buf, int *reached_eof,
 648             int *socket_error)
 649 {
 650   /* XXXX021 It's stupid to overload the return values for these functions:
 651    * "error status" and "number of bytes read" are not mutually exclusive.
 652    */
 653   int r = 0;
 654   size_t total_read = 0;
 655
 656   check();
 657   tor_assert(reached_eof);
 658   tor_assert(s >= 0);
 659
 660   while (at_most > total_read) {
 661     size_t readlen = at_most - total_read;
 662     chunk_t *chunk;
 663     if (!buf->tail || CHUNK_REMAINING_CAPACITY(buf->tail) < MIN_READ_LEN) {
 664       chunk = buf_add_chunk_with_capacity(buf, at_most, 1);
 665       if (readlen > chunk->memlen)
 666         readlen = chunk->memlen;
 667     } else {
 668       size_t cap = CHUNK_REMAINING_CAPACITY(buf->tail);
 669       chunk = buf->tail;
 670       if (cap < readlen)
 671         readlen = cap;
 672     }
 673
 674     r = read_to_chunk(buf, chunk, s, readlen, reached_eof, socket_error);
 675     check();
 676     if (r < 0)
 677       return r; /* Error */
 678     tor_assert(total_read+r < INT_MAX);
 679     total_read += r;
 680     if ((size_t)r < readlen) { /* eof, block, or no more to read. */
 681       break;
 682     }
 683   }
 684   return (int)total_read;
 685 }
 686
 687 /** As read_to_buf, but reads from a TLS connection, and returns a TLS
 688  * status value rather than the number of bytes read.
 689  *
 690  * Using TLS on OR connections complicates matters in two ways.
 691  *
 692  * First, a TLS stream has its own read buffer independent of the
 693  * connection's read buffer.  (TLS needs to read an entire frame from
 694  * the network before it can decrypt any data.  Thus, trying to read 1
 695  * byte from TLS can require that several KB be read from the network
 696  * and decrypted.  The extra data is stored in TLS's decrypt buffer.)
 697  * Because the data hasn't been read by Tor (it's still inside the TLS),
 698  * this means that sometimes a connection "has stuff to read" even when
 699  * poll() didn't return POLLIN. The tor_tls_get_pending_bytes function is
 700  * used in connection.c to detect TLS objects with non-empty internal
 701  * buffers and read from them again.
 702  *
 703  * Second, the TLS stream's events do not correspond directly to network
 704  * events: sometimes, before a TLS stream can read, the network must be
 705  * ready to write -- or vice versa.
 706  */
 707 int
 708 read_to_buf_tls(tor_tls_t *tls, size_t at_most, buf_t *buf)
 709 {
 710   int r = 0;
 711   size_t total_read = 0;
 712   check();
 713
 714   while (at_most > total_read) {
 715     size_t readlen = at_most - total_read;
 716     chunk_t *chunk;
 717     if (!buf->tail || CHUNK_REMAINING_CAPACITY(buf->tail) < MIN_READ_LEN) {
 718       chunk = buf_add_chunk_with_capacity(buf, at_most, 1);
 719       if (readlen > chunk->memlen)
 720         readlen = chunk->memlen;
 721     } else {
 722       size_t cap = CHUNK_REMAINING_CAPACITY(buf->tail);
 723       chunk = buf->tail;
 724       if (cap < readlen)
 725         readlen = cap;
 726     }
 727
 728     r = read_to_chunk_tls(buf, chunk, tls, readlen);
 729     check();
 730     if (r < 0)
 731       return r; /* Error */
 732     tor_assert(total_read+r < INT_MAX);
 733      total_read += r;
 734     if ((size_t)r < readlen) /* eof, block, or no more to read. */
 735       break;
 736   }
 737   return (int)total_read;
 738 }
 739
 740 /** Helper for flush_buf(): try to write <b>sz</b> bytes from chunk
 741  * <b>chunk</b> of buffer <b>buf</b> onto socket <b>s</b>.  On success, deduct
 742  * the bytes written from *<b>buf_flushlen</b>.  Return the number of bytes
 743  * written on success, 0 on blocking, -1 on failure.
 744  */
 745 static INLINE int
 746 flush_chunk(int s, buf_t *buf, chunk_t *chunk, size_t sz,
 747             size_t *buf_flushlen)
 748 {
 749   ssize_t write_result;
 750 #if 0 && defined(HAVE_WRITEV) && !defined(WIN32)
 751   struct iovec iov[N_IOV];
 752   int i;
 753   size_t remaining = sz;
 754   for (i=0; chunk && i < N_IOV && remaining; ++i) {
 755     iov[i].iov_base = chunk->data;
 756     if (remaining > chunk->datalen)
 757       iov[i].iov_len = chunk->datalen;
 758     else
 759       iov[i].iov_len = remaining;
 760     remaining -= iov[i].iov_len;
 761     chunk = chunk->next;
 762   }
 763   write_result = writev(s, iov, i);
 764 #else
 765   if (sz > chunk->datalen)
 766     sz = chunk->datalen;
 767   write_result = tor_socket_send(s, chunk->data, sz, 0);
 768 #endif
 769
 770   if (write_result < 0) {
 771     int e = tor_socket_errno(s);
 772     if (!ERRNO_IS_EAGAIN(e)) { /* it's a real error */
 773 #ifdef MS_WINDOWS
 774       if (e == WSAENOBUFS)
 775         log_warn(LD_NET,"write() failed: WSAENOBUFS. Not enough ram?");
 776 #endif
 777       return -1;
 778     }
 779     log_debug(LD_NET,"write() would block, returning.");
 780     return 0;
 781   } else {
 782     *buf_flushlen -= write_result;
 783     buf_remove_from_front(buf, write_result);
 784     tor_assert(write_result < INT_MAX);
 785     return (int)write_result;
 786   }
 787 }
 788
 789 /** Helper for flush_buf_tls(): try to write <b>sz</b> bytes from chunk
 790  * <b>chunk</b> of buffer <b>buf</b> onto socket <b>s</b>.  (Tries to write
 791  * more if there is a forced pending write size.)  On success, deduct the
 792  * bytes written from *<b>buf_flushlen</b>.  Return the number of bytes
 793  * written on success, and a TOR_TLS error code on failure or blocking.
 794  */
 795 static INLINE int
 796 flush_chunk_tls(tor_tls_t *tls, buf_t *buf, chunk_t *chunk,
 797                 size_t sz, size_t *buf_flushlen)
 798 {
 799   int r;
 800   size_t forced;
 801   char *data;
 802
 803   forced = tor_tls_get_forced_write_size(tls);
 804   if (forced > sz)
 805     sz = forced;
 806   if (chunk) {
 807     data = chunk->data;
 808     tor_assert(sz <= chunk->datalen);
 809   } else {
 810     data = NULL;
 811     tor_assert(sz == 0);
 812   }
 813   r = tor_tls_write(tls, data, sz);
 814   if (r < 0)
 815     return r;
 816   if (*buf_flushlen > (size_t)r)
 817     *buf_flushlen -= r;
 818   else
 819     *buf_flushlen = 0;
 820   buf_remove_from_front(buf, r);
 821   log_debug(LD_NET,"flushed %d bytes, %d ready to flush, %d remain.",
 822             r,(int)*buf_flushlen,(int)buf->datalen);
 823   return r;
 824 }
 825
 826 /** Write data from <b>buf</b> to the socket <b>s</b>.  Write at most
 827  * <b>sz</b> bytes, decrement *<b>buf_flushlen</b> by
 828  * the number of bytes actually written, and remove the written bytes
 829  * from the buffer.  Return the number of bytes written on success,
 830  * -1 on failure.  Return 0 if write() would block.
 831  */
 832 int
 833 flush_buf(int s, buf_t *buf, size_t sz, size_t *buf_flushlen)
 834 {
 835   /* XXXX021 It's stupid to overload the return values for these functions:
 836    * "error status" and "number of bytes flushed" are not mutually exclusive.
 837    */
 838   int r;
 839   size_t flushed = 0;
 840   tor_assert(buf_flushlen);
 841   tor_assert(s >= 0);
 842   tor_assert(*buf_flushlen <= buf->datalen);
 843   tor_assert(sz <= *buf_flushlen);
 844
 845   check();
 846   while (sz) {
 847     size_t flushlen0;
 848     tor_assert(buf->head);
 849     if (buf->head->datalen >= sz)
 850       flushlen0 = sz;
 851     else
 852       flushlen0 = buf->head->datalen;
 853
 854     r = flush_chunk(s, buf, buf->head, flushlen0, buf_flushlen);
 855     check();
 856     if (r < 0)
 857       return r;
 858     flushed += r;
 859     sz -= r;
 860     if (r == 0 || (size_t)r < flushlen0) /* can't flush any more now. */
 861       break;
 862   }
 863   tor_assert(flushed < INT_MAX);
 864   return (int)flushed;
 865 }
 866
 867 /** As flush_buf(), but writes data to a TLS connection.  Can write more than
 868  * <b>flushlen</b> bytes.
 869  */
 870 int
 871 flush_buf_tls(tor_tls_t *tls, buf_t *buf, size_t flushlen,
 872               size_t *buf_flushlen)
 873 {
 874   int r;
 875   size_t flushed = 0;
 876   ssize_t sz;
 877   tor_assert(buf_flushlen);
 878   tor_assert(*buf_flushlen <= buf->datalen);
 879   tor_assert(flushlen <= *buf_flushlen);
 880   sz = (ssize_t) flushlen;
 881
 882   /* we want to let tls write even if flushlen is zero, because it might
 883    * have a partial record pending */
 884   check_no_tls_errors();
 885
 886   check();
 887   do {
 888     size_t flushlen0;
 889     if (buf->head) {
 890       if ((ssize_t)buf->head->datalen >= sz)
 891         flushlen0 = sz;
 892       else
 893         flushlen0 = buf->head->datalen;
 894     } else {
 895       flushlen0 = 0;
 896     }
 897
 898     r = flush_chunk_tls(tls, buf, buf->head, flushlen0, buf_flushlen);
 899     check();
 900     if (r < 0)
 901       return r;
 902     flushed += r;
 903     sz -= r;
 904     if (r == 0) /* Can't flush any more now. */
 905       break;
 906   } while (sz > 0);
 907   tor_assert(flushed < INT_MAX);
 908   return (int)flushed;
 909 }
 910
 911 /** Append <b>string_len</b> bytes from <b>string</b> to the end of
 912  * <b>buf</b>.
 913  *
 914  * Return the new length of the buffer on success, -1 on failure.
 915  */
 916 int
 917 write_to_buf(const char *string, size_t string_len, buf_t *buf)
 918 {
 919   if (!string_len)
 920     return (int)buf->datalen;
 921   check();
 922
 923   while (string_len) {
 924     size_t copy;
 925     if (!buf->tail || !CHUNK_REMAINING_CAPACITY(buf->tail))
 926       buf_add_chunk_with_capacity(buf, string_len, 1);
 927
 928     copy = CHUNK_REMAINING_CAPACITY(buf->tail);
 929     if (copy > string_len)
 930       copy = string_len;
 931     memcpy(CHUNK_WRITE_PTR(buf->tail), string, copy);
 932     string_len -= copy;
 933     string += copy;
 934     buf->datalen += copy;
 935     buf->tail->datalen += copy;
 936   }
 937
 938   check();
 939   tor_assert(buf->datalen < INT_MAX);
 940   return (int)buf->datalen;
 941 }
 942
 943 /** Helper: copy the first <b>string_len</b> bytes from <b>buf</b>
 944  * onto <b>string</b>.
 945  */
 946 static INLINE void
 947 peek_from_buf(char *string, size_t string_len, const buf_t *buf)
 948 {
 949   chunk_t *chunk;
 950
 951   tor_assert(string);
 952   /* make sure we don't ask for too much */
 953   tor_assert(string_len <= buf->datalen);
 954   /* assert_buf_ok(buf); */
 955
 956   chunk = buf->head;
 957   while (string_len) {
 958     size_t copy = string_len;
 959     tor_assert(chunk);
 960     if (chunk->datalen < copy)
 961       copy = chunk->datalen;
 962     memcpy(string, chunk->data, copy);
 963     string_len -= copy;
 964     string += copy;
 965     chunk = chunk->next;
 966   }
 967 }
 968
 969 /** Remove <b>string_len</b> bytes from the front of <b>buf</b>, and store
 970  * them into <b>string</b>.  Return the new buffer size.  <b>string_len</b>
 971  * must be \<= the number of bytes on the buffer.
 972  */
 973 int
 974 fetch_from_buf(char *string, size_t string_len, buf_t *buf)
 975 {
 976   /* There must be string_len bytes in buf; write them onto string,
 977    * then memmove buf back (that is, remove them from buf).
 978    *
 979    * Return the number of bytes still on the buffer. */
 980
 981   check();
 982   peek_from_buf(string, string_len, buf);
 983   buf_remove_from_front(buf, string_len);
 984   check();
 985   tor_assert(buf->datalen < INT_MAX);
 986   return (int)buf->datalen;
 987 }
 988
 989 /** Check <b>buf</b> for a variable-length cell according to the rules of link
 990  * protocol version <b>linkproto</b>.  If one is found, pull it off the buffer
 991  * and assign a newly allocated var_cell_t to *<b>out</b>, and return 1.
 992  * Return 0 if whatever is on the start of buf_t is not a variable-length
 993  * cell.  Return 1 and set *<b>out</b> to NULL if there seems to be the start
 994  * of a variable-length cell on <b>buf</b>, but the whole thing isn't there
 995  * yet. */
 996 int
 997 fetch_var_cell_from_buf(buf_t *buf, var_cell_t **out, int linkproto)
 998 {
 999   char hdr[VAR_CELL_HEADER_SIZE];
1000   var_cell_t *result;
1001   uint8_t command;
1002   uint16_t length;
1003   /* If linkproto is unknown (0) or v2 (2), variable-length cells work as
1004    * implemented here. If it's 1, there are no variable-length cells.  Tor
1005    * does not support other versions right now, and so can't negotiate them.
1006    */
1007   if (linkproto == 1)
1008     return 0;
1009   check();
1010   *out = NULL;
1011   if (buf->datalen < VAR_CELL_HEADER_SIZE)
1012     return 0;
1013   peek_from_buf(hdr, sizeof(hdr), buf);
1014
1015   command = get_uint8(hdr+2);
1016   if (!(CELL_COMMAND_IS_VAR_LENGTH(command)))
1017     return 0;
1018
1019   length = ntohs(get_uint16(hdr+3));
1020   if (buf->datalen < (size_t)(VAR_CELL_HEADER_SIZE+length))
1021     return 1;
1022   result = var_cell_new(length);
1023   result->command = command;
1024   result->circ_id = ntohs(get_uint16(hdr));
1025
1026   buf_remove_from_front(buf, VAR_CELL_HEADER_SIZE);
1027   peek_from_buf((char*) result->payload, length, buf);
1028   buf_remove_from_front(buf, length);
1029   check();
1030
1031   *out = result;
1032   return 1;
1033 }
1034
1035 /** Move up to *<b>buf_flushlen</b> bytes from <b>buf_in</b> to
1036  * <b>buf_out</b>, and modify *<b>buf_flushlen</b> appropriately.
1037  * Return the number of bytes actually copied.
1038  */
1039 int
1040 move_buf_to_buf(buf_t *buf_out, buf_t *buf_in, size_t *buf_flushlen)
1041 {
1042   /* XXXX we can do way better here, but this doesn't turn up in any
1043    * profiles. */
1044   char b[4096];
1045   size_t cp, len;
1046   len = *buf_flushlen;
1047   if (len > buf_in->datalen)
1048     len = buf_in->datalen;
1049
1050   cp = len; /* Remember the number of bytes we intend to copy. */
1051   tor_assert(cp < INT_MAX);
1052   while (len) {
1053     /* This isn't the most efficient implementation one could imagine, since
1054      * it does two copies instead of 1, but I kinda doubt that this will be
1055      * critical path. */
1056     size_t n = len > sizeof(b) ? sizeof(b) : len;
1057     fetch_from_buf(b, n, buf_in);
1058     write_to_buf(b, n, buf_out);
1059     len -= n;
1060   }
1061   *buf_flushlen -= cp;
1062   return (int)cp;
1063 }
1064
/** Internal structure: represents a position in a buffer, expressed as a
 * chunk plus an index into that chunk.  The absolute offset of the position
 * from the start of the buffer is <b>chunk_pos</b> + <b>pos</b>. */
typedef struct buf_pos_t {
  const chunk_t *chunk; /**< Which chunk are we pointing to? */
  int pos;/**< Which character inside the chunk's data are we pointing to? */
  size_t chunk_pos; /**< Total length of all previous chunks. */
} buf_pos_t;
1071
1072 /** Initialize <b>out</b> to point to the first character of <b>buf</b>.*/
1073 static void
1074 buf_pos_init(const buf_t *buf, buf_pos_t *out)
1075 {
1076   out->chunk = buf->head;
1077   out->pos = 0;
1078   out->chunk_pos = 0;
1079 }
1080
1081 /** Advance <b>out</b> to the first appearance of <b>ch</b> at the current
1082  * position of <b>out</b>, or later.  Return -1 if no instances are found;
1083  * otherwise returns the absolute position of the character. */
1084 static off_t
1085 buf_find_pos_of_char(char ch, buf_pos_t *out)
1086 {
1087   const chunk_t *chunk;
1088   int pos;
1089   tor_assert(out);
1090   if (out->chunk) {
1091     if (out->chunk->datalen) {
1092       tor_assert(out->pos < (off_t)out->chunk->datalen);
1093     } else {
1094       tor_assert(out->pos == 0);
1095     }
1096   }
1097   pos = out->pos;
1098   for (chunk = out->chunk; chunk; chunk = chunk->next) {
1099     char *cp = memchr(chunk->data+pos, ch, chunk->datalen - pos);
1100     if (cp) {
1101       out->chunk = chunk;
1102       tor_assert(cp - chunk->data < INT_MAX);
1103       out->pos = (int)(cp - chunk->data);
1104       return out->chunk_pos + out->pos;
1105     } else {
1106       out->chunk_pos += chunk->datalen;
1107       pos = 0;
1108     }
1109   }
1110   return -1;
1111 }
1112
1113 /** Advance <b>pos</b> by a single character, if there are any more characters
1114  * in the buffer.  Returns 0 on success, -1 on failure. */
1115 static INLINE int
1116 buf_pos_inc(buf_pos_t *pos)
1117 {
1118   ++pos->pos;
1119   if (pos->pos == (off_t)pos->chunk->datalen) {
1120     if (!pos->chunk->next)
1121       return -1;
1122     pos->chunk_pos += pos->chunk->datalen;
1123     pos->chunk = pos->chunk->next;
1124     pos->pos = 0;
1125   }
1126   return 0;
1127 }
1128
1129 /** Return true iff the <b>n</b>-character string in <b>s</b> appears
1130  * (verbatim) at <b>pos</b>. */
1131 static int
1132 buf_matches_at_pos(const buf_pos_t *pos, const char *s, size_t n)
1133 {
1134   buf_pos_t p;
1135   if (!n)
1136     return 1;
1137
1138   memcpy(&p, pos, sizeof(p));
1139
1140   while (1) {
1141     char ch = p.chunk->data[p.pos];
1142     if (ch != *s)
1143       return 0;
1144     ++s;
1145     /* If we're out of characters that don't match, we match.  Check this
1146      * _before_ we test incrementing pos, in case we're at the end of the
1147      * string. */
1148     if (--n == 0)
1149       return 1;
1150     if (buf_pos_inc(&p)<0)
1151       return 0;
1152   }
1153 }
1154
1155 /** Return the first position in <b>buf</b> at which the <b>n</b>-character
1156  * string <b>s</b> occurs, or -1 if it does not occur. */
1157 /*private*/ int
1158 buf_find_string_offset(const buf_t *buf, const char *s, size_t n)
1159 {
1160   buf_pos_t pos;
1161   buf_pos_init(buf, &pos);
1162   while (buf_find_pos_of_char(*s, &pos) >= 0) {
1163     if (buf_matches_at_pos(&pos, s, n)) {
1164       tor_assert(pos.chunk_pos + pos.pos < INT_MAX);
1165       return (int)(pos.chunk_pos + pos.pos);
1166     } else {
1167       if (buf_pos_inc(&pos)<0)
1168         return -1;
1169     }
1170   }
1171   return -1;
1172 }
1173
1174 /** There is a (possibly incomplete) http statement on <b>buf</b>, of the
1175  * form "\%s\\r\\n\\r\\n\%s", headers, body. (body may contain NULs.)
1176  * If a) the headers include a Content-Length field and all bytes in
1177  * the body are present, or b) there's no Content-Length field and
1178  * all headers are present, then:
1179  *
1180  *  - strdup headers into <b>*headers_out</b>, and NUL-terminate it.
1181  *  - memdup body into <b>*body_out</b>, and NUL-terminate it.
1182  *  - Then remove them from <b>buf</b>, and return 1.
1183  *
1184  *  - If headers or body is NULL, discard that part of the buf.
1185  *  - If a headers or body doesn't fit in the arg, return -1.
1186  *  (We ensure that the headers or body don't exceed max len,
1187  *   _even if_ we're planning to discard them.)
1188  *  - If force_complete is true, then succeed even if not all of the
1189  *    content has arrived.
1190  *
1191  * Else, change nothing and return 0.
1192  */
1193 int
1194 fetch_from_buf_http(buf_t *buf,
1195                     char **headers_out, size_t max_headerlen,
1196                     char **body_out, size_t *body_used, size_t max_bodylen,
1197                     int force_complete)
1198 {
1199   char *headers, *p;
1200   size_t headerlen, bodylen, contentlen;
1201   int crlf_offset;
1202
1203   check();
1204   if (!buf->head)
1205     return 0;
1206
1207   crlf_offset = buf_find_string_offset(buf, "\r\n\r\n", 4);
1208   if (crlf_offset > (int)max_headerlen ||
1209       (crlf_offset < 0 && buf->datalen > max_headerlen)) {
1210     log_debug(LD_HTTP,"headers too long.");
1211     return -1;
1212   } else if (crlf_offset < 0) {
1213     log_debug(LD_HTTP,"headers not all here yet.");
1214     return 0;
1215   }
1216   /* Okay, we have a full header.  Make sure it all appears in the first
1217    * chunk. */
1218   if ((int)buf->head->datalen < crlf_offset + 4)
1219     buf_pullup(buf, crlf_offset+4, 0);
1220   headerlen = crlf_offset + 4;
1221
1222   headers = buf->head->data;
1223   bodylen = buf->datalen - headerlen;
1224   log_debug(LD_HTTP,"headerlen %d, bodylen %d.", (int)headerlen, (int)bodylen);
1225
1226   if (max_headerlen <= headerlen) {
1227     log_warn(LD_HTTP,"headerlen %d larger than %d. Failing.",
1228              (int)headerlen, (int)max_headerlen-1);
1229     return -1;
1230   }
1231   if (max_bodylen <= bodylen) {
1232     log_warn(LD_HTTP,"bodylen %d larger than %d. Failing.",
1233              (int)bodylen, (int)max_bodylen-1);
1234     return -1;
1235   }
1236
1237 #define CONTENT_LENGTH "\r\nContent-Length: "
1238   p = (char*) tor_memstr(headers, headerlen, CONTENT_LENGTH);
1239   if (p) {
1240     int i;
1241     i = atoi(p+strlen(CONTENT_LENGTH));
1242     if (i < 0) {
1243       log_warn(LD_PROTOCOL, "Content-Length is less than zero; it looks like "
1244                "someone is trying to crash us.");
1245       return -1;
1246     }
1247     contentlen = i;
1248     /* if content-length is malformed, then our body length is 0. fine. */
1249     log_debug(LD_HTTP,"Got a contentlen of %d.",(int)contentlen);
1250     if (bodylen < contentlen) {
1251       if (!force_complete) {
1252         log_debug(LD_HTTP,"body not all here yet.");
1253         return 0; /* not all there yet */
1254       }
1255     }
1256     if (bodylen > contentlen) {
1257       bodylen = contentlen;
1258       log_debug(LD_HTTP,"bodylen reduced to %d.",(int)bodylen);
1259     }
1260   }
1261   /* all happy. copy into the appropriate places, and return 1 */
1262   if (headers_out) {
1263     *headers_out = tor_malloc(headerlen+1);
1264     fetch_from_buf(*headers_out, headerlen, buf);
1265     (*headers_out)[headerlen] = 0; /* NUL terminate it */
1266   }
1267   if (body_out) {
1268     tor_assert(body_used);
1269     *body_used = bodylen;
1270     *body_out = tor_malloc(bodylen+1);
1271     fetch_from_buf(*body_out, bodylen, buf);
1272     (*body_out)[bodylen] = 0; /* NUL terminate it */
1273   }
1274   check();
1275   return 1;
1276 }
1277
1278 /** There is a (possibly incomplete) socks handshake on <b>buf</b>, of one
1279  * of the forms
1280  *  - socks4: "socksheader username\\0"
1281  *  - socks4a: "socksheader username\\0 destaddr\\0"
1282  *  - socks5 phase one: "version #methods methods"
1283  *  - socks5 phase two: "version command 0 addresstype..."
1284  * If it's a complete and valid handshake, and destaddr fits in
1285  *   MAX_SOCKS_ADDR_LEN bytes, then pull the handshake off the buf,
1286  *   assign to <b>req</b>, and return 1.
1287  *
1288  * If it's invalid or too big, return -1.
1289  *
1290  * Else it's not all there yet, leave buf alone and return 0.
1291  *
1292  * If you want to specify the socks reply, write it into <b>req->reply</b>
1293  *   and set <b>req->replylen</b>, else leave <b>req->replylen</b> alone.
1294  *
1295  * If <b>log_sockstype</b> is non-zero, then do a notice-level log of whether
1296  * the connection is possibly leaking DNS requests locally or not.
1297  *
1298  * If <b>safe_socks</b> is true, then reject unsafe socks protocols.
1299  *
1300  * If returning 0 or -1, <b>req->address</b> and <b>req->port</b> are
1301  * undefined.
1302  */
1303 int
1304 fetch_from_buf_socks(buf_t *buf, socks_request_t *req,
1305                      int log_sockstype, int safe_socks)
1306 {
1307   unsigned int len;
1308   char tmpbuf[TOR_ADDR_BUF_LEN+1];
1309   tor_addr_t destaddr;
1310   uint32_t destip;
1311   uint8_t socksver;
1312   enum {socks4, socks4a} socks4_prot = socks4a;
1313   char *next, *startaddr;
1314   struct in_addr in;
1315
1316   /* If the user connects with socks4 or the wrong variant of socks5,
1317    * then log a warning to let him know that it might be unwise. */
1318   static int have_warned_about_unsafe_socks = 0;
1319
1320   if (buf->datalen < 2) /* version and another byte */
1321     return 0;
1322
1323   buf_pullup(buf, 128, 0);
1324   tor_assert(buf->head && buf->head->datalen >= 2);
1325
1326   socksver = *buf->head->data;
1327
1328   switch (socksver) { /* which version of socks? */
1329
1330     case 5: /* socks5 */
1331
1332       if (req->socks_version != 5) { /* we need to negotiate a method */
1333         unsigned char nummethods = (unsigned char)*(buf->head->data+1);
1334         tor_assert(!req->socks_version);
1335         if (buf->datalen < 2u+nummethods)
1336           return 0;
1337         buf_pullup(buf, 2u+nummethods, 0);
1338         if (!nummethods || !memchr(buf->head->data+2, 0, nummethods)) {
1339           log_warn(LD_APP,
1340                    "socks5: offered methods don't include 'no auth'. "
1341                    "Rejecting.");
1342           req->replylen = 2; /* 2 bytes of response */
1343           req->reply[0] = 5;
1344           req->reply[1] = '\xFF'; /* reject all methods */
1345           return -1;
1346         }
1347         /* remove packet from buf. also remove any other extraneous
1348          * bytes, to support broken socks clients. */
1349         buf_clear(buf);
1350
1351         req->replylen = 2; /* 2 bytes of response */
1352         req->reply[0] = 5; /* socks5 reply */
1353         req->reply[1] = 0; /* tell client to use "none" auth method */
1354         req->socks_version = 5; /* remember we've already negotiated auth */
1355         log_debug(LD_APP,"socks5: accepted method 0");
1356         return 0;
1357       }
1358       /* we know the method; read in the request */
1359       log_debug(LD_APP,"socks5: checking request");
1360       if (buf->datalen < 8) /* basic info plus >=2 for addr plus 2 for port */
1361         return 0; /* not yet */
1362       tor_assert(buf->head->datalen >= 8);
1363       req->command = (unsigned char) *(buf->head->data+1);
1364       if (req->command != SOCKS_COMMAND_CONNECT &&
1365           req->command != SOCKS_COMMAND_RESOLVE &&
1366           req->command != SOCKS_COMMAND_RESOLVE_PTR) {
1367         /* not a connect or resolve or a resolve_ptr? we don't support it. */
1368         log_warn(LD_APP,"socks5: command %d not recognized. Rejecting.",
1369                  req->command);
1370         return -1;
1371       }
1372       switch (*(buf->head->data+3)) { /* address type */
1373         case 1: /* IPv4 address */
1374         case 4: /* IPv6 address */ {
1375           const int is_v6 = *(buf->head->data+3) == 4;
1376           const unsigned addrlen = is_v6 ? 16 : 4;
1377           log_debug(LD_APP,"socks5: ipv4 address type");
1378           if (buf->datalen < 6+addrlen) /* ip/port there? */
1379             return 0; /* not yet */
1380
1381           if (is_v6)
1382             tor_addr_from_ipv6_bytes(&destaddr, buf->head->data+4);
1383           else
1384             tor_addr_from_ipv4n(&destaddr, get_uint32(buf->head->data+4));
1385
1386           tor_addr_to_str(tmpbuf, &destaddr, sizeof(tmpbuf), 1);
1387
1388           if (strlen(tmpbuf)+1 > MAX_SOCKS_ADDR_LEN) {
1389             log_warn(LD_APP,
1390                      "socks5 IP takes %d bytes, which doesn't fit in %d. "
1391                      "Rejecting.",
1392                      (int)strlen(tmpbuf)+1,(int)MAX_SOCKS_ADDR_LEN);
1393             return -1;
1394           }
1395           strlcpy(req->address,tmpbuf,sizeof(req->address));
1396           req->port = ntohs(get_uint16(buf->head->data+4+addrlen));
1397           buf_remove_from_front(buf, 6+addrlen);
1398           if (req->command != SOCKS_COMMAND_RESOLVE_PTR &&
1399               !addressmap_have_mapping(req->address,0) &&
1400               !have_warned_about_unsafe_socks) {
1401             log_warn(LD_APP,
1402                 "Your application (using socks5 to port %d) is giving "
1403                 "Tor only an IP address. Applications that do DNS resolves "
1404                 "themselves may leak information. Consider using Socks4A "
1405                 "(e.g. via privoxy or socat) instead. For more information, "
1406                 "please see http://wiki.noreply.org/noreply/TheOnionRouter/"
1407                 "TorFAQ#SOCKSAndDNS.%s", req->port,
1408                 safe_socks ? " Rejecting." : "");
1409             /*have_warned_about_unsafe_socks = 1;*/
1410                                       /*(for now, warn every time)*/
1411             control_event_client_status(LOG_WARN,
1412                           "DANGEROUS_SOCKS PROTOCOL=SOCKS5 ADDRESS=%s:%d",
1413                           req->address, req->port);
1414             if (safe_socks)
1415               return -1;
1416           }
1417           return 1;
1418         }
1419         case 3: /* fqdn */
1420           log_debug(LD_APP,"socks5: fqdn address type");
1421           if (req->command == SOCKS_COMMAND_RESOLVE_PTR) {
1422             log_warn(LD_APP, "socks5 received RESOLVE_PTR command with "
1423                      "hostname type. Rejecting.");
1424             return -1;
1425           }
1426           len = (unsigned char)*(buf->head->data+4);
1427           if (buf->datalen < 7+len) /* addr/port there? */
1428             return 0; /* not yet */
1429           buf_pullup(buf, 7+len, 0);
1430           tor_assert(buf->head->datalen >= 7+len);
1431           if (len+1 > MAX_SOCKS_ADDR_LEN) {
1432             log_warn(LD_APP,
1433                      "socks5 hostname is %d bytes, which doesn't fit in "
1434                      "%d. Rejecting.", len+1,MAX_SOCKS_ADDR_LEN);
1435             return -1;
1436           }
1437           memcpy(req->address,buf->head->data+5,len);
1438           req->address[len] = 0;
1439           req->port = ntohs(get_uint16(buf->head->data+5+len));
1440           buf_remove_from_front(buf, 5+len+2);
1441           if (!tor_strisprint(req->address) || strchr(req->address,'\"')) {
1442             log_warn(LD_PROTOCOL,
1443                      "Your application (using socks5 to port %d) gave Tor "
1444                      "a malformed hostname: %s. Rejecting the connection.",
1445                      req->port, escaped(req->address));
1446             return -1;
1447           }
1448           if (log_sockstype)
1449             log_notice(LD_APP,
1450                   "Your application (using socks5 to port %d) gave "
1451                   "Tor a hostname, which means Tor will do the DNS resolve "
1452                   "for you. This is good.", req->port);
1453           return 1;
1454         default: /* unsupported */
1455           log_warn(LD_APP,"socks5: unsupported address type %d. Rejecting.",
1456                    (int) *(buf->head->data+3));
1457           return -1;
1458       }
1459       tor_assert(0);
1460     case 4: /* socks4 */
1461       /* http://archive.socks.permeo.com/protocol/socks4.protocol */
1462       /* http://archive.socks.permeo.com/protocol/socks4a.protocol */
1463
1464       req->socks_version = 4;
1465       if (buf->datalen < SOCKS4_NETWORK_LEN) /* basic info available? */
1466         return 0; /* not yet */
1467       buf_pullup(buf, 1280, 0);
1468       req->command = (unsigned char) *(buf->head->data+1);
1469       if (req->command != SOCKS_COMMAND_CONNECT &&
1470           req->command != SOCKS_COMMAND_RESOLVE) {
1471         /* not a connect or resolve? we don't support it. (No resolve_ptr with
1472          * socks4.) */
1473         log_warn(LD_APP,"socks4: command %d not recognized. Rejecting.",
1474                  req->command);
1475         return -1;
1476       }
1477
1478       req->port = ntohs(*(uint16_t*)(buf->head->data+2));
1479       destip = ntohl(*(uint32_t*)(buf->head->data+4));
1480       if ((!req->port && req->command!=SOCKS_COMMAND_RESOLVE) || !destip) {
1481         log_warn(LD_APP,"socks4: Port or DestIP is zero. Rejecting.");
1482         return -1;
1483       }
1484       if (destip >> 8) {
1485         log_debug(LD_APP,"socks4: destip not in form 0.0.0.x.");
1486         in.s_addr = htonl(destip);
1487         tor_inet_ntoa(&in,tmpbuf,sizeof(tmpbuf));
1488         if (strlen(tmpbuf)+1 > MAX_SOCKS_ADDR_LEN) {
1489           log_debug(LD_APP,"socks4 addr (%d bytes) too long. Rejecting.",
1490                     (int)strlen(tmpbuf));
1491           return -1;
1492         }
1493         log_debug(LD_APP,
1494                   "socks4: successfully read destip (%s)", safe_str(tmpbuf));
1495         socks4_prot = socks4;
1496       }
1497
1498       next = memchr(buf->head->data+SOCKS4_NETWORK_LEN, 0,
1499                     buf->head->datalen-SOCKS4_NETWORK_LEN);
1500       if (!next) {
1501         if (buf->head->datalen >= 1024) {
1502           log_debug(LD_APP, "Socks4 user name too long; rejecting.");
1503           return -1;
1504         }
1505         log_debug(LD_APP,"socks4: Username not here yet.");
1506         return 0;
1507       }
1508       tor_assert(next < CHUNK_WRITE_PTR(buf->head));
1509
1510       startaddr = NULL;
1511       if (socks4_prot != socks4a &&
1512           !addressmap_have_mapping(tmpbuf,0) &&
1513           !have_warned_about_unsafe_socks) {
1514         log_warn(LD_APP,
1515                  "Your application (using socks4 to port %d) is giving Tor "
1516                  "only an IP address. Applications that do DNS resolves "
1517                  "themselves may leak information. Consider using Socks4A "
1518                  "(e.g. via privoxy or socat) instead. For more information, "
1519                  "please see http://wiki.noreply.org/noreply/TheOnionRouter/"
1520                  "TorFAQ#SOCKSAndDNS.%s", req->port,
1521                  safe_socks ? " Rejecting." : "");
1522         /*have_warned_about_unsafe_socks = 1;*/  /*(for now, warn every time)*/
1523         control_event_client_status(LOG_WARN,
1524                         "DANGEROUS_SOCKS PROTOCOL=SOCKS4 ADDRESS=%s:%d",
1525                         tmpbuf, req->port);
1526         if (safe_socks)
1527           return -1;
1528       }
1529       if (socks4_prot == socks4a) {
1530         if (next+1 == CHUNK_WRITE_PTR(buf->head)) {
1531           log_debug(LD_APP,"socks4: No part of destaddr here yet.");
1532           return 0;
1533         }
1534         startaddr = next+1;
1535         next = memchr(startaddr, 0, CHUNK_WRITE_PTR(buf->head)-startaddr);
1536         if (!next) {
1537           if (buf->head->datalen >= 1024) {
1538             log_debug(LD_APP,"socks4: Destaddr too long.");
1539             return -1;
1540           }
1541           log_debug(LD_APP,"socks4: Destaddr not all here yet.");
1542           return 0;
1543         }
1544         if (MAX_SOCKS_ADDR_LEN <= next-startaddr) {
1545           log_warn(LD_APP,"socks4: Destaddr too long. Rejecting.");
1546           return -1;
1547         }
1548         // tor_assert(next < buf->cur+buf->datalen);
1549
1550         if (log_sockstype)
1551           log_notice(LD_APP,
1552                      "Your application (using socks4a to port %d) gave "
1553                      "Tor a hostname, which means Tor will do the DNS resolve "
1554                      "for you. This is good.", req->port);
1555       }
1556       log_debug(LD_APP,"socks4: Everything is here. Success.");
1557       strlcpy(req->address, startaddr ? startaddr : tmpbuf,
1558               sizeof(req->address));
1559       if (!tor_strisprint(req->address) || strchr(req->address,'\"')) {
1560         log_warn(LD_PROTOCOL,
1561                  "Your application (using socks4 to port %d) gave Tor "
1562                  "a malformed hostname: %s. Rejecting the connection.",
1563                  req->port, escaped(req->address));
1564         return -1;
1565       }
1566       /* next points to the final \0 on inbuf */
1567       buf_remove_from_front(buf, next - buf->head->data + 1);
1568       return 1;
1569
1570     case 'G': /* get */
1571     case 'H': /* head */
1572     case 'P': /* put/post */
1573     case 'C': /* connect */
1574       strlcpy(req->reply,
1575 "HTTP/1.0 501 Tor is not an HTTP Proxy\r\n"
1576 "Content-Type: text/html; charset=iso-8859-1\r\n\r\n"
1577 "<html>\n"
1578 "<head>\n"
1579 "<title>Tor is not an HTTP Proxy</title>\n"
1580 "</head>\n"
1581 "<body>\n"
1582 "<h1>Tor is not an HTTP Proxy</h1>\n"
1583 "<p>\n"
1584 "It appears you have configured your web browser to use Tor as an HTTP proxy."
1585 "\n"
1586 "This is not correct: Tor is a SOCKS proxy, not an HTTP proxy.\n"
1587 "Please configure your client accordingly.\n"
1588 "</p>\n"
1589 "<p>\n"
1590 "See <a href=\"https://www.torproject.org/documentation.html\">"
1591            "https://www.torproject.org/documentation.html</a> for more "
1592            "information.\n"
1593 "<!-- Plus this comment, to make the body response more than 512 bytes, so "
1594 "     IE will be willing to display it. Comment comment comment comment "
1595 "     comment comment comment comment comment comment comment comment.-->\n"
1596 "</p>\n"
1597 "</body>\n"
1598 "</html>\n"
1599              , MAX_SOCKS_REPLY_LEN);
1600       req->replylen = strlen(req->reply)+1;
1601       /* fall through */
1602     default: /* version is not socks4 or socks5 */
1603       log_warn(LD_APP,
1604                "Socks version %d not recognized. (Tor is not an http proxy.)",
1605                *(buf->head->data));
1606       {
1607         char *tmp = tor_strndup(buf->head->data, 8); /*XXXX what if longer?*/
1608         control_event_client_status(LOG_WARN,
1609                                     "SOCKS_UNKNOWN_PROTOCOL DATA=\"%s\"",
1610                                     escaped(tmp));
1611         tor_free(tmp);
1612       }
1613       return -1;
1614   }
1615 }
1616
1617 /** Return 1 iff buf looks more like it has an (obsolete) v0 controller
1618  * command on it than any valid v1 controller command. */
1619 int
1620 peek_buf_has_control0_command(buf_t *buf)
1621 {
1622   if (buf->datalen >= 4) {
1623     char header[4];
1624     uint16_t cmd;
1625     peek_from_buf(header, sizeof(header), buf);
1626     cmd = ntohs(get_uint16(header+2));
1627     if (cmd <= 0x14)
1628       return 1; /* This is definitely not a v1 control command. */
1629   }
1630   return 0;
1631 }
1632
1633 /** Return the index within <b>buf</b> at which <b>ch</b> first appears,
1634  * or -1 if <b>ch</b> does not appear on buf. */
1635 static off_t
1636 buf_find_offset_of_char(buf_t *buf, char ch)
1637 {
1638   chunk_t *chunk;
1639   off_t offset = 0;
1640   for (chunk = buf->head; chunk; chunk = chunk->next) {
1641     char *cp = memchr(chunk->data, ch, chunk->datalen);
1642     if (cp)
1643       return offset + (cp - chunk->data);
1644     else
1645       offset += chunk->datalen;
1646   }
1647   return -1;
1648 }
1649
1650 /** Try to read a single LF-terminated line from <b>buf</b>, and write it,
1651  * NUL-terminated, into the *<b>data_len</b> byte buffer at <b>data_out</b>.
1652  * Set *<b>data_len</b> to the number of bytes in the line, not counting the
1653  * terminating NUL.  Return 1 if we read a whole line, return 0 if we don't
1654  * have a whole line yet, and return -1 if the line length exceeds
1655  * *<b>data_len</b>.
1656  */
1657 int
1658 fetch_from_buf_line(buf_t *buf, char *data_out, size_t *data_len)
1659 {
1660   size_t sz;
1661   off_t offset;
1662
1663   if (!buf->head)
1664     return 0;
1665
1666   offset = buf_find_offset_of_char(buf, '\n');
1667   if (offset < 0)
1668     return 0;
1669   sz = (size_t) offset;
1670   if (sz+2 > *data_len) {
1671     *data_len = sz + 2;
1672     return -1;
1673   }
1674   fetch_from_buf(data_out, sz+1, buf);
1675   data_out[sz+1] = '\0';
1676   *data_len = sz+1;
1677   return 1;
1678 }
1679
1680 /** Compress on uncompress the <b>data_len</b> bytes in <b>data</b> using the
1681  * zlib state <b>state</b>, appending the result to <b>buf</b>.  If
1682  * <b>done</b> is true, flush the data in the state and finish the
1683  * compression/uncompression.  Return -1 on failure, 0 on success. */
1684 int
1685 write_to_buf_zlib(buf_t *buf, tor_zlib_state_t *state,
1686                   const char *data, size_t data_len,
1687                   int done)
1688 {
1689   char *next;
1690   size_t old_avail, avail;
1691   int over = 0;
1692   do {
1693     int need_new_chunk = 0;
1694     if (!buf->tail || ! CHUNK_REMAINING_CAPACITY(buf->tail)) {
1695       size_t cap = data_len / 4;
1696       buf_add_chunk_with_capacity(buf, cap, 1);
1697     }
1698     next = CHUNK_WRITE_PTR(buf->tail);
1699     avail = old_avail = CHUNK_REMAINING_CAPACITY(buf->tail);
1700     switch (tor_zlib_process(state, &next, &avail, &data, &data_len, done)) {
1701       case TOR_ZLIB_DONE:
1702         over = 1;
1703         break;
1704       case TOR_ZLIB_ERR:
1705         return -1;
1706       case TOR_ZLIB_OK:
1707         if (data_len == 0)
1708           over = 1;
1709         break;
1710       case TOR_ZLIB_BUF_FULL:
1711         if (avail) {
1712           /* Zlib says we need more room (ZLIB_BUF_FULL).  Start a new chunk
1713            * automatically, whether were going to or not. */
1714           need_new_chunk = 1;
1715         }
1716         break;
1717     }
1718     buf->datalen += old_avail - avail;
1719     buf->tail->datalen += old_avail - avail;
1720     if (need_new_chunk) {
1721       buf_add_chunk_with_capacity(buf, data_len/4, 1);
1722     }
1723
1724   } while (!over);
1725   check();
1726   return 0;
1727 }
1728
1729 /** Log an error and exit if <b>buf</b> is corrupted.
1730  */
1731 void
1732 assert_buf_ok(buf_t *buf)
1733 {
1734   tor_assert(buf);
1735   tor_assert(buf->magic == BUFFER_MAGIC);
1736
1737   if (! buf->head) {
1738     tor_assert(!buf->tail);
1739     tor_assert(buf->datalen == 0);
1740   } else {
1741     chunk_t *ch;
1742     size_t total = 0;
1743     tor_assert(buf->tail);
1744     for (ch = buf->head; ch; ch = ch->next) {
1745       total += ch->datalen;
1746       tor_assert(ch->datalen <= ch->memlen);
1747       tor_assert(ch->data >= &ch->mem[0]);
1748       tor_assert(ch->data < &ch->mem[0]+ch->memlen);
1749       tor_assert(ch->data+ch->datalen <= &ch->mem[0] + ch->memlen);
1750       if (!ch->next)
1751         tor_assert(ch == buf->tail);
1752     }
1753     tor_assert(buf->datalen == total);
1754   }
1755 }
1756
1757 #ifdef ENABLE_BUF_FREELISTS
1758 /** Log an error and exit if <b>fl</b> is corrupted.
1759  */
1760 static void
1761 assert_freelist_ok(chunk_freelist_t *fl)
1762 {
1763   chunk_t *ch;
1764   int n;
1765   tor_assert(fl->alloc_size > 0);
1766   n = 0;
1767   for (ch = fl->head; ch; ch = ch->next) {
1768     tor_assert(CHUNK_ALLOC_SIZE(ch->memlen) == fl->alloc_size);
1769     ++n;
1770   }
1771   tor_assert(n == fl->cur_length);
1772   tor_assert(n >= fl->lowest_length);
1773   tor_assert(n <= fl->max_length);
1774 }
1775 #endif
1776