src/or/buffers.c

   1 /* Copyright (c) 2001 Matej Pfajfar.
   2  * Copyright (c) 2001-2004, Roger Dingledine.
   3  * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson.
   4  * Copyright (c) 2007-2008, The Tor Project, Inc. */
   5 /* See LICENSE for licensing information */
   6
   7 /**
   8  * \file buffers.c
   9  * \brief Implements a generic interface buffer.  Buffers are
  10  * fairly opaque string holders that can read to or flush from:
  11  * memory, file descriptors, or TLS connections.
  12  **/
  13 #define BUFFERS_PRIVATE
  14 #include "or.h"
  15 #ifdef HAVE_UNISTD_H
  16 #include <unistd.h>
  17 #endif
  18 #ifdef HAVE_SYS_UIO_H
  19 #include <sys/uio.h>
  20 #endif
  21
  22 //#define PARANOIA
  23
  24 #ifdef PARANOIA
  25 /** Helper: If PARANOIA is defined, assert that the buffer in local variable
  26  * <b>buf</b> is well-formed. */
  27 #define check() STMT_BEGIN assert_buf_ok(buf); STMT_END
  28 #else
  29 #define check() STMT_NIL
  30 #endif
  31
  32 /* Implementation notes:
  33  *
  34  * After flirting with memmove, and dallying with ring-buffers, we're finally
  35  * getting up to speed with the 1970s and implementing buffers as a linked
  36  * list of small chunks.  Each buffer has such a list; data is removed from
  37  * the head of the list, and added at the tail.  The list is singly linked,
  38  * and the buffer keeps a pointer to the head and the tail.
  39  *
  40  * Every chunk, except the tail, contains at least one byte of data.  Data in
  41  * each chunk is contiguous.
  42  *
  43  * When you need to treat the first N characters on a buffer as a contiguous
  44  * string, use the buf_pullup function to make them so.  Don't do this more
  45  * than necessary.
  46  *
  47  * The major free Unix kernels have handled buffers like this since, like,
  48  * forever.
  49  */
  50
  51 /* Chunk manipulation functions */
  52
  53 /** A single chunk on a buffer or in a freelist. */
  54 typedef struct chunk_t {
  55   struct chunk_t *next; /**< The next chunk on the buffer or freelist. */
  56   size_t datalen; /**< The number of bytes stored in this chunk */
  57   size_t memlen; /**< The number of usable bytes of storage in <b>mem</b>. */
  58   char *data; /**< A pointer to the first byte of data stored in <b>mem</b>. */
  59   char mem[1]; /**< The actual memory used for storage in this chunk. May be
  60                 * more than one byte long. */
  61 } chunk_t;
  62
  63 #define CHUNK_HEADER_LEN STRUCT_OFFSET(chunk_t, mem[0])
  64
  65 /** Return the number of bytes needed to allocate a chunk to hold
  66  * <b>memlen</b> bytes. */
  67 #define CHUNK_ALLOC_SIZE(memlen) (CHUNK_HEADER_LEN + (memlen))
  68 /** Return the number of usable bytes in a chunk allocated with
  69  * malloc(<b>memlen</b>). */
  70 #define CHUNK_SIZE_WITH_ALLOC(memlen) ((memlen) - CHUNK_HEADER_LEN)
  71
  72 /** Return the next character in <b>chunk</b> onto which data can be appended.
  73  * If the chunk is full, this might be off the end of chunk->mem. */
  74 static INLINE char *
  75 CHUNK_WRITE_PTR(chunk_t *chunk)
  76 {
  77   return chunk->data + chunk->datalen;
  78 }
  79
  80 /** Return the number of bytes that can be written onto <b>chunk</b> without
  81  * running out of space. */
  82 static INLINE size_t
  83 CHUNK_REMAINING_CAPACITY(const chunk_t *chunk)
  84 {
  85   return (chunk->mem + chunk->memlen) - (chunk->data + chunk->datalen);
  86 }
  87
  88 /** Move all bytes stored in <b>chunk</b> to the front of <b>chunk</b>->mem,
  89  * to free up space at the end. */
  90 static INLINE void
  91 chunk_repack(chunk_t *chunk)
  92 {
  93   if (chunk->datalen && chunk->data != &chunk->mem[0]) {
  94     memmove(chunk->mem, chunk->data, chunk->datalen);
  95   }
  96   chunk->data = &chunk->mem[0];
  97 }
  98
  99 #ifdef ENABLE_BUF_FREELISTS
 100 /** A freelist of chunks. */
 101 typedef struct chunk_freelist_t {
 102   size_t alloc_size; /**< What size chunks does this freelist hold? */
 103   int max_length; /**< Never allow more than this number of chunks in the
 104                    * freelist. */
 105   int slack; /**< When trimming the freelist, leave this number of extra
 106               * chunks beyond lowest_length.*/
 107   int cur_length; /**< How many chunks on the freelist now? */
 108   int lowest_length; /**< What's the smallest value of cur_length since the
 109                       * last time we cleaned this freelist? */
 110   uint64_t n_alloc;
 111   uint64_t n_free;
 112   uint64_t n_hit;
 113   chunk_t *head; /**< First chunk on the freelist. */
 114 } chunk_freelist_t;
 115
 116 /** Macro to help define freelists. */
 117 #define FL(a,m,s) { a, m, s, 0, 0, 0, 0, 0, NULL }
 118
 119 /** Static array of freelists, sorted by alloc_len, terminated by an entry
 120  * with alloc_size of 0. */
 121 static chunk_freelist_t freelists[] = {
 122   FL(4096, 256, 8), FL(8192, 128, 4), FL(16384, 64, 4), FL(32768, 32, 2),
 123   FL(0, 0, 0)
 124 };
 125 #undef FL
 126 /** How many times have we looked for a chunk of a size that no freelist
 127  * could help with? */
 128 static uint64_t n_freelist_miss = 0;
 129
 130 static void assert_freelist_ok(chunk_freelist_t *fl);
 131
 132 /** Return the freelist to hold chunks of size <b>alloc</b>, or NULL if
 133  * no freelist exists for that size. */
 134 static INLINE chunk_freelist_t *
 135 get_freelist(size_t alloc)
 136 {
 137   int i;
 138   for (i=0; freelists[i].alloc_size <= alloc; ++i) {
 139     if (freelists[i].alloc_size == alloc) {
 140       return &freelists[i];
 141     }
 142   }
 143   return NULL;
 144 }
 145
 146 /** Deallocate a chunk or put it on a freelist */
 147 static void
 148 chunk_free(chunk_t *chunk)
 149 {
 150   size_t alloc = CHUNK_ALLOC_SIZE(chunk->memlen);
 151   chunk_freelist_t *freelist = get_freelist(alloc);
 152   if (freelist && freelist->cur_length < freelist->max_length) {
 153     chunk->next = freelist->head;
 154     freelist->head = chunk;
 155     ++freelist->cur_length;
 156   } else {
 157     if (freelist)
 158       ++freelist->n_free;
 159     tor_free(chunk);
 160   }
 161 }
 162
 163 /** Allocate a new chunk with a given allocation size, or get one from the
 164  * freelist.  Note that a chunk with allocation size A can actualy hold only
 165  * CHUNK_SIZE_WITH_ALLOC(A) bytes in its mem field. */
 166 static INLINE chunk_t *
 167 chunk_new_with_alloc_size(size_t alloc)
 168 {
 169   chunk_t *ch;
 170   chunk_freelist_t *freelist;
 171   tor_assert(alloc >= sizeof(chunk_t));
 172   freelist = get_freelist(alloc);
 173   if (freelist && freelist->head) {
 174     ch = freelist->head;
 175     freelist->head = ch->next;
 176     if (--freelist->cur_length < freelist->lowest_length)
 177       freelist->lowest_length = freelist->cur_length;
 178     ++freelist->n_hit;
 179   } else {
 180     /* XXXX take advantage of tor_malloc_roundup, once we know how that
 181      * affects freelists. */
 182     if (freelist)
 183       ++freelist->n_alloc;
 184     else
 185       ++n_freelist_miss;
 186     ch = tor_malloc(alloc);
 187   }
 188   ch->next = NULL;
 189   ch->datalen = 0;
 190   ch->memlen = CHUNK_SIZE_WITH_ALLOC(alloc);
 191   ch->data = &ch->mem[0];
 192   return ch;
 193 }
 194 #else
 195 static void
 196 chunk_free(chunk_t *chunk)
 197 {
 198   tor_free(chunk);
 199 }
 200 static INLINE chunk_t *
 201 chunk_new_with_alloc_size(size_t alloc)
 202 {
 203   chunk_t *ch;
 204   ch = tor_malloc_roundup(&alloc);
 205   ch->next = NULL;
 206   ch->datalen = 0;
 207   ch->memlen = CHUNK_SIZE_WITH_ALLOC(alloc);
 208   ch->data = &ch->mem[0];
 209   return ch;
 210 }
 211 #endif
 212
 213 /** Expand <b>chunk</b> until it can hold <b>sz</b> bytes, and return a
 214  * new pointer to <b>chunk</b>.  Old pointers are no longer valid. */
 215 static INLINE chunk_t *
 216 chunk_grow(chunk_t *chunk, size_t sz)
 217 {
 218   off_t offset;
 219   tor_assert(sz > chunk->memlen);
 220   offset = chunk->data - chunk->mem;
 221   chunk = tor_realloc(chunk, CHUNK_ALLOC_SIZE(sz));
 222   chunk->memlen = sz;
 223   chunk->data = chunk->mem + offset;
 224   return chunk;
 225 }
 226
 227 /** If a read onto the end of a chunk would be smaller than this number, then
 228  * just start a new chunk. */
 229 #define MIN_READ_LEN 8
 230 /** Every chunk should take up at least this many bytes. */
 231 #define MIN_CHUNK_ALLOC 256
 232 /** No chunk should take up more than this many bytes. */
 233 #define MAX_CHUNK_ALLOC 65536
 234
 235 /** Return the allocation size we'd like to use to hold <b>target</b>
 236  * bytes. */
 237 static INLINE size_t
 238 preferred_chunk_size(size_t target)
 239 {
 240   size_t sz = MIN_CHUNK_ALLOC;
 241   while (CHUNK_SIZE_WITH_ALLOC(sz) < target) {
 242     sz <<= 1;
 243   }
 244   return sz;
 245 }
 246
 247 /** Remove from the freelists most chunks that have not been used since the
 248  * last call to buf_shrink_freelists(). */
 249 void
 250 buf_shrink_freelists(int free_all)
 251 {
 252 #ifdef ENABLE_BUF_FREELISTS
 253   int i;
 254   for (i = 0; freelists[i].alloc_size; ++i) {
 255     int slack = freelists[i].slack;
 256     assert_freelist_ok(&freelists[i]);
 257     if (free_all || freelists[i].lowest_length > slack) {
 258       int n_to_free = free_all ? freelists[i].cur_length :
 259         (freelists[i].lowest_length - slack);
 260       int n_to_skip = freelists[i].cur_length - n_to_free;
 261       int new_length = n_to_skip;
 262       chunk_t **chp = &freelists[i].head;
 263       chunk_t *chunk;
 264       log_info(LD_MM, "Cleaning freelist for %d-byte chunks: keeping %d, "
 265                "dropping %d.",
 266                (int)freelists[i].alloc_size, n_to_skip, n_to_free);
 267       while (n_to_skip) {
 268         tor_assert((*chp)->next);
 269         chp = &(*chp)->next;
 270         --n_to_skip;
 271       }
 272       chunk = *chp;
 273       *chp = NULL;
 274       while (chunk) {
 275         chunk_t *next = chunk->next;
 276         tor_free(chunk);
 277         chunk = next;
 278         --n_to_free;
 279         ++freelists[i].n_free;
 280       }
 281       tor_assert(!n_to_free);
 282       freelists[i].cur_length = new_length;
 283     }
 284     freelists[i].lowest_length = freelists[i].cur_length;
 285     assert_freelist_ok(&freelists[i]);
 286   }
 287 #else
 288   (void) free_all;
 289 #endif
 290 }
 291
 292 /** Describe the current status of the freelists at log level <b>severity</b>.
 293  */
 294 void
 295 buf_dump_freelist_sizes(int severity)
 296 {
 297 #ifdef ENABLE_BUF_FREELISTS
 298   int i;
 299   log(severity, LD_MM, "====== Buffer freelists:");
 300   for (i = 0; freelists[i].alloc_size; ++i) {
 301     uint64_t total = ((uint64_t)freelists[i].cur_length) *
 302       freelists[i].alloc_size;
 303     log(severity, LD_MM,
 304         U64_FORMAT" bytes in %d %d-byte chunks ["U64_FORMAT
 305         " misses; "U64_FORMAT" frees; "U64_FORMAT" hits]",
 306         U64_PRINTF_ARG(total),
 307         freelists[i].cur_length, (int)freelists[i].alloc_size,
 308         U64_PRINTF_ARG(freelists[i].n_alloc),
 309         U64_PRINTF_ARG(freelists[i].n_free),
 310         U64_PRINTF_ARG(freelists[i].n_hit));
 311   }
 312   log(severity, LD_MM, U64_FORMAT" allocations in non-freelist sizes",
 313       U64_PRINTF_ARG(n_freelist_miss));
 314 #else
 315   (void)severity;
 316 #endif
 317 }
 318
 319 /** Magic value for buf_t.magic, to catch pointer errors. */
 320 #define BUFFER_MAGIC 0xB0FFF312u
 321 /** A resizeable buffer, optimized for reading and writing. */
 322 struct buf_t {
 323   uint32_t magic; /**< Magic cookie for debugging: Must be set to
 324                    *   BUFFER_MAGIC. */
 325   size_t datalen; /**< How many bytes is this buffer holding right now? */
 326   size_t default_chunk_size; /**< Don't allocate any chunks smaller than
 327                               * this for this buffer. */
 328   chunk_t *head; /**< First chunk in the list, or NULL for none. */
 329   chunk_t *tail; /**< Last chunk in the list, or NULL for none. */
 330 };
 331
 332 /** Collapse data from the first N chunks from <b>buf</b> into buf->head,
 333  * growing it as necessary, until buf->head has the first <b>bytes</b> bytes
 334  * of data from the buffer, or until buf->head has all the data in <b>buf</b>.
 335  *
 336  * If <b>nulterminate</b> is true, ensure that there is a 0 byte in
 337  * buf->head->mem right after all the data. */
 338 static void
 339 buf_pullup(buf_t *buf, size_t bytes, int nulterminate)
 340 {
 341   chunk_t *dest, *src;
 342   size_t capacity;
 343   if (!buf->head)
 344     return;
 345
 346   check();
 347   if (buf->datalen < bytes)
 348     bytes = buf->datalen;
 349
 350   if (nulterminate) {
 351     capacity = bytes + 1;
 352     if (buf->head->datalen >= bytes && CHUNK_REMAINING_CAPACITY(buf->head)) {
 353       *CHUNK_WRITE_PTR(buf->head) = '\0';
 354       return;
 355     }
 356   } else {
 357     capacity = bytes;
 358     if (buf->head->datalen >= bytes)
 359       return;
 360   }
 361
 362   if (buf->head->memlen >= capacity) {
 363     /* We don't need to grow the first chunk, but we might need to repack it.*/
 364     if (CHUNK_REMAINING_CAPACITY(buf->head) < capacity-buf->datalen)
 365       chunk_repack(buf->head);
 366     tor_assert(CHUNK_REMAINING_CAPACITY(buf->head) >= capacity-buf->datalen);
 367   } else {
 368     chunk_t *newhead;
 369     size_t newsize;
 370     /* We need to grow the chunk. */
 371     chunk_repack(buf->head);
 372     newsize = CHUNK_SIZE_WITH_ALLOC(preferred_chunk_size(capacity));
 373     newhead = chunk_grow(buf->head, newsize);
 374     tor_assert(newhead->memlen >= capacity);
 375     if (newhead != buf->head) {
 376       if (buf->tail == buf->head)
 377         buf->tail = newhead;
 378       buf->head = newhead;
 379     }
 380   }
 381
 382   dest = buf->head;
 383   while (dest->datalen < bytes) {
 384     size_t n = bytes - dest->datalen;
 385     src = dest->next;
 386     tor_assert(src);
 387     if (n > src->datalen) {
 388       memcpy(CHUNK_WRITE_PTR(dest), src->data, src->datalen);
 389       dest->datalen += src->datalen;
 390       dest->next = src->next;
 391       if (buf->tail == src)
 392         buf->tail = dest;
 393       chunk_free(src);
 394     } else {
 395       memcpy(CHUNK_WRITE_PTR(dest), src->data, n);
 396       dest->datalen += n;
 397       src->data += n;
 398       src->datalen -= n;
 399       tor_assert(dest->datalen == bytes);
 400     }
 401   }
 402
 403   if (nulterminate) {
 404     tor_assert(CHUNK_REMAINING_CAPACITY(buf->head));
 405     *CHUNK_WRITE_PTR(buf->head) = '\0';
 406   }
 407
 408   check();
 409 }
 410
 411 /** Resize buf so it won't hold extra memory that we haven't been
 412  * using lately.
 413  */
 414 void
 415 buf_shrink(buf_t *buf)
 416 {
 417   (void)buf;
 418 }
 419
 420 /** Remove the first <b>n</b> bytes from buf. */
 421 static INLINE void
 422 buf_remove_from_front(buf_t *buf, size_t n)
 423 {
 424   tor_assert(buf->datalen >= n);
 425   while (n) {
 426     tor_assert(buf->head);
 427     if (buf->head->datalen > n) {
 428       buf->head->datalen -= n;
 429       buf->head->data += n;
 430       buf->datalen -= n;
 431       return;
 432     } else {
 433       chunk_t *victim = buf->head;
 434       n -= victim->datalen;
 435       buf->datalen -= victim->datalen;
 436       buf->head = victim->next;
 437       if (buf->tail == victim)
 438         buf->tail = NULL;
 439       chunk_free(victim);
 440     }
 441   }
 442   check();
 443 }
 444
 445 /** Create and return a new buf with default chunk capacity <b>size</b>.
 446  */
 447 buf_t *
 448 buf_new_with_capacity(size_t size)
 449 {
 450   buf_t *b = buf_new();
 451   b->default_chunk_size = preferred_chunk_size(size);
 452   return b;
 453 }
 454
 455 /** Allocate and return a new buffer with default capacity. */
 456 buf_t *
 457 buf_new(void)
 458 {
 459   buf_t *buf = tor_malloc_zero(sizeof(buf_t));
 460   buf->magic = BUFFER_MAGIC;
 461   buf->default_chunk_size = 4096;
 462   return buf;
 463 }
 464
 465 /** Remove all data from <b>buf</b>. */
 466 void
 467 buf_clear(buf_t *buf)
 468 {
 469   chunk_t *chunk, *next;
 470   buf->datalen = 0;
 471   for (chunk = buf->head; chunk; chunk = next) {
 472     next = chunk->next;
 473     chunk_free(chunk);
 474   }
 475   buf->head = buf->tail = NULL;
 476 }
 477
 478 /** Return the number of bytes stored in <b>buf</b> */
 479 size_t
 480 buf_datalen(const buf_t *buf)
 481 {
 482   return buf->datalen;
 483 }
 484
 485 /** Return the total length of all chunks used in <b>buf</b>. */
 486 size_t
 487 buf_allocation(const buf_t *buf)
 488 {
 489   size_t total = 0;
 490   const chunk_t *chunk;
 491   for (chunk = buf->head; chunk; chunk = chunk->next) {
 492     total += chunk->memlen;
 493   }
 494   return total;
 495 }
 496
 497 /** Return the number of bytes that can be added to <b>buf</b> without
 498  * performing any additional allocation. */
 499 size_t
 500 buf_slack(const buf_t *buf)
 501 {
 502   if (!buf->tail)
 503     return 0;
 504   else
 505     return CHUNK_REMAINING_CAPACITY(buf->tail);
 506 }
 507
 508 /** Release storage held by <b>buf</b>. */
 509 void
 510 buf_free(buf_t *buf)
 511 {
 512   buf_clear(buf);
 513   buf->magic = 0xdeadbeef;
 514   tor_free(buf);
 515 }
 516
 517 /** Append a new chunk with enough capacity to hold <b>capacity</b> bytes to
 518  * the tail of <b>buf</b>.  If <b>capped</b>, don't allocate a chunk bigger
 519  * than MAX_CHUNK_ALLOC. */
 520 static chunk_t *
 521 buf_add_chunk_with_capacity(buf_t *buf, size_t capacity, int capped)
 522 {
 523   chunk_t *chunk;
 524   if (CHUNK_ALLOC_SIZE(capacity) < buf->default_chunk_size) {
 525     chunk = chunk_new_with_alloc_size(buf->default_chunk_size);
 526   } else if (capped && CHUNK_ALLOC_SIZE(capacity) > MAX_CHUNK_ALLOC) {
 527     chunk = chunk_new_with_alloc_size(MAX_CHUNK_ALLOC);
 528   } else {
 529     chunk = chunk_new_with_alloc_size(preferred_chunk_size(capacity));
 530   }
 531   if (buf->tail) {
 532     tor_assert(buf->head);
 533     buf->tail->next = chunk;
 534     buf->tail = chunk;
 535   } else {
 536     tor_assert(!buf->head);
 537     buf->head = buf->tail = chunk;
 538   }
 539   check();
 540   return chunk;
 541 }
 542
 543 /** If we're using readv and writev, how many chunks are we willing to
 544  * read/write at a time? */
 545 #define N_IOV 3
 546
 547 /** Read up to <b>at_most</b> bytes from the socket <b>fd</b> into
 548  * <b>chunk</b> (which must be on <b>buf</b>). If we get an EOF, set
 549  * *<b>reached_eof</b> to 1.  Return -1 on error, 0 on eof or blocking,
 550  * and the number of bytes read otherwise. */
 551 static INLINE int
 552 read_to_chunk(buf_t *buf, chunk_t *chunk, int fd, size_t at_most,
 553               int *reached_eof, int *socket_error)
 554 {
 555   ssize_t read_result;
 556 #if 0 && defined(HAVE_READV) && !defined(WIN32)
 557   struct iovec iov[N_IOV];
 558   int i;
 559   size_t remaining = at_most;
 560   for (i=0; chunk && i < N_IOV && remaining; ++i) {
 561     iov[i].iov_base = CHUNK_WRITE_PTR(chunk);
 562     if (remaining > CHUNK_REMAINING_CAPACITY(chunk))
 563       iov[i].iov_len = CHUNK_REMAINING_CAPACITY(chunk);
 564     else
 565       iov[i].iov_len = remaining;
 566     remaining -= iov[i].iov_len;
 567     chunk = chunk->next;
 568   }
 569   read_result = readv(fd, iov, i);
 570 #else
 571   if (at_most > CHUNK_REMAINING_CAPACITY(chunk))
 572     at_most = CHUNK_REMAINING_CAPACITY(chunk);
 573   read_result = tor_socket_recv(fd, CHUNK_WRITE_PTR(chunk), at_most, 0);
 574 #endif
 575
 576   if (read_result < 0) {
 577     int e = tor_socket_errno(fd);
 578     if (!ERRNO_IS_EAGAIN(e)) { /* it's a real error */
 579 #ifdef MS_WINDOWS
 580       if (e == WSAENOBUFS)
 581         log_warn(LD_NET,"recv() failed: WSAENOBUFS. Not enough ram?");
 582 #endif
 583       *socket_error = e;
 584       return -1;
 585     }
 586     return 0; /* would block. */
 587   } else if (read_result == 0) {
 588     log_debug(LD_NET,"Encountered eof on fd %d", (int)fd);
 589     *reached_eof = 1;
 590     return 0;
 591   } else { /* actually got bytes. */
 592     buf->datalen += read_result;
 593 #if 0 && defined(HAVE_READV) && !defined(WIN32)
 594     while ((size_t)read_result > CHUNK_REMAINING_CAPACITY(chunk)) {
 595       chunk->datalen += CHUNK_REMAINING_CAPACITY(chunk);
 596       read_result -= CHUNK_REMAINING_CAPACITY(chunk);
 597       chunk = chunk->next;
 598       tor_assert(chunk);
 599     }
 600 #endif
 601     chunk->datalen += read_result;
 602     log_debug(LD_NET,"Read %ld bytes. %d on inbuf.", (long)read_result,
 603               (int)buf->datalen);
 604     tor_assert(read_result < INT_MAX);
 605     return (int)read_result;
 606   }
 607 }
 608
 609 /** As read_to_chunk(), but return (negative) error code on error, blocking,
 610  * or TLS, and the number of bytes read otherwise. */
 611 static INLINE int
 612 read_to_chunk_tls(buf_t *buf, chunk_t *chunk, tor_tls_t *tls,
 613                   size_t at_most)
 614 {
 615   int read_result;
 616
 617   tor_assert(CHUNK_REMAINING_CAPACITY(chunk) >= at_most);
 618   read_result = tor_tls_read(tls, CHUNK_WRITE_PTR(chunk), at_most);
 619   if (read_result < 0)
 620     return read_result;
 621   buf->datalen += read_result;
 622   chunk->datalen += read_result;
 623   return read_result;
 624 }
 625
 626 /** Read from socket <b>s</b>, writing onto end of <b>buf</b>.  Read at most
 627  * <b>at_most</b> bytes, growing the buffer as necessary.  If recv() returns 0
 628  * (because of EOF), set *<b>reached_eof</b> to 1 and return 0. Return -1 on
 629  * error; else return the number of bytes read.
 630  */
 631 /* XXXX021 indicate "read blocked" somehow? */
 632 int
 633 read_to_buf(int s, size_t at_most, buf_t *buf, int *reached_eof,
 634             int *socket_error)
 635 {
 636   /* XXXX021 It's stupid to overload the return values for these functions:
 637    * "error status" and "number of bytes read" are not mutually exclusive.
 638    */
 639   int r = 0;
 640   size_t total_read = 0;
 641
 642   check();
 643   tor_assert(reached_eof);
 644   tor_assert(s >= 0);
 645
 646   while (at_most > total_read) {
 647     size_t readlen = at_most - total_read;
 648     chunk_t *chunk;
 649     if (!buf->tail || CHUNK_REMAINING_CAPACITY(buf->tail) < MIN_READ_LEN) {
 650       chunk = buf_add_chunk_with_capacity(buf, at_most, 1);
 651       if (readlen > chunk->memlen)
 652         readlen = chunk->memlen;
 653     } else {
 654       size_t cap = CHUNK_REMAINING_CAPACITY(buf->tail);
 655       chunk = buf->tail;
 656       if (cap < readlen)
 657         readlen = cap;
 658     }
 659
 660     r = read_to_chunk(buf, chunk, s, readlen, reached_eof, socket_error);
 661     check();
 662     if (r < 0)
 663       return r; /* Error */
 664     tor_assert(total_read+r < INT_MAX);
 665     total_read += r;
 666     if ((size_t)r < readlen) { /* eof, block, or no more to read. */
 667       break;
 668     }
 669   }
 670   return (int)total_read;
 671 }
 672
 673 /** As read_to_buf, but reads from a TLS connection, and returns a TLS
 674  * status value rather than the number of bytes read.
 675  *
 676  * Using TLS on OR connections complicates matters in two ways.
 677  *
 678  * First, a TLS stream has its own read buffer independent of the
 679  * connection's read buffer.  (TLS needs to read an entire frame from
 680  * the network before it can decrypt any data.  Thus, trying to read 1
 681  * byte from TLS can require that several KB be read from the network
 682  * and decrypted.  The extra data is stored in TLS's decrypt buffer.)
 683  * Because the data hasn't been read by Tor (it's still inside the TLS),
 684  * this means that sometimes a connection "has stuff to read" even when
 685  * poll() didn't return POLLIN. The tor_tls_get_pending_bytes function is
 686  * used in connection.c to detect TLS objects with non-empty internal
 687  * buffers and read from them again.
 688  *
 689  * Second, the TLS stream's events do not correspond directly to network
 690  * events: sometimes, before a TLS stream can read, the network must be
 691  * ready to write -- or vice versa.
 692  */
 693 int
 694 read_to_buf_tls(tor_tls_t *tls, size_t at_most, buf_t *buf)
 695 {
 696   int r = 0;
 697   size_t total_read = 0;
 698   check();
 699
 700   while (at_most > total_read) {
 701     size_t readlen = at_most - total_read;
 702     chunk_t *chunk;
 703     if (!buf->tail || CHUNK_REMAINING_CAPACITY(buf->tail) < MIN_READ_LEN) {
 704       chunk = buf_add_chunk_with_capacity(buf, at_most, 1);
 705       if (readlen > chunk->memlen)
 706         readlen = chunk->memlen;
 707     } else {
 708       size_t cap = CHUNK_REMAINING_CAPACITY(buf->tail);
 709       chunk = buf->tail;
 710       if (cap < readlen)
 711         readlen = cap;
 712     }
 713
 714     r = read_to_chunk_tls(buf, chunk, tls, readlen);
 715     check();
 716     if (r < 0)
 717       return r; /* Error */
 718     tor_assert(total_read+r < INT_MAX);
 719      total_read += r;
 720     if ((size_t)r < readlen) /* eof, block, or no more to read. */
 721       break;
 722   }
 723   return (int)total_read;
 724 }
 725
 726 /** Helper for flush_buf(): try to write <b>sz</b> bytes from chunk
 727  * <b>chunk</b> of buffer <b>buf</b> onto socket <b>s</b>.  On success, deduct
 728  * the bytes written from *<b>buf_flushlen</b>.  Return the number of bytes
 729  * written on success, 0 on blocking, -1 on failure.
 730  */
 731 static INLINE int
 732 flush_chunk(int s, buf_t *buf, chunk_t *chunk, size_t sz,
 733             size_t *buf_flushlen)
 734 {
 735   ssize_t write_result;
 736 #if 0 && defined(HAVE_WRITEV) && !defined(WIN32)
 737   struct iovec iov[N_IOV];
 738   int i;
 739   size_t remaining = sz;
 740   for (i=0; chunk && i < N_IOV && remaining; ++i) {
 741     iov[i].iov_base = chunk->data;
 742     if (remaining > chunk->datalen)
 743       iov[i].iov_len = chunk->datalen;
 744     else
 745       iov[i].iov_len = remaining;
 746     remaining -= iov[i].iov_len;
 747     chunk = chunk->next;
 748   }
 749   write_result = writev(s, iov, i);
 750 #else
 751   if (sz > chunk->datalen)
 752     sz = chunk->datalen;
 753   write_result = tor_socket_send(s, chunk->data, sz, 0);
 754 #endif
 755
 756   if (write_result < 0) {
 757     int e = tor_socket_errno(s);
 758     if (!ERRNO_IS_EAGAIN(e)) { /* it's a real error */
 759 #ifdef MS_WINDOWS
 760       if (e == WSAENOBUFS)
 761         log_warn(LD_NET,"write() failed: WSAENOBUFS. Not enough ram?");
 762 #endif
 763       return -1;
 764     }
 765     log_debug(LD_NET,"write() would block, returning.");
 766     return 0;
 767   } else {
 768     *buf_flushlen -= write_result;
 769     buf_remove_from_front(buf, write_result);
 770     tor_assert(write_result < INT_MAX);
 771     return (int)write_result;
 772   }
 773 }
 774
 775 /** Helper for flush_buf_tls(): try to write <b>sz</b> bytes from chunk
 776  * <b>chunk</b> of buffer <b>buf</b> onto socket <b>s</b>.  (Tries to write
 777  * more if there is a forced pending write size.)  On success, deduct the
 778  * bytes written from *<b>buf_flushlen</b>.  Return the number of bytes
 779  * written on success, and a TOR_TLS error code on failue or blocking.
 780  */
 781 static INLINE int
 782 flush_chunk_tls(tor_tls_t *tls, buf_t *buf, chunk_t *chunk,
 783                 size_t sz, size_t *buf_flushlen)
 784 {
 785   int r;
 786   size_t forced;
 787   char *data;
 788
 789   forced = tor_tls_get_forced_write_size(tls);
 790   if (forced > sz)
 791     sz = forced;
 792   if (chunk) {
 793     data = chunk->data;
 794     tor_assert(sz <= chunk->datalen);
 795   } else {
 796     data = NULL;
 797     tor_assert(sz == 0);
 798   }
 799   r = tor_tls_write(tls, data, sz);
 800   if (r < 0)
 801     return r;
 802   if (*buf_flushlen > (size_t)r)
 803     *buf_flushlen -= r;
 804   else
 805     *buf_flushlen = 0;
 806   buf_remove_from_front(buf, r);
 807   log_debug(LD_NET,"flushed %d bytes, %d ready to flush, %d remain.",
 808             r,(int)*buf_flushlen,(int)buf->datalen);
 809   return r;
 810 }
 811
 812 /** Write data from <b>buf</b> to the socket <b>s</b>.  Write at most
 813  * <b>sz</b> bytes, decrement *<b>buf_flushlen</b> by
 814  * the number of bytes actually written, and remove the written bytes
 815  * from the buffer.  Return the number of bytes written on success,
 816  * -1 on failure.  Return 0 if write() would block.
 817  */
 818 int
 819 flush_buf(int s, buf_t *buf, size_t sz, size_t *buf_flushlen)
 820 {
 821   /* XXXX021 It's stupid to overload the return values for these functions:
 822    * "error status" and "number of bytes flushed" are not mutually exclusive.
 823    */
 824   int r;
 825   size_t flushed = 0;
 826   tor_assert(buf_flushlen);
 827   tor_assert(s >= 0);
 828   tor_assert(*buf_flushlen <= buf->datalen);
 829   tor_assert(sz <= *buf_flushlen);
 830
 831   check();
 832   while (sz) {
 833     size_t flushlen0;
 834     tor_assert(buf->head);
 835     if (buf->head->datalen >= sz)
 836       flushlen0 = sz;
 837     else
 838       flushlen0 = buf->head->datalen;
 839
 840     r = flush_chunk(s, buf, buf->head, flushlen0, buf_flushlen);
 841     check();
 842     if (r < 0)
 843       return r;
 844     flushed += r;
 845     sz -= r;
 846     if (r == 0 || (size_t)r < flushlen0) /* can't flush any more now. */
 847       break;
 848   }
 849   tor_assert(flushed < INT_MAX);
 850   return (int)flushed;
 851 }
 852
 853 /** As flush_buf(), but writes data to a TLS connection.  Can write more than
 854  * <b>flushlen</b> bytes.
 855  */
 856 int
 857 flush_buf_tls(tor_tls_t *tls, buf_t *buf, size_t flushlen,
 858               size_t *buf_flushlen)
 859 {
 860   int r;
 861   size_t flushed = 0;
 862   ssize_t sz;
 863   tor_assert(buf_flushlen);
 864   tor_assert(*buf_flushlen <= buf->datalen);
 865   tor_assert(flushlen <= *buf_flushlen);
 866   sz = (ssize_t) flushlen;
 867
 868   /* we want to let tls write even if flushlen is zero, because it might
 869    * have a partial record pending */
 870   check_no_tls_errors();
 871
 872   check();
 873   do {
 874     size_t flushlen0;
 875     if (buf->head) {
 876       if ((ssize_t)buf->head->datalen >= sz)
 877         flushlen0 = sz;
 878       else
 879         flushlen0 = buf->head->datalen;
 880     } else {
 881       flushlen0 = 0;
 882     }
 883
 884     r = flush_chunk_tls(tls, buf, buf->head, flushlen0, buf_flushlen);
 885     check();
 886     if (r < 0)
 887       return r;
 888     flushed += r;
 889     sz -= r;
 890     if (r == 0) /* Can't flush any more now. */
 891       break;
 892   } while (sz > 0);
 893   tor_assert(flushed < INT_MAX);
 894   return (int)flushed;
 895 }
 896
 897 /** Append <b>string_len</b> bytes from <b>string</b> to the end of
 898  * <b>buf</b>.
 899  *
 900  * Return the new length of the buffer on success, -1 on failure.
 901  */
 902 int
 903 write_to_buf(const char *string, size_t string_len, buf_t *buf)
 904 {
 905   if (!string_len)
 906     return (int)buf->datalen;
 907   check();
 908
 909   while (string_len) {
 910     size_t copy;
 911     if (!buf->tail || !CHUNK_REMAINING_CAPACITY(buf->tail))
 912       buf_add_chunk_with_capacity(buf, string_len, 1);
 913
 914     copy = CHUNK_REMAINING_CAPACITY(buf->tail);
 915     if (copy > string_len)
 916       copy = string_len;
 917     memcpy(CHUNK_WRITE_PTR(buf->tail), string, copy);
 918     string_len -= copy;
 919     string += copy;
 920     buf->datalen += copy;
 921     buf->tail->datalen += copy;
 922   }
 923
 924   check();
 925   tor_assert(buf->datalen < INT_MAX);
 926   return (int)buf->datalen;
 927 }
 928
 929 /** Helper: copy the first <b>string_len</b> bytes from <b>buf</b>
 930  * onto <b>string</b>.
 931  */
 932 static INLINE void
 933 peek_from_buf(char *string, size_t string_len, const buf_t *buf)
 934 {
 935   chunk_t *chunk;
 936
 937   tor_assert(string);
 938   /* make sure we don't ask for too much */
 939   tor_assert(string_len <= buf->datalen);
 940   /* assert_buf_ok(buf); */
 941
 942   chunk = buf->head;
 943   while (string_len) {
 944     size_t copy = string_len;
 945     tor_assert(chunk);
 946     if (chunk->datalen < copy)
 947       copy = chunk->datalen;
 948     memcpy(string, chunk->data, copy);
 949     string_len -= copy;
 950     string += copy;
 951     chunk = chunk->next;
 952   }
 953 }
 954
 955 /** Remove <b>string_len</b> bytes from the front of <b>buf</b>, and store
 956  * them into <b>string</b>.  Return the new buffer size.  <b>string_len</b>
 957  * must be \<= the number of bytes on the buffer.
 958  */
 959 int
 960 fetch_from_buf(char *string, size_t string_len, buf_t *buf)
 961 {
 962   /* There must be string_len bytes in buf; write them onto string,
 963    * then memmove buf back (that is, remove them from buf).
 964    *
 965    * Return the number of bytes still on the buffer. */
 966
 967   check();
 968   peek_from_buf(string, string_len, buf);
 969   buf_remove_from_front(buf, string_len);
 970   check();
 971   tor_assert(buf->datalen < INT_MAX);
 972   return (int)buf->datalen;
 973 }
 974
 975 /** Check <b>buf</b> for a variable-length cell according to the rules of link
 976  * protocol version <b>linkproto</b>.  If one is found, pull it off the buffer
 977  * and assign a newly allocated var_cell_t to *<b>out</b>, and return 1.
 978  * Return 0 if whatever is on the start of buf_t is not a variable-length
 979  * cell.  Return 1 and set *<b>out</b> to NULL if there seems to be the start
 980  * of a variable-length cell on <b>buf</b>, but the whole thing isn't there
 981  * yet. */
 982 int
 983 fetch_var_cell_from_buf(buf_t *buf, var_cell_t **out, int linkproto)
 984 {
 985   char hdr[VAR_CELL_HEADER_SIZE];
 986   var_cell_t *result;
 987   uint8_t command;
 988   uint16_t length;
 989   /* If linkproto is unknown (0) or v2 (2), variable-length cells work as
 990    * implemented here. If it's 1, there are no variable-length cells.  Tor
 991    * does not support other versions right now, and so can't negotiate them.
 992    */
 993   if (linkproto == 1)
 994     return 0;
 995   check();
 996   *out = NULL;
 997   if (buf->datalen < VAR_CELL_HEADER_SIZE)
 998     return 0;
 999   peek_from_buf(hdr, sizeof(hdr), buf);
1000
1001   command = get_uint8(hdr+2);
1002   if (!(CELL_COMMAND_IS_VAR_LENGTH(command)))
1003     return 0;
1004
1005   length = ntohs(get_uint16(hdr+3));
1006   if (buf->datalen < (size_t)(VAR_CELL_HEADER_SIZE+length))
1007     return 1;
1008   result = var_cell_new(length);
1009   result->command = command;
1010   result->circ_id = ntohs(get_uint16(hdr));
1011
1012   buf_remove_from_front(buf, VAR_CELL_HEADER_SIZE);
1013   peek_from_buf(result->payload, length, buf);
1014   buf_remove_from_front(buf, length);
1015   check();
1016
1017   *out = result;
1018   return 1;
1019 }
1020
1021 /** Move up to *<b>buf_flushlen</b> bytes from <b>buf_in</b> to
1022  * <b>buf_out</b>, and modify *<b>buf_flushlen</b> appropriately.
1023  * Return the number of bytes actually copied.
1024  */
1025 int
1026 move_buf_to_buf(buf_t *buf_out, buf_t *buf_in, size_t *buf_flushlen)
1027 {
1028   /* XXXX we can do way better here, but this doesn't turn up in any
1029    * profiles. */
1030   char b[4096];
1031   size_t cp, len;
1032   len = *buf_flushlen;
1033   if (len > buf_in->datalen)
1034     len = buf_in->datalen;
1035
1036   cp = len; /* Remember the number of bytes we intend to copy. */
1037   tor_assert(cp < INT_MAX);
1038   while (len) {
1039     /* This isn't the most efficient implementation one could imagine, since
1040      * it does two copies instead of 1, but I kinda doubt that this will be
1041      * critical path. */
1042     size_t n = len > sizeof(b) ? sizeof(b) : len;
1043     fetch_from_buf(b, n, buf_in);
1044     write_to_buf(b, n, buf_out);
1045     len -= n;
1046   }
1047   *buf_flushlen -= cp;
1048   return (int)cp;
1049 }
1050
1051 /** Internal structure: represents a position in a buffer. */
1052 typedef struct buf_pos_t {
1053   const chunk_t *chunk; /**< Which chunk are we pointing to? */
1054   int pos;/**< Which character inside the chunk's data are we pointing to? */
1055   size_t chunk_pos; /**< Total length of all previous chunks. */
1056 } buf_pos_t;
1057
1058 /** Initialize <b>out</b> to point to the first character of <b>buf</b>.*/
1059 static void
1060 buf_pos_init(const buf_t *buf, buf_pos_t *out)
1061 {
1062   out->chunk = buf->head;
1063   out->pos = 0;
1064   out->chunk_pos = 0;
1065 }
1066
1067 /** Advance <b>out</b> to the first appearance of <b>ch</b> at the current
1068  * position of <b>out</b>, or later.  Return -1 if no instances are found;
1069  * otherwise returns the absolute position of the character. */
1070 static off_t
1071 buf_find_pos_of_char(char ch, buf_pos_t *out)
1072 {
1073   const chunk_t *chunk;
1074   int pos;
1075   tor_assert(out);
1076   if (out->chunk) {
1077     if (out->chunk->datalen) {
1078       tor_assert(out->pos < (off_t)out->chunk->datalen);
1079     } else {
1080       tor_assert(out->pos == 0);
1081     }
1082   }
1083   pos = out->pos;
1084   for (chunk = out->chunk; chunk; chunk = chunk->next) {
1085     char *cp = memchr(chunk->data+pos, ch, chunk->datalen - pos);
1086     if (cp) {
1087       out->chunk = chunk;
1088       tor_assert(cp - chunk->data < INT_MAX);
1089       out->pos = (int)(cp - chunk->data);
1090       return out->chunk_pos + out->pos;
1091     } else {
1092       out->chunk_pos += chunk->datalen;
1093       pos = 0;
1094     }
1095   }
1096   return -1;
1097 }
1098
1099 /** Advance <b>pos</b> by a single character, if there are any more characters
1100  * in the buffer.  Returns 0 on sucess, -1 on failure. */
1101 static INLINE int
1102 buf_pos_inc(buf_pos_t *pos)
1103 {
1104   ++pos->pos;
1105   if (pos->pos == (off_t)pos->chunk->datalen) {
1106     if (!pos->chunk->next)
1107       return -1;
1108     pos->chunk_pos += pos->chunk->datalen;
1109     pos->chunk = pos->chunk->next;
1110     pos->pos = 0;
1111   }
1112   return 0;
1113 }
1114
1115 /** Return true iff the <b>n</b>-character string in <b>s</b> appears
1116  * (verbatim) at <b>pos</b>. */
1117 static int
1118 buf_matches_at_pos(const buf_pos_t *pos, const char *s, size_t n)
1119 {
1120   buf_pos_t p;
1121   if (!n)
1122     return 1;
1123
1124   memcpy(&p, pos, sizeof(p));
1125
1126   while (1) {
1127     char ch = p.chunk->data[p.pos];
1128     if (ch != *s)
1129       return 0;
1130     ++s;
1131     /* If we're out of characters that don't match, we match.  Check this
1132      * _before_ we test incrementing pos, in case we're at the end of the
1133      * string. */
1134     if (--n == 0)
1135       return 1;
1136     if (buf_pos_inc(&p)<0)
1137       return 0;
1138   }
1139 }
1140
1141 /** Return the first position in <b>buf</b> at which the <b>n</b>-character
1142  * string <b>s</b> occurs, or -1 if it does not occur. */
1143 /*private*/ int
1144 buf_find_string_offset(const buf_t *buf, const char *s, size_t n)
1145 {
1146   buf_pos_t pos;
1147   buf_pos_init(buf, &pos);
1148   while (buf_find_pos_of_char(*s, &pos) >= 0) {
1149     if (buf_matches_at_pos(&pos, s, n)) {
1150       tor_assert(pos.chunk_pos + pos.pos < INT_MAX);
1151       return (int)(pos.chunk_pos + pos.pos);
1152     } else {
1153       if (buf_pos_inc(&pos)<0)
1154         return -1;
1155     }
1156   }
1157   return -1;
1158 }
1159
1160 /** There is a (possibly incomplete) http statement on <b>buf</b>, of the
1161  * form "\%s\\r\\n\\r\\n\%s", headers, body. (body may contain nuls.)
1162  * If a) the headers include a Content-Length field and all bytes in
1163  * the body are present, or b) there's no Content-Length field and
1164  * all headers are present, then:
1165  *
1166  *  - strdup headers into <b>*headers_out</b>, and nul-terminate it.
1167  *  - memdup body into <b>*body_out</b>, and nul-terminate it.
1168  *  - Then remove them from <b>buf</b>, and return 1.
1169  *
1170  *  - If headers or body is NULL, discard that part of the buf.
1171  *  - If a headers or body doesn't fit in the arg, return -1.
1172  *  (We ensure that the headers or body don't exceed max len,
1173  *   _even if_ we're planning to discard them.)
1174  *  - If force_complete is true, then succeed even if not all of the
1175  *    content has arrived.
1176  *
1177  * Else, change nothing and return 0.
1178  */
1179 int
1180 fetch_from_buf_http(buf_t *buf,
1181                     char **headers_out, size_t max_headerlen,
1182                     char **body_out, size_t *body_used, size_t max_bodylen,
1183                     int force_complete)
1184 {
1185   char *headers, *p;
1186   size_t headerlen, bodylen, contentlen;
1187   int crlf_offset;
1188
1189   check();
1190   if (!buf->head)
1191     return 0;
1192
1193   crlf_offset = buf_find_string_offset(buf, "\r\n\r\n", 4);
1194   if (crlf_offset > (int)max_headerlen ||
1195       (crlf_offset < 0 && buf->datalen > max_headerlen)) {
1196     log_debug(LD_HTTP,"headers too long.");
1197     return -1;
1198   } else if (crlf_offset < 0) {
1199     log_debug(LD_HTTP,"headers not all here yet.");
1200     return 0;
1201   }
1202   /* Okay, we have a full header.  Make sure it all appears in the first
1203    * chunk. */
1204   if ((int)buf->head->datalen < crlf_offset + 4)
1205     buf_pullup(buf, crlf_offset+4, 0);
1206   headerlen = crlf_offset + 4;
1207
1208   headers = buf->head->data;
1209   bodylen = buf->datalen - headerlen;
1210   log_debug(LD_HTTP,"headerlen %d, bodylen %d.", (int)headerlen, (int)bodylen);
1211
1212   if (max_headerlen <= headerlen) {
1213     log_warn(LD_HTTP,"headerlen %d larger than %d. Failing.",
1214              (int)headerlen, (int)max_headerlen-1);
1215     return -1;
1216   }
1217   if (max_bodylen <= bodylen) {
1218     log_warn(LD_HTTP,"bodylen %d larger than %d. Failing.",
1219              (int)bodylen, (int)max_bodylen-1);
1220     return -1;
1221   }
1222
1223 #define CONTENT_LENGTH "\r\nContent-Length: "
1224   p = (char*) tor_memstr(headers, headerlen, CONTENT_LENGTH);
1225   if (p) {
1226     int i;
1227     i = atoi(p+strlen(CONTENT_LENGTH));
1228     if (i < 0) {
1229       log_warn(LD_PROTOCOL, "Content-Length is less than zero; it looks like "
1230                "someone is trying to crash us.");
1231       return -1;
1232     }
1233     contentlen = i;
1234     /* if content-length is malformed, then our body length is 0. fine. */
1235     log_debug(LD_HTTP,"Got a contentlen of %d.",(int)contentlen);
1236     if (bodylen < contentlen) {
1237       if (!force_complete) {
1238         log_debug(LD_HTTP,"body not all here yet.");
1239         return 0; /* not all there yet */
1240       }
1241     }
1242     if (bodylen > contentlen) {
1243       bodylen = contentlen;
1244       log_debug(LD_HTTP,"bodylen reduced to %d.",(int)bodylen);
1245     }
1246   }
1247   /* all happy. copy into the appropriate places, and return 1 */
1248   if (headers_out) {
1249     *headers_out = tor_malloc(headerlen+1);
1250     fetch_from_buf(*headers_out, headerlen, buf);
1251     (*headers_out)[headerlen] = 0; /* nul terminate it */
1252   }
1253   if (body_out) {
1254     tor_assert(body_used);
1255     *body_used = bodylen;
1256     *body_out = tor_malloc(bodylen+1);
1257     fetch_from_buf(*body_out, bodylen, buf);
1258     (*body_out)[bodylen] = 0; /* nul terminate it */
1259   }
1260   check();
1261   return 1;
1262 }
1263
1264 /** There is a (possibly incomplete) socks handshake on <b>buf</b>, of one
1265  * of the forms
1266  *  - socks4: "socksheader username\\0"
1267  *  - socks4a: "socksheader username\\0 destaddr\\0"
1268  *  - socks5 phase one: "version #methods methods"
1269  *  - socks5 phase two: "version command 0 addresstype..."
1270  * If it's a complete and valid handshake, and destaddr fits in
1271  *   MAX_SOCKS_ADDR_LEN bytes, then pull the handshake off the buf,
1272  *   assign to <b>req</b>, and return 1.
1273  *
1274  * If it's invalid or too big, return -1.
1275  *
1276  * Else it's not all there yet, leave buf alone and return 0.
1277  *
1278  * If you want to specify the socks reply, write it into <b>req->reply</b>
1279  *   and set <b>req->replylen</b>, else leave <b>req->replylen</b> alone.
1280  *
1281  * If <b>log_sockstype</b> is non-zero, then do a notice-level log of whether
1282  * the connection is possibly leaking DNS requests locally or not.
1283  *
1284  * If <b>safe_socks</b> is true, then reject unsafe socks protocols.
1285  *
1286  * If returning 0 or -1, <b>req->address</b> and <b>req->port</b> are
1287  * undefined.
1288  */
1289 int
1290 fetch_from_buf_socks(buf_t *buf, socks_request_t *req,
1291                      int log_sockstype, int safe_socks)
1292 {
1293   unsigned int len;
1294   char tmpbuf[TOR_ADDR_BUF_LEN+1];
1295   tor_addr_t destaddr;
1296   uint32_t destip;
1297   uint8_t socksver;
1298   enum {socks4, socks4a} socks4_prot = socks4a;
1299   char *next, *startaddr;
1300   struct in_addr in;
1301
1302   /* If the user connects with socks4 or the wrong variant of socks5,
1303    * then log a warning to let him know that it might be unwise. */
1304   static int have_warned_about_unsafe_socks = 0;
1305
1306   if (buf->datalen < 2) /* version and another byte */
1307     return 0;
1308
1309   buf_pullup(buf, 128, 0);
1310   tor_assert(buf->head && buf->head->datalen >= 2);
1311
1312   socksver = *buf->head->data;
1313
1314   switch (socksver) { /* which version of socks? */
1315
1316     case 5: /* socks5 */
1317
1318       if (req->socks_version != 5) { /* we need to negotiate a method */
1319         unsigned char nummethods = (unsigned char)*(buf->head->data+1);
1320         tor_assert(!req->socks_version);
1321         if (buf->datalen < 2u+nummethods)
1322           return 0;
1323         buf_pullup(buf, 2u+nummethods, 0);
1324         if (!nummethods || !memchr(buf->head->data+2, 0, nummethods)) {
1325           log_warn(LD_APP,
1326                    "socks5: offered methods don't include 'no auth'. "
1327                    "Rejecting.");
1328           req->replylen = 2; /* 2 bytes of response */
1329           req->reply[0] = 5;
1330           req->reply[1] = '\xFF'; /* reject all methods */
1331           return -1;
1332         }
1333         /* remove packet from buf. also remove any other extraneous
1334          * bytes, to support broken socks clients. */
1335         buf_clear(buf);
1336
1337         req->replylen = 2; /* 2 bytes of response */
1338         req->reply[0] = 5; /* socks5 reply */
1339         req->reply[1] = 0; /* tell client to use "none" auth method */
1340         req->socks_version = 5; /* remember we've already negotiated auth */
1341         log_debug(LD_APP,"socks5: accepted method 0");
1342         return 0;
1343       }
1344       /* we know the method; read in the request */
1345       log_debug(LD_APP,"socks5: checking request");
1346       if (buf->datalen < 8) /* basic info plus >=2 for addr plus 2 for port */
1347         return 0; /* not yet */
1348       tor_assert(buf->head->datalen >= 8);
1349       req->command = (unsigned char) *(buf->head->data+1);
1350       if (req->command != SOCKS_COMMAND_CONNECT &&
1351           req->command != SOCKS_COMMAND_RESOLVE &&
1352           req->command != SOCKS_COMMAND_RESOLVE_PTR) {
1353         /* not a connect or resolve or a resolve_ptr? we don't support it. */
1354         log_warn(LD_APP,"socks5: command %d not recognized. Rejecting.",
1355                  req->command);
1356         return -1;
1357       }
1358       switch (*(buf->head->data+3)) { /* address type */
1359         case 1: /* IPv4 address */
1360         case 4: /* IPv6 address */ {
1361           const int is_v6 = *(buf->head->data+3) == 4;
1362           const unsigned addrlen = is_v6 ? 16 : 4;
1363           log_debug(LD_APP,"socks5: ipv4 address type");
1364           if (buf->datalen < 6+addrlen) /* ip/port there? */
1365             return 0; /* not yet */
1366
1367           if (is_v6)
1368             tor_addr_from_ipv6_bytes(&destaddr, buf->head->data+4);
1369           else
1370             tor_addr_from_ipv4n(&destaddr, get_uint32(buf->head->data+4));
1371
1372           tor_addr_to_str(tmpbuf, &destaddr, sizeof(tmpbuf), 1);
1373
1374           if (strlen(tmpbuf)+1 > MAX_SOCKS_ADDR_LEN) {
1375             log_warn(LD_APP,
1376                      "socks5 IP takes %d bytes, which doesn't fit in %d. "
1377                      "Rejecting.",
1378                      (int)strlen(tmpbuf)+1,(int)MAX_SOCKS_ADDR_LEN);
1379             return -1;
1380           }
1381           strlcpy(req->address,tmpbuf,sizeof(req->address));
1382           req->port = ntohs(get_uint16(buf->head->data+4+addrlen));
1383           buf_remove_from_front(buf, 6+addrlen);
1384           if (req->command != SOCKS_COMMAND_RESOLVE_PTR &&
1385               !addressmap_have_mapping(req->address,0) &&
1386               !have_warned_about_unsafe_socks) {
1387             log_warn(LD_APP,
1388                 "Your application (using socks5 to port %d) is giving "
1389                 "Tor only an IP address. Applications that do DNS resolves "
1390                 "themselves may leak information. Consider using Socks4A "
1391                 "(e.g. via privoxy or socat) instead. For more information, "
1392                 "please see http://wiki.noreply.org/noreply/TheOnionRouter/"
1393                 "TorFAQ#SOCKSAndDNS.%s", req->port,
1394                 safe_socks ? " Rejecting." : "");
1395             /*have_warned_about_unsafe_socks = 1;*/
1396                                       /*(for now, warn every time)*/
1397             control_event_client_status(LOG_WARN,
1398                           "DANGEROUS_SOCKS PROTOCOL=SOCKS5 ADDRESS=%s:%d",
1399                           req->address, req->port);
1400             if (safe_socks)
1401               return -1;
1402           }
1403           return 1;
1404         }
1405         case 3: /* fqdn */
1406           log_debug(LD_APP,"socks5: fqdn address type");
1407           if (req->command == SOCKS_COMMAND_RESOLVE_PTR) {
1408             log_warn(LD_APP, "socks5 received RESOLVE_PTR command with "
1409                      "hostname type. Rejecting.");
1410             return -1;
1411           }
1412           len = (unsigned char)*(buf->head->data+4);
1413           if (buf->datalen < 7+len) /* addr/port there? */
1414             return 0; /* not yet */
1415           buf_pullup(buf, 7+len, 0);
1416           tor_assert(buf->head->datalen >= 7+len);
1417           if (len+1 > MAX_SOCKS_ADDR_LEN) {
1418             log_warn(LD_APP,
1419                      "socks5 hostname is %d bytes, which doesn't fit in "
1420                      "%d. Rejecting.", len+1,MAX_SOCKS_ADDR_LEN);
1421             return -1;
1422           }
1423           memcpy(req->address,buf->head->data+5,len);
1424           req->address[len] = 0;
1425           req->port = ntohs(get_uint16(buf->head->data+5+len));
1426           buf_remove_from_front(buf, 5+len+2);
1427           if (!tor_strisprint(req->address) || strchr(req->address,'\"')) {
1428             log_warn(LD_PROTOCOL,
1429                      "Your application (using socks5 to port %d) gave Tor "
1430                      "a malformed hostname: %s. Rejecting the connection.",
1431                      req->port, escaped(req->address));
1432             return -1;
1433           }
1434           if (log_sockstype)
1435             log_notice(LD_APP,
1436                   "Your application (using socks5 to port %d) gave "
1437                   "Tor a hostname, which means Tor will do the DNS resolve "
1438                   "for you. This is good.", req->port);
1439           return 1;
1440         default: /* unsupported */
1441           log_warn(LD_APP,"socks5: unsupported address type %d. Rejecting.",
1442                    (int) *(buf->head->data+3));
1443           return -1;
1444       }
1445       tor_assert(0);
1446     case 4: /* socks4 */
1447       /* http://archive.socks.permeo.com/protocol/socks4.protocol */
1448       /* http://archive.socks.permeo.com/protocol/socks4a.protocol */
1449
1450       req->socks_version = 4;
1451       if (buf->datalen < SOCKS4_NETWORK_LEN) /* basic info available? */
1452         return 0; /* not yet */
1453       buf_pullup(buf, 1280, 0);
1454       req->command = (unsigned char) *(buf->head->data+1);
1455       if (req->command != SOCKS_COMMAND_CONNECT &&
1456           req->command != SOCKS_COMMAND_RESOLVE) {
1457         /* not a connect or resolve? we don't support it. (No resolve_ptr with
1458          * socks4.) */
1459         log_warn(LD_APP,"socks4: command %d not recognized. Rejecting.",
1460                  req->command);
1461         return -1;
1462       }
1463
1464       req->port = ntohs(*(uint16_t*)(buf->head->data+2));
1465       destip = ntohl(*(uint32_t*)(buf->head->data+4));
1466       if ((!req->port && req->command!=SOCKS_COMMAND_RESOLVE) || !destip) {
1467         log_warn(LD_APP,"socks4: Port or DestIP is zero. Rejecting.");
1468         return -1;
1469       }
1470       if (destip >> 8) {
1471         log_debug(LD_APP,"socks4: destip not in form 0.0.0.x.");
1472         in.s_addr = htonl(destip);
1473         tor_inet_ntoa(&in,tmpbuf,sizeof(tmpbuf));
1474         if (strlen(tmpbuf)+1 > MAX_SOCKS_ADDR_LEN) {
1475           log_debug(LD_APP,"socks4 addr (%d bytes) too long. Rejecting.",
1476                     (int)strlen(tmpbuf));
1477           return -1;
1478         }
1479         log_debug(LD_APP,
1480                   "socks4: successfully read destip (%s)", safe_str(tmpbuf));
1481         socks4_prot = socks4;
1482       }
1483
1484       next = memchr(buf->head->data+SOCKS4_NETWORK_LEN, 0,
1485                     buf->head->datalen-SOCKS4_NETWORK_LEN);
1486       if (!next) {
1487         if (buf->head->datalen >= 1024) {
1488           log_debug(LD_APP, "Socks4 user name too long; rejecting.");
1489           return -1;
1490         }
1491         log_debug(LD_APP,"socks4: Username not here yet.");
1492         return 0;
1493       }
1494       tor_assert(next < CHUNK_WRITE_PTR(buf->head));
1495
1496       startaddr = NULL;
1497       if (socks4_prot != socks4a &&
1498           !addressmap_have_mapping(tmpbuf,0) &&
1499           !have_warned_about_unsafe_socks) {
1500         log_warn(LD_APP,
1501                  "Your application (using socks4 to port %d) is giving Tor "
1502                  "only an IP address. Applications that do DNS resolves "
1503                  "themselves may leak information. Consider using Socks4A "
1504                  "(e.g. via privoxy or socat) instead. For more information, "
1505                  "please see http://wiki.noreply.org/noreply/TheOnionRouter/"
1506                  "TorFAQ#SOCKSAndDNS.%s", req->port,
1507                  safe_socks ? " Rejecting." : "");
1508         /*have_warned_about_unsafe_socks = 1;*/  /*(for now, warn every time)*/
1509         control_event_client_status(LOG_WARN,
1510                         "DANGEROUS_SOCKS PROTOCOL=SOCKS4 ADDRESS=%s:%d",
1511                         tmpbuf, req->port);
1512         if (safe_socks)
1513           return -1;
1514       }
1515       if (socks4_prot == socks4a) {
1516         if (next+1 == CHUNK_WRITE_PTR(buf->head)) {
1517           log_debug(LD_APP,"socks4: No part of destaddr here yet.");
1518           return 0;
1519         }
1520         startaddr = next+1;
1521         next = memchr(startaddr, 0, CHUNK_WRITE_PTR(buf->head)-startaddr);
1522         if (!next) {
1523           if (buf->head->datalen >= 1024) {
1524             log_debug(LD_APP,"socks4: Destaddr too long.");
1525             return -1;
1526           }
1527           log_debug(LD_APP,"socks4: Destaddr not all here yet.");
1528           return 0;
1529         }
1530         if (MAX_SOCKS_ADDR_LEN <= next-startaddr) {
1531           log_warn(LD_APP,"socks4: Destaddr too long. Rejecting.");
1532           return -1;
1533         }
1534         // tor_assert(next < buf->cur+buf->datalen);
1535
1536         if (log_sockstype)
1537           log_notice(LD_APP,
1538                      "Your application (using socks4a to port %d) gave "
1539                      "Tor a hostname, which means Tor will do the DNS resolve "
1540                      "for you. This is good.", req->port);
1541       }
1542       log_debug(LD_APP,"socks4: Everything is here. Success.");
1543       strlcpy(req->address, startaddr ? startaddr : tmpbuf,
1544               sizeof(req->address));
1545       if (!tor_strisprint(req->address) || strchr(req->address,'\"')) {
1546         log_warn(LD_PROTOCOL,
1547                  "Your application (using socks4 to port %d) gave Tor "
1548                  "a malformed hostname: %s. Rejecting the connection.",
1549                  req->port, escaped(req->address));
1550         return -1;
1551       }
1552       /* next points to the final \0 on inbuf */
1553       buf_remove_from_front(buf, next - buf->head->data + 1);
1554       return 1;
1555
1556     case 'G': /* get */
1557     case 'H': /* head */
1558     case 'P': /* put/post */
1559     case 'C': /* connect */
1560       strlcpy(req->reply,
1561 "HTTP/1.0 501 Tor is not an HTTP Proxy\r\n"
1562 "Content-Type: text/html; charset=iso-8859-1\r\n\r\n"
1563 "<html>\n"
1564 "<head>\n"
1565 "<title>Tor is not an HTTP Proxy</title>\n"
1566 "</head>\n"
1567 "<body>\n"
1568 "<h1>Tor is not an HTTP Proxy</h1>\n"
1569 "<p>\n"
1570 "It appears you have configured your web browser to use Tor as an HTTP proxy."
1571 "\n"
1572 "This is not correct: Tor is a SOCKS proxy, not an HTTP proxy.\n"
1573 "Please configure your client accordingly.\n"
1574 "</p>\n"
1575 "<p>\n"
1576 "See <a href=\"https://www.torproject.org/documentation.html\">"
1577            "https://www.torproject.org/documentation.html</a> for more "
1578            "information.\n"
1579 "<!-- Plus this comment, to make the body response more than 512 bytes, so "
1580 "     IE will be willing to display it. Comment comment comment comment "
1581 "     comment comment comment comment comment comment comment comment.-->\n"
1582 "</p>\n"
1583 "</body>\n"
1584 "</html>\n"
1585              , MAX_SOCKS_REPLY_LEN);
1586       req->replylen = strlen(req->reply)+1;
1587       /* fall through */
1588     default: /* version is not socks4 or socks5 */
1589       log_warn(LD_APP,
1590                "Socks version %d not recognized. (Tor is not an http proxy.)",
1591                *(buf->head->data));
1592       {
1593         char *tmp = tor_strndup(buf->head->data, 8); /*XXXX what if longer?*/
1594         control_event_client_status(LOG_WARN,
1595                                     "SOCKS_UNKNOWN_PROTOCOL DATA=\"%s\"",
1596                                     escaped(tmp));
1597         tor_free(tmp);
1598       }
1599       return -1;
1600   }
1601 }
1602
1603 /** Return 1 iff buf looks more like it has an (obsolete) v0 controller
1604  * command on it than any valid v1 controller command. */
1605 int
1606 peek_buf_has_control0_command(buf_t *buf)
1607 {
1608   if (buf->datalen >= 4) {
1609     char header[4];
1610     uint16_t cmd;
1611     peek_from_buf(header, sizeof(header), buf);
1612     cmd = ntohs(get_uint16(header+2));
1613     if (cmd <= 0x14)
1614       return 1; /* This is definitely not a v1 control command. */
1615   }
1616   return 0;
1617 }
1618
1619 /** Return the index within <b>buf</b> at which <b>ch</b> first appears,
1620  * or -1 if <b>ch</b> does not appear on buf. */
1621 static off_t
1622 buf_find_offset_of_char(buf_t *buf, char ch)
1623 {
1624   chunk_t *chunk;
1625   off_t offset = 0;
1626   for (chunk = buf->head; chunk; chunk = chunk->next) {
1627     char *cp = memchr(chunk->data, ch, chunk->datalen);
1628     if (cp)
1629       return offset + (cp - chunk->data);
1630     else
1631       offset += chunk->datalen;
1632   }
1633   return -1;
1634 }
1635
1636 /** Try to read a single LF-terminated line from <b>buf</b>, and write it,
1637  * NUL-terminated, into the *<b>data_len</b> byte buffer at <b>data_out</b>.
1638  * Set *<b>data_len</b> to the number of bytes in the line, not counting the
1639  * terminating NUL.  Return 1 if we read a whole line, return 0 if we don't
1640  * have a whole line yet, and return -1 if the line length exceeds
1641  * *<b>data_len</b>.
1642  */
1643 int
1644 fetch_from_buf_line(buf_t *buf, char *data_out, size_t *data_len)
1645 {
1646   size_t sz;
1647   off_t offset;
1648
1649   if (!buf->head)
1650     return 0;
1651
1652   offset = buf_find_offset_of_char(buf, '\n');
1653   if (offset < 0)
1654     return 0;
1655   sz = (size_t) offset;
1656   if (sz+2 > *data_len) {
1657     *data_len = sz + 2;
1658     return -1;
1659   }
1660   fetch_from_buf(data_out, sz+1, buf);
1661   data_out[sz+1] = '\0';
1662   *data_len = sz+1;
1663   return 1;
1664 }
1665
1666 /** Compress on uncompress the <b>data_len</b> bytes in <b>data</b> using the
1667  * zlib state <b>state</b>, appending the result to <b>buf</b>.  If
1668  * <b>done</b> is true, flush the data in the state and finish the
1669  * compression/uncompression.  Return -1 on failure, 0 on success. */
1670 int
1671 write_to_buf_zlib(buf_t *buf, tor_zlib_state_t *state,
1672                   const char *data, size_t data_len,
1673                   int done)
1674 {
1675   char *next;
1676   size_t old_avail, avail;
1677   int over = 0;
1678   do {
1679     int need_new_chunk = 0;
1680     if (!buf->tail || ! CHUNK_REMAINING_CAPACITY(buf->tail)) {
1681       size_t cap = data_len / 4;
1682       buf_add_chunk_with_capacity(buf, cap, 1);
1683     }
1684     next = CHUNK_WRITE_PTR(buf->tail);
1685     avail = old_avail = CHUNK_REMAINING_CAPACITY(buf->tail);
1686     switch (tor_zlib_process(state, &next, &avail, &data, &data_len, done)) {
1687       case TOR_ZLIB_DONE:
1688         over = 1;
1689         break;
1690       case TOR_ZLIB_ERR:
1691         return -1;
1692       case TOR_ZLIB_OK:
1693         if (data_len == 0)
1694           over = 1;
1695         break;
1696       case TOR_ZLIB_BUF_FULL:
1697         if (avail) {
1698           /* Zlib says we need more room (ZLIB_BUF_FULL).  Start a new chunk
1699            * automatically, whether were going to or not. */
1700           need_new_chunk = 1;
1701         }
1702         break;
1703     }
1704     buf->datalen += old_avail - avail;
1705     buf->tail->datalen += old_avail - avail;
1706     if (need_new_chunk) {
1707       buf_add_chunk_with_capacity(buf, data_len/4, 1);
1708     }
1709
1710   } while (!over);
1711   check();
1712   return 0;
1713 }
1714
1715 /** Log an error and exit if <b>buf</b> is corrupted.
1716  */
1717 void
1718 assert_buf_ok(buf_t *buf)
1719 {
1720   tor_assert(buf);
1721   tor_assert(buf->magic == BUFFER_MAGIC);
1722
1723   if (! buf->head) {
1724     tor_assert(!buf->tail);
1725     tor_assert(buf->datalen == 0);
1726   } else {
1727     chunk_t *ch;
1728     size_t total = 0;
1729     tor_assert(buf->tail);
1730     for (ch = buf->head; ch; ch = ch->next) {
1731       total += ch->datalen;
1732       tor_assert(ch->datalen <= ch->memlen);
1733       tor_assert(ch->data >= &ch->mem[0]);
1734       tor_assert(ch->data < &ch->mem[0]+ch->memlen);
1735       tor_assert(ch->data+ch->datalen <= &ch->mem[0] + ch->memlen);
1736       if (!ch->next)
1737         tor_assert(ch == buf->tail);
1738     }
1739     tor_assert(buf->datalen == total);
1740   }
1741 }
1742
1743 #ifdef ENABLE_BUF_FREELISTS
1744 /** Log an error and exit if <b>fl</b> is corrupted.
1745  */
1746 static void
1747 assert_freelist_ok(chunk_freelist_t *fl)
1748 {
1749   chunk_t *ch;
1750   int n;
1751   tor_assert(fl->alloc_size > 0);
1752   n = 0;
1753   for (ch = fl->head; ch; ch = ch->next) {
1754     tor_assert(CHUNK_ALLOC_SIZE(ch->memlen) == fl->alloc_size);
1755     ++n;
1756   }
1757   tor_assert(n == fl->cur_length);
1758   tor_assert(n >= fl->lowest_length);
1759   tor_assert(n <= fl->max_length);
1760 }
1761 #endif
1762