#include "bfu/dialog.h"
#include "cache/cache.h"
#include "cache/dialogs.h"
#include "config/options.h"
#include "main/main.h"
#include "main/object.h"
#include "network/connection.h"
#include "protocol/protocol.h"
#include "protocol/proxy.h"
#include "protocol/uri.h"
#include "util/error.h"
#include "util/memory.h"
#include "util/string.h"
/* The list of cache entries */
static INIT_LIST_HEAD(cache_entries);

static unsigned longlong cache_size;
static int id_counter = 1;

static void truncate_entry(struct cache_entry *cached, off_t offset, int final);
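/* Note: @cache_entries is kept in most-recently-used order -- find_in_cache()
 * moves each hit to the top of the list, and garbage_collection() walks the
 * list backwards so the oldest unused entries are considered first.
 * @cache_size is the running sum of every entry's data_size; enlarge_entry()
 * keeps it in sync whenever a fragment grows or shrinks. */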
/* Change 0 to 1 to enable cache debugging features (redirect stderr to a file). */
#define DEBUG_CACHE 0

#if DEBUG_CACHE

#define dump_frag(frag, count) \
do { \
	DBG(" [%d] f=%p offset=%" OFF_T_FORMAT " length=%" OFF_T_FORMAT \
	    " real_length=%" OFF_T_FORMAT, \
	    count, frag, frag->offset, frag->length, frag->real_length); \
} while (0)

#define dump_frags(entry, comment) \
do { \
	struct fragment *frag; \
	int count = 0; \
	\
	DBG("%s: url=%s, cache_size=%li", comment, struri(entry->uri), cache_size); \
	foreach (frag, entry->frag) \
		dump_frag(frag, ++count); \
} while (0)

#else
#define dump_frag(frag, count)
#define dump_frags(entry, comment)
#endif /* DEBUG_CACHE */
int
get_cache_entry_count(void)
{
	return list_size(&cache_entries);
}

int
get_cache_entry_used_count(void)
{
	struct cache_entry *cached;
	int i = 0;

	foreach (cached, cache_entries)
		i += is_object_used(cached);

	return i;
}

int
get_cache_entry_loading_count(void)
{
	struct cache_entry *cached;
	int i = 0;

	foreach (cached, cache_entries)
		i += is_entry_used(cached);

	return i;
}
struct cache_entry *
find_in_cache(struct uri *uri)
{
	struct cache_entry *cached;
	int proxy = (uri->protocol == PROTOCOL_PROXY);

	foreach (cached, cache_entries) {
		struct uri *c_uri;

		if (!cached->valid) continue;

		c_uri = proxy ? cached->proxy_uri : cached->uri;
		if (!compare_uri(c_uri, uri, URI_BASE))
			continue;

		move_to_top_of_list(cache_entries, cached);

		return cached;
	}

	return NULL;
}
struct cache_entry *
get_cache_entry(struct uri *uri)
{
	struct cache_entry *cached = find_in_cache(uri);

	assertm(!uri->fragment, "Fragment in URI (%s)", struri(uri));

	if (cached) return cached;

	cached = mem_calloc(1, sizeof(*cached));
	if (!cached) return NULL;

	cached->uri = get_proxied_uri(uri);

	cached->proxy_uri = get_proxy_uri(uri, NULL);
	if (!cached->proxy_uri) {
		done_uri(cached->uri);
		mem_free(cached);
		return NULL;
	}
	cached->incomplete = 1;

	init_list(cached->frag);
	cached->id = id_counter++;
	object_nolock(cached, "cache_entry"); /* Debugging purpose. */

	cached->box_item = add_listbox_leaf(&cache_browser, NULL, cached);

	add_to_list(cache_entries, cached);

	return cached;
}
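/* Note: unlike find_in_cache(), which only looks up an already registered
 * entry, get_cache_entry() allocates and registers a fresh, empty, incomplete
 * entry when the lookup misses. */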
static int
cache_entry_has_expired(struct cache_entry *cached)
{
	timeval_T now;

	timeval_now(&now);

	return timeval_cmp(&cached->max_age, &now) <= 0;
}
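/* The comparison above treats max_age as an absolute expiry timestamp: once
 * the current time has reached or passed it, the entry counts as expired. */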
struct cache_entry *
get_validated_cache_entry(struct uri *uri, enum cache_mode cache_mode)
{
	struct cache_entry *cached;

	/* We have to check if something should be reloaded */
	if (cache_mode > CACHE_MODE_NORMAL)
		return NULL;

	/* We only consider complete entries */
	cached = find_in_cache(uri);
	if (!cached || cached->incomplete)
		return NULL;

	/* Check if the entry can be deleted */
	/* FIXME: This does not make sense to me. Why should the usage pattern
	 * of the cache entry matter? The only reason I can think of is to avoid
	 * reloading when spawning a new tab, which could potentially be a big
	 * penalty, but shouldn't that be taken care of on a higher level? */
	if (is_object_used(cached)) {
#if 0
		/* Never use expired entries. */
		/* Disabled because it hurts usability too much. */
		if (cached->expire && cache_entry_has_expired(cached))
			return NULL;
#endif
		return cached;
	}

	/* A bit of a gray zone. Delete the entry if it has the strictest
	 * cache mode and we don't want the most aggressive mode, or we have to
	 * remove the redirect, or the entry expired. Please enlighten me. */
	if ((cached->cache_mode == CACHE_MODE_NEVER && cache_mode != CACHE_MODE_ALWAYS)
	    || (cached->redirect && !get_opt_bool("document.cache.cache_redirects"))
	    || (cached->expire && cache_entry_has_expired(cached))) {
		delete_cache_entry(cached);
		return NULL;
	}

	return cached;
}
int
cache_entry_is_valid(struct cache_entry *cached)
{
	struct cache_entry *valid_cached;

	foreach (valid_cached, cache_entries) {
		if (valid_cached == cached)
			return 1;
	}

	return 0;
}
static struct cache_entry *
follow_cached_redirects(struct cache_entry *cached)
{
	int redirects = 0;

	while (cached) {
		if (!cached->redirect) {
			/* XXX: This is not quite true, but does that
			 * difference matter here? */
			return cached;
		}

		if (++redirects > MAX_REDIRECTS) break;

		cached = find_in_cache(cached->redirect);
	}

	return NULL;
}
struct cache_entry *
get_redirected_cache_entry(struct uri *uri)
{
	struct cache_entry *cached = find_in_cache(uri);

	return cached ? follow_cached_redirects(cached) : NULL;
}
static void
enlarge_entry(struct cache_entry *cached, off_t size)
{
	cached->data_size += size;
	assertm(cached->data_size >= 0,
		"cache entry data_size underflow: %ld", cached->data_size);
	if_assert_failed { cached->data_size = 0; }

	cache_size += size;
	assertm(cache_size >= 0, "cache_size underflow: %ld", cache_size);
	if_assert_failed { cache_size = 0; }
}
#define CACHE_PAD(x) (((x) | 0x3fff) + 1)

/* One byte is reserved for data in struct fragment. */
#define FRAGSIZE(x) (sizeof(struct fragment) + (x) - 1)
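/* For illustration: CACHE_PAD() rounds an allocation request up to the next
 * 16 KiB boundary, e.g. CACHE_PAD(1) == 0x4000 and CACHE_PAD(0x4000) == 0x8000,
 * so successive small appends do not force a remap every time. FRAGSIZE()
 * subtracts one because struct fragment already declares one byte of its
 * trailing data array. */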
/* We store the fragments themselves in a private vault, safely separated from
 * the rest of the memory structures. If we lived in the main libc memory pool,
 * we would trigger annoying pathological behaviour like artificially enlarging
 * the memory pool to 50M, then securing it with some stupid cookie record at
 * the top, and then no matter how you flush the cache the data segment stays
 * that big.
 *
 * Cool, but we don't want that, so fragments (where the big data is stored)
 * live in their own little mmap()ed worlds. There is some overhead, but if we
 * assume a single fragment per cache entry and a page size (the mmap()
 * allocation granularity) of 4096, then for a squad of ten 1 kB documents this
 * amounts to about 30 kB -- ten pages of 4096 bytes each, minus the ten
 * kilobytes of actual data. That's not *that* horrible when you realize that
 * the freshmeat front page takes 300 kB in memory and we usually do not deal
 * with documents so small that the max. 4 kB per-fragment overhead would be
 * visible there.
 *
 * The alternative would of course be to manage an entire custom memory pool,
 * but that is feasible only when we are able to resize it efficiently. We
 * aren't, except on Linux.
 *
 * Of course, for all this to really prevent the pathological cases completely,
 * we would need to stuff the rendered documents in too, because they seem to
 * account for the major memory bursts. */
static struct fragment *
frag_alloc(size_t size)
{
	struct fragment *f = mem_mmap_alloc(FRAGSIZE(size));

	if (!f) return NULL;
	memset(f, 0, FRAGSIZE(size));
	return f;
}
static struct fragment *
frag_realloc(struct fragment *f, size_t size)
{
	return mem_mmap_realloc(f, FRAGSIZE(f->real_length), FRAGSIZE(size));
}
static void
frag_free(struct fragment *f)
{
	mem_mmap_free(f, FRAGSIZE(f->real_length));
}
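/* A minimal sketch of the intended lifecycle of these helpers (illustrative
 * only; the real callers are add_fragment() and friends below):
 *
 *	struct fragment *f = frag_alloc(CACHE_PAD(length));
 *	if (!f) return -1;
 *	f->length = length;
 *	f->real_length = CACHE_PAD(length);
 *	memcpy(f->data, data, length);
 *	...
 *	frag_free(f);
 */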
/* Concatenate overlapping fragments. */
static void
remove_overlaps(struct cache_entry *cached, struct fragment *f, int *trunc)
{
	off_t f_end_offset = f->offset + f->length;

	/* Iterate thru all fragments we still overlap to. */
	while (list_has_next(cached->frag, f)
	       && f_end_offset > f->next->offset) {
		struct fragment *nf;
		off_t end_offset = f->next->offset + f->next->length;

		if (f_end_offset < end_offset) {
			/* We end before the end of the following fragment, though.
			 * So try to append the overlapping part of that fragment
			 * to us. */
			nf = frag_realloc(f, end_offset - f->offset);
			if (nf) {
				nf->prev->next = nf;
				nf->next->prev = nf;
				f = nf;

				if (memcmp(f->data + f->next->offset - f->offset,
					   f->next->data,
					   f->offset + f->length - f->next->offset))
					*trunc = 1;

				memcpy(f->data + f->length,
				       f->next->data + f_end_offset - f->next->offset,
				       end_offset - f_end_offset);

				enlarge_entry(cached, end_offset - f_end_offset);
				f->length = f->real_length = end_offset - f->offset;
			}

		} else {
			/* We will just discard this, it's a complete subset of
			 * our new fragment. */
			if (memcmp(f->data + f->next->offset - f->offset,
				   f->next->data,
				   f->next->length))
				*trunc = 1;
		}

		/* Remove the fragment, it influences our new one! */
		nf = f->next;
		enlarge_entry(cached, -nf->length);
		del_from_list(nf);
		frag_free(nf);
	}
}
/* Note that this function is maybe overcommented, but I'm certainly not
 * unhappy about that. */
int
add_fragment(struct cache_entry *cached, off_t offset,
	     const unsigned char *data, ssize_t length)
{
	struct fragment *f, *nf;
	int trunc = 0;
	off_t end_offset;

	if (!length) return 0;

	end_offset = offset + length;
	if (cached->length < end_offset)
		cached->length = end_offset;

	/* id marks each entry, and changes each time it's modified;
	 * used in the HTML renderer. */
	cached->id = id_counter++;

	/* Possibly insert the new data in the middle of an existing fragment. */
	foreach (f, cached->frag) {
		int ret = 0;
		off_t f_end_offset = f->offset + f->length;

		/* No intersection? */
		if (f->offset > offset) break;
		if (f_end_offset < offset) continue;

		if (end_offset > f_end_offset) {
			/* Overlap - we end further than the original fragment. */

			if (end_offset - f->offset <= f->real_length) {
				/* We fit here, so let's enlarge it by the delta
				 * of the old and new end.. */
				enlarge_entry(cached, end_offset - f_end_offset);
				/* ..and length is now the total length. */
				f->length = end_offset - f->offset;

				ret = 1; /* It was enlarged. */
			} else {
				/* We will reduce the fragment length only to the
				 * starting non-intersecting size and add a new
				 * fragment directly after this one. */
				f->length = offset - f->offset;
			}

		} /* else We are a subset of the original fragment. */

		/* Copy the stuff over there. */
		memcpy(f->data + offset - f->offset, data, length);

		remove_overlaps(cached, f, &trunc);

		/* We truncate the entry even if the data contents are the
		 * same as what we have in the fragment, because that does
		 * not mean that what is going to follow won't differ. This
		 * is a serious problem when rendering an HTML frame with onload
		 * snippets - we "guess" the rest of the document here,
		 * interpret the snippet, then it turns out in the real
		 * document the snippet is different and we are in trouble.
		 *
		 * Debugging this took me about 1.5 days (really), the diff with
		 * all the debugging print commands amounted to about 20 kB (gdb
		 * wasn't very useful since it stalled the download, de facto
		 * eliminating the bad behaviour). */
		truncate_entry(cached, end_offset, 0);

		dump_frags(cached, "add_fragment");

		return ret;
	}

	/* Make up a new fragment. */
	nf = frag_alloc(CACHE_PAD(length));
	if (!nf) return -1;

	nf->offset = offset;
	nf->length = length;
	nf->real_length = CACHE_PAD(length);
	memcpy(nf->data, data, length);
	add_at_pos(f->prev, nf);

	enlarge_entry(cached, length);

	remove_overlaps(cached, nf, &trunc);
	if (trunc) truncate_entry(cached, end_offset, 0);

	dump_frags(cached, "add_fragment");

	return 1;
}
/* Try to defragment the cache entry. Defragmentation will not be possible
 * if there is a gap in the fragments; if we have bytes 1-100 in one fragment
 * and bytes 201-300 in the second, we must leave those two fragments separate
 * so that the fragment for bytes 101-200 can later be inserted. However,
 * if we have the fragments for bytes 1-100, 101-200, and 201-300, we will
 * catenate them into one new fragment and replace the original fragments
 * with that new fragment.
 *
 * If there are no fragments, return NULL. If there is no fragment with byte 1,
 * return NULL. Otherwise, return the first fragment, whether or not it was
 * possible to fully defragment the entry. */
struct fragment *
get_cache_fragment(struct cache_entry *cached)
{
	struct fragment *first_frag, *adj_frag, *frag, *new_frag;
	off_t new_frag_len;

	if (list_empty(cached->frag))
		return NULL;

	first_frag = cached->frag.next;
	if (first_frag->offset)
		return NULL;

	/* Only one fragment so no defragmentation is needed */
	if (list_is_singleton(cached->frag))
		return first_frag;

	/* Find the first pair of fragments with a gap in between. Only
	 * fragments up to the first gap can be defragmented. */
	for (adj_frag = first_frag->next; adj_frag != (void *) &cached->frag;
	     adj_frag = adj_frag->next) {
		long gap = adj_frag->offset
			   - (adj_frag->prev->offset + adj_frag->prev->length);

		if (gap > 0) break;
		if (gap == 0) continue;

		INTERNAL("fragments overlap");
		return NULL;
	}

	/* There is a gap between the first two fragments, so we can't
	 * defragment anything. */
	if (adj_frag == first_frag->next)
		return first_frag;

	/* Calculate the length of the defragmented fragment. */
	for (new_frag_len = 0, frag = first_frag;
	     frag != adj_frag;
	     frag = frag->next)
		new_frag_len += frag->length;

	/* XXX: If the defragmentation fails because of allocation failure,
	 * fall back to return the first fragment and pretend all is well. */
	/* FIXME: Is this terribly brain-dead? It corresponds to the semantic of
	 * the code this extended version of the old defrag_entry() is supposed
	 * to replace. --jonas */
	new_frag = frag_alloc(new_frag_len);
	if (!new_frag)
		return first_frag->length ? first_frag : NULL;

	new_frag->length = new_frag_len;
	new_frag->real_length = new_frag_len;

	for (new_frag_len = 0, frag = first_frag;
	     frag != adj_frag;) {
		struct fragment *tmp = frag;

		memcpy(new_frag->data + new_frag_len, frag->data, frag->length);
		new_frag_len += frag->length;

		frag = frag->next;
		del_from_list(tmp);
		frag_free(tmp);
	}

	add_to_list(cached->frag, new_frag);

	dump_frags(cached, "get_cache_fragment");

	return new_frag;
}
static void
delete_fragment(struct cache_entry *cached, struct fragment *f)
{
	while ((void *) f != &cached->frag) {
		struct fragment *tmp = f->next;

		enlarge_entry(cached, -f->length);
		del_from_list(f);
		frag_free(f);

		f = tmp;
	}
}
static void
truncate_entry(struct cache_entry *cached, off_t offset, int final)
{
	struct fragment *f;

	if (cached->length > offset) {
		cached->length = offset;
		cached->incomplete = 1;
	}

	foreach (f, cached->frag) {
		off_t size = offset - f->offset;

		/* XXX: is zero length fragment really legal here ? --Zas */
		assert(f->length >= 0);

		if (size >= f->length) continue;

		if (size > 0) {
			enlarge_entry(cached, -(f->length - size));
			f->length = size;

			if (final) {
				struct fragment *nf;

				nf = frag_realloc(f, f->length);
				if (nf) {
					nf->prev->next = nf;
					nf->next->prev = nf;
					f = nf;
					f->real_length = f->length;
				}
			}

			f = f->next;
		}

		delete_fragment(cached, f);

		dump_frags(cached, "truncate_entry");
		return;
	}
}
void
free_entry_to(struct cache_entry *cached, off_t offset)
{
	struct fragment *f;

	foreach (f, cached->frag) {
		if (f->offset + f->length <= offset) {
			struct fragment *tmp = f;

			enlarge_entry(cached, -f->length);
			f = f->prev;
			del_from_list(tmp);
			frag_free(tmp);

		} else if (f->offset < offset) {
			off_t size = offset - f->offset;

			enlarge_entry(cached, -size);
			f->length -= size;
			memmove(f->data, f->data + size, f->length);
			f->offset = offset;
		}
	}
}
void
delete_entry_content(struct cache_entry *cached)
{
	enlarge_entry(cached, -cached->data_size);

	while (cached->frag.next != (void *) &cached->frag) {
		struct fragment *f = cached->frag.next;

		del_from_list(f);
		frag_free(f);
	}

	cached->id = id_counter++;
	cached->length = 0;
	cached->incomplete = 1;

	mem_free_set(&cached->last_modified, NULL);
	mem_free_set(&cached->etag, NULL);
}
static void
done_cache_entry(struct cache_entry *cached)
{
	assertm(!is_object_used(cached), "deleting locked cache entry");
	assertm(!is_entry_used(cached), "deleting loading cache entry");

	delete_entry_content(cached);

	if (cached->box_item) done_listbox_item(&cache_browser, cached->box_item);

	if (cached->uri) done_uri(cached->uri);
	if (cached->proxy_uri) done_uri(cached->proxy_uri);
	if (cached->redirect) done_uri(cached->redirect);

	mem_free_if(cached->head);
	mem_free_if(cached->content_type);
	mem_free_if(cached->last_modified);
	mem_free_if(cached->ssl_info);
	mem_free_if(cached->encoding_info);
	mem_free_if(cached->etag);

	mem_free(cached);
}
void
delete_cache_entry(struct cache_entry *cached)
{
	del_from_list(cached);

	done_cache_entry(cached);
}
void
normalize_cache_entry(struct cache_entry *cached, off_t truncate_length)
{
	if (truncate_length < 0)
		return;

	truncate_entry(cached, truncate_length, 1);
	cached->incomplete = 0;
	cached->preformatted = 0;
}
struct uri *
redirect_cache(struct cache_entry *cached, unsigned char *location,
	       int get, int incomplete)
{
	unsigned char *uristring;

	/* XXX: I am a little puzzled whether we should only use the cache
	 * entry's URI if it is valid. Hopefully always using it won't hurt.
	 * Currently we handle directory redirects, where "/" should be
	 * appended, specially; dunno if join_urls() could be made to handle
	 * that. */

	/* XXX: We are assuming here that incomplete will only be zero when
	 * doing these fake redirects whose only purpose is to add an ending
	 * slash *cough* dirseparator to the end of the URI. */
	if (incomplete == 0 && location[0] == '/' && location[1] == 0) {
		/* To be sure use get_uri_string() to get rid of post data */
		uristring = get_uri_string(cached->uri, URI_ORIGINAL);
		if (uristring) add_to_strn(&uristring, location);
	} else {
		uristring = join_urls(cached->uri, location);
	}

	if (!uristring) return NULL;

	/* Only add the post data if the redirect should not use the GET method.
	 * This is tied to the HTTP handling of the 303 and (if the
	 * protocol.http.bugs.broken_302_redirect option is enabled) the 302
	 * status codes. */
	if (cached->uri->post
	    && !cached->redirect_get
	    && !get) {
		/* XXX: Add POST_CHAR and post data assuming URI components
		 * belong to one string. */

		/* To be certain we don't append post data twice in some
		 * conditions... --Zas */
		assert(!strchr(uristring, POST_CHAR));

		add_to_strn(&uristring, cached->uri->post - 1);
	}

	if (cached->redirect) done_uri(cached->redirect);
	cached->redirect = get_uri(uristring, 0);
	cached->redirect_get = get;
	if (incomplete >= 0) cached->incomplete = incomplete;

	mem_free(uristring);

	return cached->redirect;
}
void
garbage_collection(int whole)
{
	struct cache_entry *cached;
	/* We recompute cache_size when scanning cache entries, to ensure
	 * it is in sync. */
	unsigned longlong old_cache_size = 0;
	/* The maximal cache size tolerated by the user. Note that this is only
	 * the size of the "just stored" unused cache entries; used cache
	 * entries are not counted into it. */
	unsigned longlong opt_cache_size = get_opt_long("document.cache.memory.size");
	/* The low-threshold cache size. Basically, when the cache size is
	 * higher than opt_cache_size, we free the cache so that there is no
	 * more than this value in the cache anymore. This is to make sure we
	 * aren't cleaning the cache too frequently when working with a lot of
	 * small cache entries, but rather free more and then let it grow a
	 * little more as well. */
	unsigned longlong gc_cache_size = opt_cache_size * MEMORY_CACHE_GC_PERCENT / 100;
	/* The cache size we aim to reach. */
	unsigned longlong new_cache_size = cache_size;
	/* Whether we've hit a used (unfreeable) entry while collecting
	 * garbage. */
	int obstacle_entry = 0;
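	/* For a rough feel of the thresholds (illustrative numbers only;
	 * MEMORY_CACHE_GC_PERCENT is defined elsewhere): with
	 * document.cache.memory.size set to 1 MiB and a GC percentage of 75,
	 * collection is triggered once unused entries exceed 1 MiB and then
	 * frees entries until at most roughly 768 KiB of unused data remain. */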
778 DBG("gc whole=%d opt_cache_size=%ld gc_cache_size=%ld",
779 whole
, opt_cache_size
,gc_cache_size
);
782 if (!whole
&& cache_size
<= opt_cache_size
) return;

	/* Scanning cache, pass #1:
	 * Weed out the used cache entries from @new_cache_size, so that we
	 * will work only with the unused entries from then on. Also ensure
	 * that @cache_size is in sync. */

	foreach (cached, cache_entries) {
		old_cache_size += cached->data_size;

		if (!is_object_used(cached) && !is_entry_used(cached))
			continue;

		assertm(new_cache_size >= cached->data_size,
			"cache_size (%ld) underflow: subtracting %ld from %ld",
			cache_size, cached->data_size, new_cache_size);

		new_cache_size -= cached->data_size;

		if_assert_failed { new_cache_size = 0; }
	}

	assertm(old_cache_size == cache_size,
		"cache_size out of sync: %ld != (actual) %ld",
		cache_size, old_cache_size);
	if_assert_failed { cache_size = old_cache_size; }

	if (!whole && new_cache_size <= opt_cache_size) return;

	/* Scanning cache, pass #2:
	 * Mark potential targets for destruction, from the oldest to the
	 * newest. */

	foreachback (cached, cache_entries) {
		/* Have we shrunk enough already? */
		if (!whole && new_cache_size <= gc_cache_size)
			goto shrinked_enough;

		/* Skip used cache entries. */
		if (is_object_used(cached) || is_entry_used(cached)) {
			obstacle_entry = 1;
			cached->gc_target = 0;
			continue;
		}

		/* FIXME: Optionally take cached->max_age into consideration,
		 * but that will probably complicate things too much. We'd have
		 * to sort entries so as to prioritize removing the oldest
		 * entries. */

		assertm(new_cache_size >= cached->data_size,
			"cache_size (%ld) underflow: subtracting %ld from %ld",
			cache_size, cached->data_size, new_cache_size);

		/* Mark me for destruction, sir. */
		cached->gc_target = 1;
		new_cache_size -= cached->data_size;

		if_assert_failed { new_cache_size = 0; }
	}

	/* If we'd free the whole cache... */
	assertm(new_cache_size == 0,
		"cache_size (%ld) overflow: %ld",
		cache_size, new_cache_size);
	if_assert_failed { new_cache_size = 0; }

shrinked_enough:

	/* Now turn around and start walking in the opposite direction. */
	cached = cached->next;

	/* Something is strange when we decided all is OK before dropping any
	 * cache entry. */
	if ((void *) cached == &cache_entries) return;

	if (!whole) {
		struct cache_entry *entry;

		/* Scanning cache, pass #3:
		 * Walk back in the cache and unmark the cache entries which
		 * could still fit into the cache. */

		/* This makes sense when the newest entry is HUGE and after it
		 * there's just plenty of tiny entries. By this point, all the
		 * tiny entries would be marked for deletion even though it'd
		 * be enough to free the huge entry. This actually fixes that
		 * situation. */

		for (entry = cached; (void *) entry != &cache_entries; entry = entry->next) {
			unsigned longlong newer_cache_size = new_cache_size + entry->data_size;

			if (newer_cache_size > gc_cache_size)
				continue;

			new_cache_size = newer_cache_size;
			entry->gc_target = 0;
		}
	}

	/* Scanning cache, pass #4:
	 * Destroy the marked entries. So sad, but that's life, bro'. */

	for (; (void *) cached != &cache_entries; ) {
		cached = cached->next;
		if (cached->prev->gc_target)
			delete_cache_entry(cached->prev);
	}

	if ((whole || !obstacle_entry) && cache_size > gc_cache_size) {
		DBG("garbage collection doesn't work, cache size %ld > %ld, "
		    "document.cache.memory.size set to: %ld bytes",
		    cache_size, gc_cache_size,
		    get_opt_long("document.cache.memory.size"));
	}
}