#include "bfu/dialog.h"
#include "cache/cache.h"
#include "cache/dialogs.h"
#include "config/options.h"
#include "main/main.h"
#include "main/object.h"
#include "network/connection.h"
#include "protocol/protocol.h"
#include "protocol/proxy.h"
#include "protocol/uri.h"
#include "util/error.h"
#include "util/memory.h"
#include "util/string.h"
#include "util/time.h"
/* The list of cache entries */
static INIT_LIST_OF(struct cache_entry, cache_entries);

static unsigned longlong cache_size;
static int id_counter = 1;

static void truncate_entry(struct cache_entry *cached, off_t offset, int final);
/* Change 0 to 1 to enable cache debugging features (redirect stderr to a file). */
#define DEBUG_CACHE 0

#if DEBUG_CACHE
#define dump_frag(frag, count) \
do { \
	DBG(" [%d] f=%p offset=%" OFF_PRINT_FORMAT \
	    " length=%" OFF_PRINT_FORMAT \
	    " real_length=%" OFF_PRINT_FORMAT, \
	    count, frag, (off_print_T) frag->offset, \
	    (off_print_T) frag->length, (off_print_T) frag->real_length); \
} while (0)

#define dump_frags(entry, comment) \
do { \
	struct fragment *frag; \
	int count = 0; \
 \
	DBG("%s: url=%s, cache_size=%li", comment, struri(entry->uri), cache_size); \
	foreach (frag, entry->frag) \
		dump_frag(frag, ++count); \
} while (0)

#else
#define dump_frag(frag, count)
#define dump_frags(entry, comment)
#endif /* DEBUG_CACHE */
int
get_cache_entry_count(void)
{
	return list_size(&cache_entries);
}
int
get_cache_entry_used_count(void)
{
	struct cache_entry *cached;
	int i = 0;

	foreach (cached, cache_entries)
		i += is_object_used(cached);

	return i;
}
int
get_cache_entry_loading_count(void)
{
	struct cache_entry *cached;
	int i = 0;

	foreach (cached, cache_entries)
		i += is_entry_used(cached);

	return i;
}
struct cache_entry *
find_in_cache(struct uri *uri)
{
	struct cache_entry *cached;
	int proxy = (uri->protocol == PROTOCOL_PROXY);

	foreach (cached, cache_entries) {
		struct uri *c_uri;

		if (!cached->valid) continue;

		c_uri = proxy ? cached->proxy_uri : cached->uri;
		if (!compare_uri(c_uri, uri, URI_BASE))
			continue;

		move_to_top_of_list(cache_entries, cached);
		return cached;
	}

	return NULL;
}
struct cache_entry *
get_cache_entry(struct uri *uri)
{
	struct cache_entry *cached = find_in_cache(uri);

	assertm(!uri->fragment, "Fragment in URI (%s)", struri(uri));

	if (cached) return cached;

	cached = mem_calloc(1, sizeof(*cached));
	if (!cached) return NULL;

	cached->uri = get_proxied_uri(uri);

	cached->proxy_uri = get_proxy_uri(uri, NULL);
	if (!cached->proxy_uri) {
		done_uri(cached->uri);
		mem_free(cached);
		return NULL;
	}
	cached->incomplete = 1;

	init_list(cached->frag);
	cached->cache_id = id_counter++;
	object_nolock(cached, "cache_entry"); /* Debugging purpose. */

	cached->box_item = add_listbox_leaf(&cache_browser, NULL, cached);

	add_to_list(cache_entries, cached);

	return cached;
}
static int
cache_entry_has_expired(struct cache_entry *cached)
{
	timeval_T now;

	timeval_now(&now);

	return timeval_cmp(&cached->max_age, &now) <= 0;
}
struct cache_entry *
get_validated_cache_entry(struct uri *uri, enum cache_mode cache_mode)
{
	struct cache_entry *cached;

	/* We have to check if something should be reloaded */
	if (cache_mode > CACHE_MODE_NORMAL)
		return NULL;

	/* We only consider complete entries */
	cached = find_in_cache(uri);
	if (!cached || cached->incomplete)
		return NULL;

	/* A bit of a gray zone. Delete the entry if it has the strictest
	 * cache mode and we don't want the most aggressive mode, or we have
	 * to remove the redirect, or the entry expired. Please enlighten me. */
	if ((cached->cache_mode == CACHE_MODE_NEVER && cache_mode != CACHE_MODE_ALWAYS)
	    || (cached->redirect && !get_opt_bool("document.cache.cache_redirects", NULL))
	    || (cached->expire && cache_entry_has_expired(cached))) {
		if (!is_object_used(cached)) delete_cache_entry(cached);
		return NULL;
	}

	if (cached->cache_mode <= CACHE_MODE_CHECK_IF_MODIFIED
	    && cache_mode <= CACHE_MODE_CHECK_IF_MODIFIED
	    && (cached->last_modified || cached->etag)
	    && get_opt_int("document.cache.revalidation_interval", NULL) >= 0) {
		if (cached->seconds + get_opt_int("document.cache.revalidation_interval", NULL) < time(NULL))
			return NULL;
	}

	return cached;
}
int
cache_entry_is_valid(struct cache_entry *cached)
{
	struct cache_entry *valid_cached;

	foreach (valid_cached, cache_entries) {
		if (valid_cached == cached)
			return 1;
	}

	return 0;
}
struct cache_entry *
follow_cached_redirects(struct cache_entry *cached)
{
	int redirects = 0;

	while (cached) {
		if (!cached->redirect) {
			/* XXX: This is not quite true, but does that difference
			 * matter here? */
			return cached;
		}

		if (++redirects > MAX_REDIRECTS) break;

		cached = find_in_cache(cached->redirect);
	}

	return NULL;
}

struct cache_entry *
get_redirected_cache_entry(struct uri *uri)
{
	struct cache_entry *cached = find_in_cache(uri);

	return cached ? follow_cached_redirects(cached) : NULL;
}
static void
enlarge_entry(struct cache_entry *cached, off_t size)
{
	cached->data_size += size;
	assertm(cached->data_size >= 0,
		"cache entry data_size underflow: %ld", cached->data_size);
	if_assert_failed { cached->data_size = 0; }

	cache_size += size;
	assertm(cache_size >= 0, "cache_size underflow: %ld", cache_size);
	if_assert_failed { cache_size = 0; }
}
#define CACHE_PAD(x) (((x) | 0x3fff) + 1)

/* One byte is reserved for data in struct fragment. */
#define FRAGSIZE(x) (sizeof(struct fragment) + (x) - 1)
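
/* Illustrative arithmetic (not used by the code itself): CACHE_PAD() rounds
 * a requested size up to a 16 KiB step, e.g. CACHE_PAD(1000) == 16384 and
 * CACHE_PAD(16384) == 32768, so a freshly allocated fragment gets slack to
 * grow into before it has to be reallocated. FRAGSIZE(n) is then the size of
 * the whole mmap()ed block for n data bytes: the struct fragment header plus
 * n, minus the one data byte already counted inside the struct. */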

/* We store the fragments themselves in a private vault, safely separated from
 * the rest of the memory structures. If we lived in the main libc memory
 * pool, we would trigger annoying pathological behaviour like artificially
 * enlarging the memory pool to 50M, then securing it with some stupid cookie
 * record at the top, and then no matter how you flush the cache the data
 * segment stays that big.
 *
 * Cool, but we don't want that, so fragments (where the big data is stored)
 * live in their little mmap()ed worlds. There is some overhead, but if we
 * assume a single fragment per cache entry and a page size (mmap() allocation
 * granularity) of 4096, then for a squad of ten 1kb documents this amounts to
 * about 30kb (each 1kb document wastes roughly 3kb of its page). That's not
 * *that* horrible when you realize that the freshmeat front page takes 300kb
 * in memory and we usually do not deal with documents so small that the max.
 * 4kb overhead would be visible there.
 *
 * The alternative would of course be to manage an entire custom memory pool,
 * but that is feasible only when we are able to resize it efficiently. We
 * aren't, except on Linux.
 *
 * Of course, for all this to really prevent the pathological cases completely,
 * we need to stuff the rendered documents in too, because they seem to account
 * for the major memory bursts. */
static struct fragment *
frag_alloc(size_t size)
{
	struct fragment *f = mem_mmap_alloc(FRAGSIZE(size));

	if (!f) return NULL;
	memset(f, 0, FRAGSIZE(size));
	return f;
}
static struct fragment *
frag_realloc(struct fragment *f, size_t size)
{
	return mem_mmap_realloc(f, FRAGSIZE(f->real_length), FRAGSIZE(size));
}
static void
frag_free(struct fragment *f)
{
	mem_mmap_free(f, FRAGSIZE(f->real_length));
}
/* Concatenate overlapping fragments. */
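/* A hypothetical illustration: if f covers bytes [0, 100) and f->next covers
 * [80, 150), the tail [100, 150) of f->next is copied onto the end of f and
 * f->next is removed; if f->next instead covered [80, 90), it is a complete
 * subset of f and is simply dropped. In both cases *trunc is set when the
 * overlapping bytes do not match what f already holds. */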
static void
remove_overlaps(struct cache_entry *cached, struct fragment *f, int *trunc)
{
	off_t f_end_offset = f->offset + f->length;

	/* Iterate through all fragments we still overlap with. */
	while (list_has_next(cached->frag, f)
	       && f_end_offset > f->next->offset) {
		struct fragment *nf;
		off_t end_offset = f->next->offset + f->next->length;

		if (f_end_offset < end_offset) {
			/* We end before the end of the following fragment,
			 * though. So try to append the overlapping part of
			 * that fragment to us. */
			nf = frag_realloc(f, end_offset - f->offset);
			if (nf) {
				nf->prev->next = nf;
				nf->next->prev = nf;
				f = nf;

				if (memcmp(f->data + f->next->offset - f->offset,
					   f->next->data,
					   f->offset + f->length - f->next->offset))
					*trunc = 1;

				memcpy(f->data + f->length,
				       f->next->data + f_end_offset - f->next->offset,
				       end_offset - f_end_offset);

				enlarge_entry(cached, end_offset - f_end_offset);
				f->length = f->real_length = end_offset - f->offset;
			}

		} else {
			/* We will just discard this, it's a complete subset of
			 * our new fragment. */
			if (memcmp(f->data + f->next->offset - f->offset,
				   f->next->data,
				   f->next->length))
				*trunc = 1;
		}

		/* Remove the fragment, it influences our new one! */
		nf = f->next;
		enlarge_entry(cached, -nf->length);
		del_from_list(nf);
		frag_free(nf);
	}
}
/* Note that this function is maybe overcommented, but I'm certainly not
 * unhappy about that. */
int
add_fragment(struct cache_entry *cached, off_t offset,
	     const unsigned char *data, ssize_t length)
{
	struct fragment *f, *nf;
	off_t end_offset;
	int trunc = 0;

	if (!length) return 0;

	end_offset = offset + length;
	if (cached->length < end_offset)
		cached->length = end_offset;

	/* The id marks each entry and changes each time the entry is
	 * modified; it is used by the HTML renderer. */
	cached->cache_id = id_counter++;
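	/* For example (illustrative, not a guarantee of the renderer's exact
	 * strategy): code that cached work keyed on the old cache_id can
	 * compare its stored value against cached->cache_id and redo the
	 * work only when the two no longer match. */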

	/* Possibly insert the new data in the middle of an existing fragment. */
	foreach (f, cached->frag) {
		int ret = 0;
		off_t f_end_offset = f->offset + f->length;

		/* No intersection? */
		if (f->offset > offset) break;
		if (f_end_offset < offset) continue;

		if (end_offset > f_end_offset) {
			/* Overlap - we end further than the original fragment. */

			if (end_offset - f->offset <= f->real_length) {
				/* We fit here, so let's enlarge it by the delta
				 * of the old and new end.. */
				enlarge_entry(cached, end_offset - f_end_offset);
				/* ..and length is now the total length. */
				f->length = end_offset - f->offset;

				ret = 1; /* It was enlarged. */
			} else {
				/* We will reduce the fragment length to just
				 * the starting, non-intersecting part and add
				 * a new fragment directly after this one. */
				f->length = offset - f->offset;
			}

		} /* else We are a subset of the original fragment. */

		/* Copy the stuff over there. */
		memcpy(f->data + offset - f->offset, data, length);

		remove_overlaps(cached, f, &trunc);

		/* We truncate the entry even if the data contents are the
		 * same as what we have in the fragment, because that does
		 * not mean that what is going to follow won't differ. This
		 * is a serious problem when rendering an HTML frame with
		 * onload snippets - we "guess" the rest of the document here,
		 * interpret the snippet, then it turns out in the real
		 * document the snippet is different and we are in trouble.
		 *
		 * Debugging this took me about 1.5 days (really), the diff
		 * with all the debugging print commands amounted to about
		 * 20kb (gdb wasn't much use since it stalled the download,
		 * de facto eliminating the bad behaviour). */
		truncate_entry(cached, end_offset, 0);

		dump_frags(cached, "add_fragment");

		return ret;
	}

	/* Make up a new fragment. */
	nf = frag_alloc(CACHE_PAD(length));
	if (!nf) return -1;

	nf->offset = offset;
	nf->length = length;
	nf->real_length = CACHE_PAD(length);
	memcpy(nf->data, data, length);
	add_at_pos(f->prev, nf);

	enlarge_entry(cached, length);

	remove_overlaps(cached, nf, &trunc);
	if (trunc) truncate_entry(cached, end_offset, 0);

	dump_frags(cached, "add_fragment");

	return 0;
}
/* Try to defragment the cache entry. Defragmentation will not be possible
 * if there is a gap in the fragments; if we have bytes 1-100 in one fragment
 * and bytes 201-300 in the second, we must leave those two fragments separate
 * so that the fragment for bytes 101-200 can later be inserted. However,
 * if we have the fragments for bytes 1-100, 101-200, and 201-300, we will
 * concatenate them into one new fragment and replace the original fragments
 * with that new fragment.
 *
 * If there are no fragments, return NULL. If there is no fragment with byte 1,
 * return NULL. Otherwise, return the first fragment, whether or not it was
 * possible to fully defragment the entry. */
struct fragment *
get_cache_fragment(struct cache_entry *cached)
{
	struct fragment *first_frag, *adj_frag, *frag, *new_frag;
	int new_frag_len;

	if (list_empty(cached->frag))
		return NULL;

	first_frag = cached->frag.next;
	if (first_frag->offset)
		return NULL;

	/* Only one fragment so no defragmentation is needed */
	if (list_is_singleton(cached->frag))
		return first_frag;

	/* Find the first pair of fragments with a gap in between. Only
	 * fragments up to the first gap can be defragmented. */
	for (adj_frag = first_frag->next; adj_frag != (void *) &cached->frag;
	     adj_frag = adj_frag->next) {
		long gap = adj_frag->offset
			   - (adj_frag->prev->offset + adj_frag->prev->length);

		if (gap == 0) continue;
		if (gap > 0) break;

		INTERNAL("fragments overlap");
		return NULL;
	}

	/* There is a gap between the first two fragments, so we can't
	 * defragment anything. */
	if (adj_frag == first_frag->next)
		return first_frag;

	/* Calculate the length of the defragmented fragment. */
	for (new_frag_len = 0, frag = first_frag;
	     frag != adj_frag;
	     frag = frag->next)
		new_frag_len += frag->length;

	/* XXX: If the defragmentation fails because of allocation failure,
	 * fall back to returning the first fragment and pretend all is well. */
	/* FIXME: Is this terribly brain-dead? It corresponds to the semantics
	 * of the code this extended version of the old defrag_entry() is
	 * supposed to replace. --jonas */
	new_frag = frag_alloc(new_frag_len);
	if (!new_frag)
		return first_frag->length ? first_frag : NULL;

	new_frag->length = new_frag_len;
	new_frag->real_length = new_frag_len;

	for (new_frag_len = 0, frag = first_frag;
	     frag != adj_frag;
	     frag = frag->next) {
		struct fragment *tmp = frag;

		memcpy(new_frag->data + new_frag_len, frag->data, frag->length);
		new_frag_len += frag->length;

		frag = frag->prev;
		del_from_list(tmp);
		frag_free(tmp);
	}

	add_to_list(cached->frag, new_frag);

	dump_frags(cached, "get_cache_fragment");

	return new_frag;
}
static void
delete_fragment(struct cache_entry *cached, struct fragment *f)
{
	while ((void *) f != &cached->frag) {
		struct fragment *tmp = f->next;

		enlarge_entry(cached, -f->length);
		del_from_list(f);
		frag_free(f);
		f = tmp;
	}
}
static void
truncate_entry(struct cache_entry *cached, off_t offset, int final)
{
	struct fragment *f;

	if (cached->length > offset) {
		cached->length = offset;
		cached->incomplete = 1;
	}

	foreach (f, cached->frag) {
		off_t size = offset - f->offset;

		/* XXX: is a zero length fragment really legal here? --Zas */
		assert(f->length >= 0);

		if (size >= f->length) continue;

		if (size > 0) {
			struct fragment *nf;

			enlarge_entry(cached, -(f->length - size));
			f->length = size;

			if (final) {
				nf = frag_realloc(f, f->length);
				if (nf) {
					nf->next->prev = nf;
					nf->prev->next = nf;
					f = nf;
					f->real_length = f->length;
				}
			}

			f = f->next;
		}

		delete_fragment(cached, f);

		dump_frags(cached, "truncate_entry");
		return;
	}
}
void
free_entry_to(struct cache_entry *cached, off_t offset)
{
	struct fragment *f;

	foreach (f, cached->frag) {
		if (f->offset + f->length <= offset) {
			struct fragment *tmp = f;

			enlarge_entry(cached, -f->length);
			f = f->prev;
			del_from_list(tmp);
			frag_free(tmp);
		} else if (f->offset < offset) {
			off_t size = offset - f->offset;

			enlarge_entry(cached, -size);
			f->length -= size;
			memmove(f->data, f->data + size, f->length);
			f->offset = offset;
		} else break;
	}
}
void
delete_entry_content(struct cache_entry *cached)
{
	enlarge_entry(cached, -cached->data_size);

	while (cached->frag.next != (void *) &cached->frag) {
		struct fragment *f = cached->frag.next;

		del_from_list(f);
		frag_free(f);
	}
	cached->cache_id = id_counter++;
	cached->length = 0;
	cached->incomplete = 1;

	mem_free_set(&cached->last_modified, NULL);
	mem_free_set(&cached->etag, NULL);
}
static void
done_cache_entry(struct cache_entry *cached)
{
	assertm(!is_object_used(cached), "deleting locked cache entry");
	assertm(!is_entry_used(cached), "deleting loading cache entry");

	delete_entry_content(cached);

	if (cached->box_item) done_listbox_item(&cache_browser, cached->box_item);

	if (cached->uri) done_uri(cached->uri);
	if (cached->proxy_uri) done_uri(cached->proxy_uri);
	if (cached->redirect) done_uri(cached->redirect);

	mem_free_if(cached->head);
	mem_free_if(cached->content_type);
	mem_free_if(cached->last_modified);
	mem_free_if(cached->ssl_info);
	mem_free_if(cached->encoding_info);
	mem_free_if(cached->etag);

	mem_free(cached);
}

void
delete_cache_entry(struct cache_entry *cached)
{
	del_from_list(cached);

	done_cache_entry(cached);
}
void
normalize_cache_entry(struct cache_entry *cached, off_t truncate_length)
{
	if (truncate_length < 0)
		return;

	truncate_entry(cached, truncate_length, 1);
	cached->incomplete = 0;
	cached->preformatted = 0;
	cached->seconds = time(NULL);
}
struct uri *
redirect_cache(struct cache_entry *cached, unsigned char *location,
	       int get, int incomplete)
{
	unsigned char *uristring;

	/* XXX: I am a little puzzled whether we should only use the cache
	 * entry's URI if it is valid. Hopefully always using it won't hurt.
	 * Currently we handle directory redirects where "/" should be
	 * appended as a special case; dunno if join_urls() could be made to
	 * handle that. */

	/* XXX: We are assuming here that incomplete will only be zero when
	 * doing these fake redirects whose only purpose is to add an ending
	 * slash *cough* dirseparator to the end of the URI. */
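	/* E.g. (hypothetical values): with cached->uri "http://host/dir",
	 * location "/" and incomplete == 0, the branch below just produces
	 * "http://host/dir/"; a real redirect location goes through
	 * join_urls() instead. */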
	if (incomplete == 0 && dir_sep(location[0]) && location[1] == 0) {
		/* To be sure, use get_uri_string() to get rid of post data */
		uristring = get_uri_string(cached->uri, URI_ORIGINAL);
		if (uristring) add_to_strn(&uristring, location);
	} else {
		uristring = join_urls(cached->uri, location);
	}

	if (!uristring) return NULL;

	/* Only add the post data if the redirect should not use the GET
	 * method. This is tied to the HTTP handling of the 303 and (if
	 * protocol.http.bugs.broken_302_redirect is enabled) the 302 status
	 * codes. */
	if (cached->uri->post
	    && !cached->redirect_get
	    && !get) {
		/* XXX: Add POST_CHAR and post data assuming URI components
		 * belong to one string. */

		/* To be certain we don't append post data twice in some
		 * conditions... --Zas */
		assert(!strchr(uristring, POST_CHAR));

		add_to_strn(&uristring, cached->uri->post - 1);
	}

	if (cached->redirect) done_uri(cached->redirect);
	cached->redirect = get_uri(uristring, 0);
	cached->redirect_get = get;
	if (incomplete >= 0) cached->incomplete = incomplete;

	mem_free(uristring);

	return cached->redirect;
}
void
garbage_collection(int whole)
{
	struct cache_entry *cached;
	/* We recompute cache_size when scanning cache entries, to ensure
	 * that it stays in sync. */
	unsigned longlong old_cache_size = 0;
	/* The maximal cache size tolerated by the user. Note that this is
	 * only the size of the "just stored" unused cache entries; used
	 * cache entries are not counted into that. */
	unsigned longlong opt_cache_size = get_opt_long("document.cache.memory.size", NULL);
	/* The low-threshold cache size. Basically, when the cache size is
	 * higher than opt_cache_size, we free the cache so that there is no
	 * more than this value in the cache anymore. This is to make sure we
	 * aren't cleaning the cache too frequently when working with a lot of
	 * small cache entries but rather free more and then let it grow a
	 * little more as well. */
	unsigned longlong gc_cache_size = opt_cache_size * MEMORY_CACHE_GC_PERCENT / 100;
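	/* Worked example (illustrative numbers only): with
	 * document.cache.memory.size set to 1048576 bytes and
	 * MEMORY_CACHE_GC_PERCENT at, say, 75, gc_cache_size is 786432, so a
	 * collection keeps freeing unused entries until at most roughly
	 * 768 KiB of them remain. */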
	/* The cache size we aim to reach. */
	unsigned longlong new_cache_size = cache_size;
	/* Whether we've hit a used (unfreeable) entry while collecting
	 * the garbage. */
	int obstacle_entry = 0;

	DBG("gc whole=%d opt_cache_size=%ld gc_cache_size=%ld",
	    whole, opt_cache_size, gc_cache_size);

	if (!whole && cache_size <= opt_cache_size) return;


	/* Scanning cache, pass #1:
	 * Weed out the used cache entries from @new_cache_size, so that we
	 * will work only with the unused entries from then on. Also ensure
	 * that @cache_size is in sync. */

	foreach (cached, cache_entries) {
		old_cache_size += cached->data_size;

		if (!is_object_used(cached) && !is_entry_used(cached))
			continue;

		assertm(new_cache_size >= cached->data_size,
			"cache_size (%ld) underflow: subtracting %ld from %ld",
			cache_size, cached->data_size, new_cache_size);

		new_cache_size -= cached->data_size;

		if_assert_failed { new_cache_size = 0; }
	}

	assertm(old_cache_size == cache_size,
		"cache_size out of sync: %ld != (actual) %ld",
		cache_size, old_cache_size);
	if_assert_failed { cache_size = old_cache_size; }

	if (!whole && new_cache_size <= opt_cache_size) return;


	/* Scanning cache, pass #2:
	 * Mark potential targets for destruction, from the oldest to the
	 * newest. */

	foreachback (cached, cache_entries) {
		/* Have we shrunk enough already? */
		if (!whole && new_cache_size <= gc_cache_size)
			goto shrinked_enough;

		/* Skip used cache entries. */
		if (is_object_used(cached) || is_entry_used(cached)) {
			obstacle_entry = 1;
			cached->gc_target = 0;
			continue;
		}

		/* FIXME: Optionally take cached->max_age into consideration,
		 * but that will probably complicate things too much. We'd
		 * have to sort entries to prioritize removing the oldest
		 * entries. */

		assertm(new_cache_size >= cached->data_size,
			"cache_size (%ld) underflow: subtracting %ld from %ld",
			cache_size, cached->data_size, new_cache_size);

		/* Mark me for destruction, sir. */
		cached->gc_target = 1;
		new_cache_size -= cached->data_size;

		if_assert_failed { new_cache_size = 0; }
	}

	/* If we'd free the whole cache... */
	assertm(new_cache_size == 0,
		"cache_size (%ld) overflow: %ld",
		cache_size, new_cache_size);
	if_assert_failed { new_cache_size = 0; }

shrinked_enough:

	/* Now turn around and start walking in the opposite direction. */
	cached = cached->next;

	/* Something is strange if we decided all is OK before dropping any
	 * cache entries. */
	if ((void *) cached == &cache_entries) return;


	if (!whole) {
		struct cache_entry *entry;

		/* Scanning cache, pass #3:
		 * Walk back in the cache and unmark the cache entries which
		 * could still fit into the cache. */

		/* This makes sense when the newest entry is HUGE and after it,
		 * there's just plenty of tiny entries. By this point, all the
		 * tiny entries would be marked for deletion even though it'd
		 * be enough to free the huge entry. This actually fixes that
		 * behaviour. */

		for (entry = cached; (void *) entry != &cache_entries; entry = entry->next) {
			unsigned longlong newer_cache_size = new_cache_size + entry->data_size;

			if (newer_cache_size > gc_cache_size)
				continue;

			new_cache_size = newer_cache_size;
			entry->gc_target = 0;
		}
	}


	/* Scanning cache, pass #4:
	 * Destroy the marked entries. So sad, but that's life, bro'. */

	for (; (void *) cached != &cache_entries; ) {
		cached = cached->next;
		if (cached->prev->gc_target)
			delete_cache_entry(cached->prev);
	}


	if ((whole || !obstacle_entry) && cache_size > gc_cache_size) {
		DBG("garbage collection doesn't work, cache size %ld > %ld, "
		    "document.cache.memory.size set to: %ld bytes",
		    cache_size, gc_cache_size,
		    get_opt_long("document.cache.memory.size", NULL));
	}
}