11 #include "bfu/dialog.h"
12 #include "cache/cache.h"
13 #include "cache/dialogs.h"
14 #include "config/options.h"
15 #include "main/main.h"
16 #include "main/object.h"
17 #include "network/connection.h"
18 #include "protocol/protocol.h"
19 #include "protocol/proxy.h"
20 #include "protocol/uri.h"
21 #include "util/error.h"
22 #include "util/memory.h"
23 #include "util/string.h"
24 #include "util/time.h"
/* The list of cache entries */
static INIT_LIST_HEAD(cache_entries);

static unsigned longlong cache_size;
static int id_counter = 1;

static void truncate_entry(struct cache_entry *cached, off_t offset, int final);
/* Change 0 to 1 to enable cache debugging features (redirect stderr to a file). */
#if 0
#define DEBUG_CACHE
#endif

#ifdef DEBUG_CACHE

#define dump_frag(frag, count) \
do { \
	DBG(" [%d] f=%p offset=%" OFF_T_FORMAT " length=%" OFF_T_FORMAT \
	    " real_length=%" OFF_T_FORMAT, \
	    count, frag, frag->offset, frag->length, frag->real_length); \
} while (0)

#define dump_frags(entry, comment) \
do { \
	struct fragment *frag; \
	int count = 0; \
	\
	DBG("%s: url=%s, cache_size=%li", comment, struri(entry->uri), cache_size); \
	foreach (frag, entry->frag) \
		dump_frag(frag, ++count); \
} while (0)

#else
#define dump_frags(entry, comment)
#endif /* DEBUG_CACHE */
int
get_cache_entry_count(void)
{
	return list_size(&cache_entries);
}

int
get_cache_entry_used_count(void)
{
	struct cache_entry *cached;
	int i = 0;

	foreach (cached, cache_entries)
		i += is_object_used(cached);

	return i;
}

int
get_cache_entry_loading_count(void)
{
	struct cache_entry *cached;
	int i = 0;

	foreach (cached, cache_entries)
		i += is_entry_used(cached);

	return i;
}
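
/* Look up @uri among the cache entries. For proxy URIs the entry's
 * proxy_uri is compared instead of its plain URI. A hit is moved to the
 * top of @cache_entries, so the list stays roughly ordered by recency of
 * use. Returns NULL when nothing matches. */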
struct cache_entry *
find_in_cache(struct uri *uri)
{
	struct cache_entry *cached;
	int proxy = (uri->protocol == PROTOCOL_PROXY);

	foreach (cached, cache_entries) {
		struct uri *c_uri;

		if (!cached->valid) continue;

		c_uri = proxy ? cached->proxy_uri : cached->uri;
		if (!compare_uri(c_uri, uri, URI_BASE))
			continue;

		move_to_top_of_list(cache_entries, cached);

		return cached;
	}

	return NULL;
}
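
/* Return the cache entry for @uri, creating and registering an empty one
 * if it does not exist yet. Returns NULL only on allocation failure. */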
struct cache_entry *
get_cache_entry(struct uri *uri)
{
	struct cache_entry *cached = find_in_cache(uri);

	assertm(!uri->fragment, "Fragment in URI (%s)", struri(uri));

	if (cached) return cached;

	cached = mem_calloc(1, sizeof(*cached));
	if (!cached) return NULL;

	cached->uri = get_proxied_uri(uri);
	if (!cached->uri) {
		mem_free(cached);
		return NULL;
	}

	cached->proxy_uri = get_proxy_uri(uri, NULL);
	if (!cached->proxy_uri) {
		done_uri(cached->uri);
		mem_free(cached);
		return NULL;
	}

	cached->incomplete = 1;
	cached->valid = 1;

	init_list(cached->frag);
	cached->id = id_counter++;
	object_nolock(cached, "cache_entry"); /* Debugging purpose. */

	cached->box_item = add_listbox_leaf(&cache_browser, NULL, cached);

	add_to_list(cache_entries, cached);

	return cached;
}
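
/* Has the expiry time recorded in @cached->max_age already passed? */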
static int
cache_entry_has_expired(struct cache_entry *cached)
{
	timeval_T now;

	timeval_now(&now);

	return timeval_cmp(&cached->max_age, &now) <= 0;
}
struct cache_entry *
get_validated_cache_entry(struct uri *uri, enum cache_mode cache_mode)
{
	struct cache_entry *cached;

	/* We have to check if something should be reloaded */
	if (cache_mode > CACHE_MODE_NORMAL)
		return NULL;

	/* We only consider complete entries */
	cached = find_in_cache(uri);
	if (!cached || cached->incomplete)
		return NULL;

	/* A bit of a gray zone. Delete the entry if it has the strictest
	 * cache mode and we don't want the most aggressive mode, or we have
	 * to remove the redirect, or the entry has expired. Please enlighten
	 * me. */
	if ((cached->cache_mode == CACHE_MODE_NEVER && cache_mode != CACHE_MODE_ALWAYS)
	    || (cached->redirect && !get_opt_bool("document.cache.cache_redirects"))
	    || (cached->expire && cache_entry_has_expired(cached))) {
		if (!is_object_used(cached)) delete_cache_entry(cached);
		return NULL;
	}

	if (cached->cache_mode <= CACHE_MODE_CHECK_IF_MODIFIED
	    && cache_mode <= CACHE_MODE_CHECK_IF_MODIFIED
	    && (cached->last_modified || cached->etag)
	    && get_opt_int("document.cache.revalidation_interval") >= 0) {
		if (cached->seconds + get_opt_int("document.cache.revalidation_interval") < time(NULL))
			return NULL;
	}

	return cached;
}
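
/* Check that @cached is still on the global list of cache entries, i.e.
 * that the pointer has not been invalidated by a deletion in the meantime. */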
int
cache_entry_is_valid(struct cache_entry *cached)
{
	struct cache_entry *valid_cached;

	foreach (valid_cached, cache_entries) {
		if (valid_cached == cached)
			return 1;
	}

	return 0;
}
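
/* Follow the chain of cached redirects starting from @cached, giving up
 * (and returning NULL) after MAX_REDIRECTS hops or when a redirect target
 * is not in the cache. */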
struct cache_entry *
follow_cached_redirects(struct cache_entry *cached)
{
	int redirects = 0;

	while (cached) {
		if (!cached->redirect) {
			/* XXX: This is not quite true, but does that difference
			 * matter here? */
			return cached;
		}

		if (++redirects > MAX_REDIRECTS) break;

		cached = find_in_cache(cached->redirect);
	}

	return NULL;
}
struct cache_entry *
get_redirected_cache_entry(struct uri *uri)
{
	struct cache_entry *cached = find_in_cache(uri);

	return cached ? follow_cached_redirects(cached) : NULL;
}
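
/* Adjust both the entry's data_size and the global cache_size by @size,
 * which may be negative when fragments are dropped. */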
static void
enlarge_entry(struct cache_entry *cached, off_t size)
{
	cached->data_size += size;
	assertm(cached->data_size >= 0,
		"cache entry data_size underflow: %ld", cached->data_size);
	if_assert_failed { cached->data_size = 0; }

	cache_size += size;
	assertm(cache_size >= 0, "cache_size underflow: %ld", cache_size);
	if_assert_failed { cache_size = 0; }
}
#define CACHE_PAD(x) (((x) | 0x3fff) + 1)
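/* CACHE_PAD() rounds an allocation up to the next multiple of 16 KiB:
 * e.g. CACHE_PAD(1000) == 0x4000 (16384) and CACHE_PAD(0x4000) == 0x8000,
 * presumably so that appending to a growing fragment rarely forces an
 * immediate realloc. */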
/* One byte is reserved for data in struct fragment. */
#define FRAGSIZE(x) (sizeof(struct fragment) + (x) - 1)
/* We store the fragments themselves in a private vault, safely separated from
 * the rest of memory structures. If we lived in the main libc memory pool, we
 * would trigger annoying pathological behaviour like artificially enlarging
 * the memory pool to 50M, then securing it with some stupid cookie record at
 * the top, and then no matter how you flush the cache the data segment is
 * still 50M big.
 *
 * Cool, but we don't want that, so fragments (where the big data is stored)
 * live in their own little mmap()ed worlds. There is some overhead, but if we
 * assume a single fragment per cache entry and a page size (mmap() allocation
 * granularity) of 4096, then for a squad of ten 1kb documents this amounts to
 * 30kb. That's not *that* horrible when you realize that the freshmeat front
 * page takes 300kb in memory and we usually do not deal with documents so
 * small that the max. 4kb overhead would be visible there.
 *
 * The alternative would of course be to manage an entire custom memory pool,
 * but that is feasible only when we are able to resize it efficiently. We
 * aren't, except on Linux.
 *
 * Of course, for all this to really prevent the pathological cases completely,
 * we would need to stuff the rendered documents in too, because they seem to
 * account for the major memory bursts. */
static struct fragment *
frag_alloc(size_t size)
{
	struct fragment *f = mem_mmap_alloc(FRAGSIZE(size));

	if (!f) return NULL;
	memset(f, 0, FRAGSIZE(size));
	return f;
}

static struct fragment *
frag_realloc(struct fragment *f, size_t size)
{
	return mem_mmap_realloc(f, FRAGSIZE(f->real_length), FRAGSIZE(size));
}

static void
frag_free(struct fragment *f)
{
	mem_mmap_free(f, FRAGSIZE(f->real_length));
}
/* Concatenate overlapping fragments. */
static void
remove_overlaps(struct cache_entry *cached, struct fragment *f, int *trunc)
{
	off_t f_end_offset = f->offset + f->length;

	/* Iterate thru all fragments we still overlap to. */
	while (list_has_next(cached->frag, f)
	       && f_end_offset > f->next->offset) {
		struct fragment *nf;
		off_t end_offset = f->next->offset + f->next->length;

		if (f_end_offset < end_offset) {
			/* We end before the end of the following fragment,
			 * though. So try to append the overlapping part of
			 * that fragment to us. */
			nf = frag_realloc(f, end_offset - f->offset);
			if (nf) {
				nf->prev->next = nf;
				nf->next->prev = nf;
				f = nf;

				if (memcmp(f->data + f->next->offset - f->offset,
					   f->next->data,
					   f->offset + f->length - f->next->offset))
					*trunc = 1;

				memcpy(f->data + f->length,
				       f->next->data + f_end_offset - f->next->offset,
				       end_offset - f_end_offset);

				enlarge_entry(cached, end_offset - f_end_offset);
				f->length = f->real_length = end_offset - f->offset;
			}

		} else {
			/* We will just discard this, it's a complete subset of
			 * our new fragment. */
			if (memcmp(f->data + f->next->offset - f->offset,
				   f->next->data,
				   f->next->length))
				*trunc = 1;
		}

		/* Remove the fragment, it influences our new one! */
		nf = f->next;
		enlarge_entry(cached, -nf->length);
		del_from_list(nf);
		frag_free(nf);
	}
}
/* Note that this function is maybe overcommented, but I'm certainly not
 * unhappy about that. */
int
add_fragment(struct cache_entry *cached, off_t offset,
	     const unsigned char *data, ssize_t length)
{
	struct fragment *f, *nf;
	int trunc = 0;
	off_t end_offset;

	if (!length) return 0;

	end_offset = offset + length;
	if (cached->length < end_offset)
		cached->length = end_offset;

	/* The id marks each entry and changes each time the entry is
	 * modified; it is used by the HTML renderer. */
	cached->id = id_counter++;

	/* Possibly insert the new data in the middle of existing fragment. */
	foreach (f, cached->frag) {
		int ret = 0;
		off_t f_end_offset = f->offset + f->length;

		/* No intersection? */
		if (f->offset > offset) break;
		if (f_end_offset < offset) continue;

		if (end_offset > f_end_offset) {
			/* Overlap - we end further than the original fragment. */

			if (end_offset - f->offset <= f->real_length) {
				/* We fit here, so let's enlarge it by the delta
				 * of the old and new end.. */
				enlarge_entry(cached, end_offset - f_end_offset);
				/* ..and length is now the total length. */
				f->length = end_offset - f->offset;

				ret = 1; /* It was enlarged. */
			} else {
				/* We will reduce the fragment length only to
				 * the starting non-intersecting size and add a
				 * new fragment directly after this one. */
				f->length = offset - f->offset;
				f = f->next;
				break;
			}

		} /* else we are a subset of the original fragment. */

		/* Copy the stuff over there. */
		memcpy(f->data + offset - f->offset, data, length);

		remove_overlaps(cached, f, &trunc);

		/* We truncate the entry even if the data contents are the
		 * same as what we already have in the fragment, because that
		 * does not mean that what is going to follow won't differ.
		 * This is a serious problem when rendering an HTML frame with
		 * onload snippets - we "guess" the rest of the document here,
		 * interpret the snippet, then it turns out that in the real
		 * document the snippet is different and we are in trouble.
		 *
		 * Debugging this took me about 1.5 days (really), the diff with
		 * all the debugging print commands amounted to about 20kb (gdb
		 * wasn't much use since it stalled the download, de facto
		 * eliminating the bad behaviour). */
		truncate_entry(cached, end_offset, 0);

		dump_frags(cached, "add_fragment");

		return ret;
	}

	/* Make up a new fragment. */
	nf = frag_alloc(CACHE_PAD(length));
	if (!nf) return -1;

	nf->offset = offset;
	nf->length = length;
	nf->real_length = CACHE_PAD(length);
	memcpy(nf->data, data, length);
	add_at_pos(f->prev, nf);

	enlarge_entry(cached, length);

	remove_overlaps(cached, nf, &trunc);
	if (trunc) truncate_entry(cached, end_offset, 0);

	dump_frags(cached, "add_fragment");

	return 1;
}
/* Try to defragment the cache entry. Defragmentation will not be possible
 * if there is a gap in the fragments; if we have bytes 1-100 in one fragment
 * and bytes 201-300 in the second, we must leave those two fragments separate
 * so that the fragment for bytes 101-200 can later be inserted. However,
 * if we have the fragments for bytes 1-100, 101-200, and 201-300, we will
 * concatenate them into one new fragment and replace the original fragments
 * with that new fragment.
 *
 * If there are no fragments, return NULL. If there is no fragment with byte 1,
 * return NULL. Otherwise, return the first fragment, whether or not it was
 * possible to fully defragment the entry. */
struct fragment *
get_cache_fragment(struct cache_entry *cached)
{
	struct fragment *first_frag, *adj_frag, *frag, *new_frag;
	int new_frag_len;

	if (list_empty(cached->frag))
		return NULL;

	first_frag = cached->frag.next;
	if (first_frag->offset)
		return NULL;

	/* Only one fragment so no defragmentation is needed */
	if (list_is_singleton(cached->frag))
		return first_frag;

	/* Find the first pair of fragments with a gap in between. Only
	 * fragments up to the first gap can be defragmented. */
	for (adj_frag = first_frag->next; adj_frag != (void *) &cached->frag;
	     adj_frag = adj_frag->next) {
		long gap = adj_frag->offset
			   - (adj_frag->prev->offset + adj_frag->prev->length);

		if (gap > 0) break;
		if (gap == 0) continue;

		INTERNAL("fragments overlap");
		return NULL;
	}

	/* There is a gap between the first two fragments, so we can't
	 * defragment anything. */
	if (adj_frag == first_frag->next)
		return first_frag;

	/* Calculate the length of the defragmented fragment. */
	for (new_frag_len = 0, frag = first_frag;
	     frag != adj_frag;
	     frag = frag->next)
		new_frag_len += frag->length;

	/* XXX: If the defragmentation fails because of allocation failure,
	 * fall back to returning the first fragment and pretend all is well. */
	/* FIXME: Is this terribly brain-dead? It corresponds to the semantics of
	 * the code this extended version of the old defrag_entry() is supposed
	 * to replace. --jonas */
	new_frag = frag_alloc(new_frag_len);
	if (!new_frag)
		return first_frag->length ? first_frag : NULL;

	new_frag->length = new_frag_len;
	new_frag->real_length = new_frag_len;

	for (new_frag_len = 0, frag = first_frag;
	     frag != adj_frag;) {
		struct fragment *tmp = frag;

		memcpy(new_frag->data + new_frag_len, frag->data, frag->length);
		new_frag_len += frag->length;

		frag = frag->next;
		del_from_list(tmp);
		frag_free(tmp);
	}

	add_to_list(cached->frag, new_frag);

	dump_frags(cached, "get_cache_fragment");

	return new_frag;
}
static void
delete_fragment(struct cache_entry *cached, struct fragment *f)
{
	while ((void *) f != &cached->frag) {
		struct fragment *tmp = f->next;

		enlarge_entry(cached, -f->length);
		del_from_list(f);
		frag_free(f);
		f = tmp;
	}
}
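
/* Drop all cached data at and after @offset. When @final is set, the last
 * kept fragment is also shrunk to its exact length, presumably because no
 * more data is expected to be appended. */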
static void
truncate_entry(struct cache_entry *cached, off_t offset, int final)
{
	struct fragment *f;

	if (cached->length > offset) {
		cached->length = offset;
		cached->incomplete = 1;
	}

	foreach (f, cached->frag) {
		off_t size = offset - f->offset;

		/* XXX: is zero length fragment really legal here ? --Zas */
		assert(f->length >= 0);

		if (size >= f->length) continue;

		if (size > 0) {
			enlarge_entry(cached, -(f->length - size));
			f->length = size;

			if (final) {
				struct fragment *nf;

				nf = frag_realloc(f, f->length);
				if (nf) {
					nf->next->prev = nf;
					nf->prev->next = nf;
					f = nf;
					f->real_length = f->length;
				}
			}

			f = f->next;
		}

		delete_fragment(cached, f);

		dump_frags(cached, "truncate_entry");
		return;
	}
}
void
free_entry_to(struct cache_entry *cached, off_t offset)
{
	struct fragment *f;

	foreach (f, cached->frag) {
		if (f->offset + f->length <= offset) {
			struct fragment *tmp = f;

			enlarge_entry(cached, -f->length);
			f = f->prev;

			del_from_list(tmp);
			frag_free(tmp);
		} else if (f->offset < offset) {
			off_t size = offset - f->offset;

			enlarge_entry(cached, -size);
			f->length -= size;
			memmove(f->data, f->data + size, f->length);
			f->offset = offset;
		} else break;
	}
}
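
/* Throw away all fragments of @cached and reset it to an empty, incomplete
 * state; the id is bumped so that renderers notice the change, and the
 * cached validators (Last-Modified, ETag) are cleared. */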
void
delete_entry_content(struct cache_entry *cached)
{
	enlarge_entry(cached, -cached->data_size);

	while (cached->frag.next != (void *) &cached->frag) {
		struct fragment *f = cached->frag.next;

		del_from_list(f);
		frag_free(f);
	}
	cached->id = id_counter++;
	cached->length = 0;
	cached->incomplete = 1;

	mem_free_set(&cached->last_modified, NULL);
	mem_free_set(&cached->etag, NULL);
}
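
/* Free everything owned by @cached, including the entry itself. The entry
 * must not be locked by any object nor still loading. */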
static void
done_cache_entry(struct cache_entry *cached)
{
	assertm(!is_object_used(cached), "deleting locked cache entry");
	assertm(!is_entry_used(cached), "deleting loading cache entry");

	delete_entry_content(cached);

	if (cached->box_item) done_listbox_item(&cache_browser, cached->box_item);

	if (cached->uri) done_uri(cached->uri);
	if (cached->proxy_uri) done_uri(cached->proxy_uri);
	if (cached->redirect) done_uri(cached->redirect);

	mem_free_if(cached->head);
	mem_free_if(cached->content_type);
	mem_free_if(cached->last_modified);
	mem_free_if(cached->ssl_info);
	mem_free_if(cached->encoding_info);
	mem_free_if(cached->etag);

	mem_free(cached);
}
void
delete_cache_entry(struct cache_entry *cached)
{
	del_from_list(cached);

	done_cache_entry(cached);
}
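
/* Truncate @cached to @truncate_length (when non-negative), mark it
 * complete and stamp it with the current time. */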
void
normalize_cache_entry(struct cache_entry *cached, off_t truncate_length)
{
	if (truncate_length < 0)
		return;

	truncate_entry(cached, truncate_length, 1);
	cached->incomplete = 0;
	cached->preformatted = 0;
	cached->seconds = time(NULL);
}
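
/* Record a redirect from @cached to @location. Returns the new redirect
 * URI, or NULL if the redirect URI could not be composed. */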
struct uri *
redirect_cache(struct cache_entry *cached, unsigned char *location,
	       int get, int incomplete)
{
	unsigned char *uristring;

	/* XXX: I am a little puzzled whether we should only use the cache
	 * entry's URI if it is valid. Hopefully always using it won't hurt.
	 * Currently we handle directory redirects where "/" should be appended
	 * specially; dunno if join_urls() could be made to handle that. */
	/* XXX: We are assuming here that incomplete will only be zero when
	 * doing these fake redirects whose only purpose is to add an ending
	 * slash *cough* dirseparator to the end of the URI. */
	if (incomplete == 0 && dir_sep(location[0]) && location[1] == 0) {
		/* To be sure, use get_uri_string() to get rid of post data */
		uristring = get_uri_string(cached->uri, URI_ORIGINAL);
		if (uristring) add_to_strn(&uristring, location);
	} else {
		uristring = join_urls(cached->uri, location);
	}

	if (!uristring) return NULL;

	/* Only add the post data if the redirect should not use the GET
	 * method. This is tied to the HTTP handling of the 303 and (if the
	 * protocol.http.bugs.broken_302_redirect option is enabled) the 302
	 * status code handling. */
	if (cached->uri->post
	    && !cached->redirect_get
	    && !get) {
		/* XXX: Add POST_CHAR and post data assuming URI components
		 * belong to one string. */

		/* To be certain we don't append post data twice in some
		 * conditions... --Zas */
		assert(!strchr(uristring, POST_CHAR));

		add_to_strn(&uristring, cached->uri->post - 1);
	}

	if (cached->redirect) done_uri(cached->redirect);
	cached->redirect = get_uri(uristring, 0);
	cached->redirect_get = get;
	if (incomplete >= 0) cached->incomplete = incomplete;

	mem_free(uristring);

	return cached->redirect;
}
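
/* Shrink the cache so that unused entries fit within
 * document.cache.memory.size. With @whole set, try to drop all unused
 * entries regardless of the configured limit. */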
void
garbage_collection(int whole)
{
	struct cache_entry *cached;
	/* We recompute cache_size when scanning cache entries, to ensure
	 * consistency. */
	unsigned longlong old_cache_size = 0;
	/* The maximal cache size tolerated by the user. Note that this is only
	 * the size of the "just stored" unused cache entries; used cache
	 * entries are not counted into it. */
	unsigned longlong opt_cache_size = get_opt_long("document.cache.memory.size");
	/* The low-threshold cache size. Basically, when the cache size is
	 * higher than opt_cache_size, we free the cache so that there is no
	 * more than this value in the cache anymore. This is to make sure we
	 * aren't cleaning the cache too frequently when working with a lot of
	 * small cache entries but rather free more and then let it grow a
	 * little more as well. */
	unsigned longlong gc_cache_size = opt_cache_size * MEMORY_CACHE_GC_PERCENT / 100;
	/* The cache size we aim to reach. */
	unsigned longlong new_cache_size = cache_size;
#ifdef DEBUG_CACHE
	/* Whether we've hit a used (unfreeable) entry while collecting
	 * garbage. */
	int obstacle_entry = 0;
#endif

#ifdef DEBUG_CACHE
	DBG("gc whole=%d opt_cache_size=%ld gc_cache_size=%ld",
	    whole, opt_cache_size, gc_cache_size);
#endif

	if (!whole && cache_size <= opt_cache_size) return;


	/* Scanning cache, pass #1:
	 * Weed out the used cache entries from @new_cache_size, so that we
	 * will work only with the unused entries from then on. Also ensure
	 * that @cache_size is in sync. */

	foreach (cached, cache_entries) {
		old_cache_size += cached->data_size;

		if (!is_object_used(cached) && !is_entry_used(cached))
			continue;

		assertm(new_cache_size >= cached->data_size,
			"cache_size (%ld) underflow: subtracting %ld from %ld",
			cache_size, cached->data_size, new_cache_size);

		new_cache_size -= cached->data_size;

		if_assert_failed { new_cache_size = 0; }
	}

	assertm(old_cache_size == cache_size,
		"cache_size out of sync: %ld != (actual) %ld",
		cache_size, old_cache_size);
	if_assert_failed { cache_size = old_cache_size; }

	if (!whole && new_cache_size <= opt_cache_size) return;


	/* Scanning cache, pass #2:
	 * Mark potential targets for destruction, from the oldest to the
	 * newest. */

	foreachback (cached, cache_entries) {
		/* Would we have shrunk enough already? */
		if (!whole && new_cache_size <= gc_cache_size)
			goto shrinked_enough;

		/* Skip used cache entries. */
		if (is_object_used(cached) || is_entry_used(cached)) {
#ifdef DEBUG_CACHE
			obstacle_entry = 1;
#endif
			cached->gc_target = 0;
			continue;
		}

		/* FIXME: Optionally take cached->max_age into consideration,
		 * but that will probably complicate things too much. We'd have
		 * to sort the entries so as to prioritize removing the oldest
		 * ones. */

		assertm(new_cache_size >= cached->data_size,
			"cache_size (%ld) underflow: subtracting %ld from %ld",
			cache_size, cached->data_size, new_cache_size);

		/* Mark me for destruction, sir. */
		cached->gc_target = 1;
		new_cache_size -= cached->data_size;

		if_assert_failed { new_cache_size = 0; }
	}

	/* If we'd free the whole cache... */
	assertm(new_cache_size == 0,
		"cache_size (%ld) overflow: %ld",
		cache_size, new_cache_size);
	if_assert_failed { new_cache_size = 0; }

shrinked_enough:

	/* Now turn around and start walking in the opposite direction. */
	cached = cached->next;

	/* Something is strange when we decided all is ok before dropping any
	 * cache entry. */
	if ((void *) cached == &cache_entries) return;


	if (!whole) {
		struct cache_entry *entry;

		/* Scanning cache, pass #3:
		 * Walk back in the cache and unmark the cache entries which
		 * could still fit into the cache. */

		/* This makes sense when the newest entry is HUGE and after it,
		 * there's just plenty of tiny entries. By this point, all the
		 * tiny entries would be marked for deletion even though it'd
		 * be enough to free the huge entry. This actually fixes that
		 * situation. */

		for (entry = cached; (void *) entry != &cache_entries; entry = entry->next) {
			unsigned longlong newer_cache_size = new_cache_size + entry->data_size;

			if (newer_cache_size > gc_cache_size)
				continue;

			new_cache_size = newer_cache_size;
			entry->gc_target = 0;
		}
	}


	/* Scanning cache, pass #4:
	 * Destroy the marked entries. So sad, but that's life, bro'. */

	for (; (void *) cached != &cache_entries; ) {
		cached = cached->next;
		if (cached->prev->gc_target)
			delete_cache_entry(cached->prev);
	}


#ifdef DEBUG_CACHE
	if ((whole || !obstacle_entry) && cache_size > gc_cache_size) {
		DBG("garbage collection doesn't work, cache size %ld > %ld, "
		    "document.cache.memory.size set to: %ld bytes",
		    cache_size, gc_cache_size,
		    get_opt_long("document.cache.memory.size"));
	}
#endif
}