#include "bfu/dialog.h"
#include "cache/cache.h"
#include "cache/dialogs.h"
#include "config/options.h"
#include "main/main.h"
#include "main/object.h"
#include "network/connection.h"
#include "protocol/protocol.h"
#include "protocol/proxy.h"
#include "protocol/uri.h"
#include "util/error.h"
#include "util/memory.h"
#include "util/string.h"
/* The list of cache entries */
static INIT_LIST_HEAD(cache_entries);

static unsigned longlong cache_size;
static int id_counter = 1;

static void truncate_entry(struct cache_entry *cached, off_t offset, int final);
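/* Note: @cache_entries is kept in most-recently-used order -- find_in_cache()
 * moves each hit to the top of the list, and garbage_collection() walks the
 * list backwards so the oldest unused entries are considered first.
 * @cache_size is the running sum of every entry's data_size; enlarge_entry()
 * keeps it in sync whenever a fragment grows or shrinks. */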
/* Change 0 to 1 to enable cache debugging features (redirect stderr to a file). */
#define DEBUG_CACHE 0

#if DEBUG_CACHE

#define dump_frag(frag, count) \
do { \
	DBG(" [%d] f=%p offset=%" OFF_T_FORMAT " length=%" OFF_T_FORMAT \
	    " real_length=%" OFF_T_FORMAT, \
	    count, frag, frag->offset, frag->length, frag->real_length); \
} while (0)

#define dump_frags(entry, comment) \
do { \
	struct fragment *frag; \
	int count = 0; \
	\
	DBG("%s: url=%s, cache_size=%li", comment, struri(entry->uri), cache_size); \
	foreach (frag, entry->frag) \
		dump_frag(frag, ++count); \
} while (0)

#else
#define dump_frag(frag, count)
#define dump_frags(entry, comment)
#endif /* DEBUG_CACHE */
int
get_cache_entry_count(void)
{
	return list_size(&cache_entries);
}

int
get_cache_entry_used_count(void)
{
	struct cache_entry *cached;
	int i = 0;

	foreach (cached, cache_entries)
		i += is_object_used(cached);

	return i;
}

int
get_cache_entry_loading_count(void)
{
	struct cache_entry *cached;
	int i = 0;

	foreach (cached, cache_entries)
		i += is_entry_used(cached);

	return i;
}
struct cache_entry *
find_in_cache(struct uri *uri)
{
	struct cache_entry *cached;
	int proxy = (uri->protocol == PROTOCOL_PROXY);

	foreach (cached, cache_entries) {
		struct uri *c_uri;

		if (!cached->valid) continue;

		c_uri = proxy ? cached->proxy_uri : cached->uri;
		if (!compare_uri(c_uri, uri, URI_BASE))
			continue;

		move_to_top_of_list(cache_entries, cached);

		return cached;
	}

	return NULL;
}
struct cache_entry *
get_cache_entry(struct uri *uri)
{
	struct cache_entry *cached = find_in_cache(uri);

	assertm(!uri->fragment, "Fragment in URI (%s)", struri(uri));

	if (cached) return cached;

	cached = mem_calloc(1, sizeof(*cached));
	if (!cached) return NULL;

	cached->uri = get_proxied_uri(uri);

	cached->proxy_uri = get_proxy_uri(uri, NULL);
	if (!cached->proxy_uri) {
		done_uri(cached->uri);
		mem_free(cached);
		return NULL;
	}
	cached->incomplete = 1;

	init_list(cached->frag);
	cached->id = id_counter++;
	object_nolock(cached, "cache_entry"); /* Debugging purpose. */

	cached->box_item = add_listbox_leaf(&cache_browser, NULL, cached);

	add_to_list(cache_entries, cached);

	return cached;
}
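/* Note: unlike find_in_cache(), which only looks up an already registered
 * entry, get_cache_entry() allocates and registers a fresh, empty, incomplete
 * entry when the lookup misses. */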
static int
cache_entry_has_expired(struct cache_entry *cached)
{
	timeval_T now;

	timeval_now(&now);

	return timeval_cmp(&cached->max_age, &now) <= 0;
}
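/* The comparison above treats max_age as an absolute expiry timestamp: once
 * the current time has reached or passed it, the entry counts as expired. */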
struct cache_entry *
get_validated_cache_entry(struct uri *uri, enum cache_mode cache_mode)
{
	struct cache_entry *cached;

	/* We have to check if something should be reloaded */
	if (cache_mode > CACHE_MODE_NORMAL)
		return NULL;

	/* We only consider complete entries */
	cached = find_in_cache(uri);
	if (!cached || cached->incomplete)
		return NULL;

	/* Check if the entry can be deleted */
	/* FIXME: This does not make sense to me. Why should the usage pattern
	 * of the cache entry matter? The only reason I can think of is to avoid
	 * reloading when spawning a new tab, which could potentially be a big
	 * penalty, but shouldn't that be taken care of on a higher level? */
	if (is_object_used(cached)) {
#if 0
		/* Never use expired entries. */
		/* Disabled because it hurts usability too much. */
		if (cached->expire && cache_entry_has_expired(cached))
			return NULL;
#endif
		return cached;
	}

	/* A bit of a gray zone. Delete the entry if it has the strictest
	 * cache mode and we don't want the most aggressive mode, or we have to
	 * remove the redirect, or the entry expired. Please enlighten me. */
	if ((cached->cache_mode == CACHE_MODE_NEVER && cache_mode != CACHE_MODE_ALWAYS)
	    || (cached->redirect && !get_opt_bool("document.cache.cache_redirects"))
	    || (cached->expire && cache_entry_has_expired(cached))) {
		delete_cache_entry(cached);
		return NULL;
	}

	return cached;
}
int
cache_entry_is_valid(struct cache_entry *cached)
{
	struct cache_entry *valid_cached;

	foreach (valid_cached, cache_entries) {
		if (valid_cached == cached)
			return 1;
	}

	return 0;
}
static struct cache_entry *
follow_cached_redirects(struct cache_entry *cached)
{
	int redirects = 0;

	while (cached) {
		if (!cached->redirect) {
			/* XXX: This is not quite true, but does that
			 * difference matter here? */
			return cached;
		}

		if (++redirects > MAX_REDIRECTS) break;

		cached = find_in_cache(cached->redirect);
	}

	return NULL;
}
struct cache_entry *
get_redirected_cache_entry(struct uri *uri)
{
	struct cache_entry *cached = find_in_cache(uri);

	return cached ? follow_cached_redirects(cached) : NULL;
}
static void
enlarge_entry(struct cache_entry *cached, off_t size)
{
	cached->data_size += size;
	assertm(cached->data_size >= 0,
		"cache entry data_size underflow: %ld", cached->data_size);
	if_assert_failed { cached->data_size = 0; }

	cache_size += size;
	assertm(cache_size >= 0, "cache_size underflow: %ld", cache_size);
	if_assert_failed { cache_size = 0; }
}
#define CACHE_PAD(x) (((x) | 0x3fff) + 1)

/* One byte is reserved for data in struct fragment. */
#define FRAGSIZE(x) (sizeof(struct fragment) + (x) - 1)
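/* For illustration: CACHE_PAD() rounds an allocation request up to the next
 * 16 KiB boundary, e.g. CACHE_PAD(1) == 0x4000 and CACHE_PAD(0x4000) == 0x8000,
 * so successive small appends do not force a remap every time. FRAGSIZE()
 * subtracts one because struct fragment already declares one byte of its
 * trailing data array. */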
/* We store the fragments themselves in a private vault, safely separated from
 * the rest of the memory structures. If we lived in the main libc memory pool,
 * we would trigger annoying pathological behaviour like artificially enlarging
 * the memory pool to 50M, then securing it with some stupid cookie record at
 * the top, and then no matter how you flush the cache the data segment stays
 * that big.
 *
 * Cool, but we don't want that, so fragments (where the big data is stored)
 * live in their own little mmap()ed worlds. There is some overhead, but if we
 * assume a single fragment per cache entry and a page size (the mmap()
 * allocation granularity) of 4096, then for a squad of ten 1 kB documents this
 * amounts to about 30 kB -- ten pages of 4096 bytes each, minus the ten
 * kilobytes of actual data. That's not *that* horrible when you realize that
 * the freshmeat front page takes 300 kB in memory and we usually do not deal
 * with documents so small that the max. 4 kB per-fragment overhead would be
 * visible there.
 *
 * The alternative would of course be to manage an entire custom memory pool,
 * but that is feasible only when we are able to resize it efficiently. We
 * aren't, except on Linux.
 *
 * Of course, for all this to really prevent the pathological cases completely,
 * we would need to stuff the rendered documents in too, because they seem to
 * account for the major memory bursts. */
static struct fragment *
frag_alloc(size_t size)
{
	struct fragment *f = mem_mmap_alloc(FRAGSIZE(size));

	if (!f) return NULL;
	memset(f, 0, FRAGSIZE(size));
	return f;
}
static struct fragment *
frag_realloc(struct fragment *f, size_t size)
{
	return mem_mmap_realloc(f, FRAGSIZE(f->real_length), FRAGSIZE(size));
}
static void
frag_free(struct fragment *f)
{
	mem_mmap_free(f, FRAGSIZE(f->real_length));
}
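/* A minimal sketch of the intended lifecycle of these helpers (illustrative
 * only; the real callers are add_fragment() and friends below):
 *
 *	struct fragment *f = frag_alloc(CACHE_PAD(length));
 *	if (!f) return -1;
 *	f->length = length;
 *	f->real_length = CACHE_PAD(length);
 *	memcpy(f->data, data, length);
 *	...
 *	frag_free(f);
 */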
/* Concatenate overlapping fragments. */
static void
remove_overlaps(struct cache_entry *cached, struct fragment *f, int *trunc)
{
	off_t f_end_offset = f->offset + f->length;

	/* Iterate thru all fragments we still overlap to. */
	while (list_has_next(cached->frag, f)
	       && f_end_offset > f->next->offset) {
		struct fragment *nf;
		off_t end_offset = f->next->offset + f->next->length;

		if (f_end_offset < end_offset) {
			/* We end before the end of the following fragment, though.
			 * So try to append the overlapping part of that fragment
			 * to us. */
			nf = frag_realloc(f, end_offset - f->offset);
			if (nf) {
				nf->prev->next = nf;
				nf->next->prev = nf;
				f = nf;

				if (memcmp(f->data + f->next->offset - f->offset,
					   f->next->data,
					   f->offset + f->length - f->next->offset))
					*trunc = 1;

				memcpy(f->data + f->length,
				       f->next->data + f_end_offset - f->next->offset,
				       end_offset - f_end_offset);

				enlarge_entry(cached, end_offset - f_end_offset);
				f->length = f->real_length = end_offset - f->offset;
			}

		} else {
			/* We will just discard this, it's a complete subset of
			 * our new fragment. */
			if (memcmp(f->data + f->next->offset - f->offset,
				   f->next->data,
				   f->next->length))
				*trunc = 1;
		}

		/* Remove the fragment, it influences our new one! */
		nf = f->next;
		enlarge_entry(cached, -nf->length);
		del_from_list(nf);
		frag_free(nf);
	}
}
/* Note that this function is maybe overcommented, but I'm certainly not
 * unhappy about that. */
int
add_fragment(struct cache_entry *cached, off_t offset,
	     const unsigned char *data, ssize_t length)
{
	struct fragment *f, *nf;
	int trunc = 0;
	off_t end_offset;

	if (!length) return 0;

	end_offset = offset + length;
	if (cached->length < end_offset)
		cached->length = end_offset;

	/* id marks each entry, and changes each time it's modified;
	 * used in the HTML renderer. */
	cached->id = id_counter++;

	/* Possibly insert the new data in the middle of an existing fragment. */
	foreach (f, cached->frag) {
		int ret = 0;
		off_t f_end_offset = f->offset + f->length;

		/* No intersection? */
		if (f->offset > offset) break;
		if (f_end_offset < offset) continue;

		if (end_offset > f_end_offset) {
			/* Overlap - we end further than the original fragment. */

			if (end_offset - f->offset <= f->real_length) {
				/* We fit here, so let's enlarge it by the delta
				 * of the old and new end.. */
				enlarge_entry(cached, end_offset - f_end_offset);
				/* ..and length is now the total length. */
				f->length = end_offset - f->offset;

				ret = 1; /* It was enlarged. */
			} else {
				/* We will reduce the fragment length only to the
				 * starting non-intersecting size and add a new
				 * fragment directly after this one. */
				f->length = offset - f->offset;
			}

		} /* else We are a subset of the original fragment. */

		/* Copy the stuff over there. */
		memcpy(f->data + offset - f->offset, data, length);

		remove_overlaps(cached, f, &trunc);

		/* We truncate the entry even if the data contents are the
		 * same as what we have in the fragment, because that does
		 * not mean that what is going to follow won't differ. This
		 * is a serious problem when rendering an HTML frame with onload
		 * snippets - we "guess" the rest of the document here,
		 * interpret the snippet, then it turns out in the real
		 * document the snippet is different and we are in trouble.
		 *
		 * Debugging this took me about 1.5 days (really), the diff with
		 * all the debugging print commands amounted to about 20 kB (gdb
		 * wasn't very useful since it stalled the download, de facto
		 * eliminating the bad behaviour). */
		truncate_entry(cached, end_offset, 0);

		dump_frags(cached, "add_fragment");

		return ret;
	}

	/* Make up a new fragment. */
	nf = frag_alloc(CACHE_PAD(length));
	if (!nf) return -1;

	nf->offset = offset;
	nf->length = length;
	nf->real_length = CACHE_PAD(length);
	memcpy(nf->data, data, length);
	add_at_pos(f->prev, nf);

	enlarge_entry(cached, length);

	remove_overlaps(cached, nf, &trunc);
	if (trunc) truncate_entry(cached, end_offset, 0);

	dump_frags(cached, "add_fragment");

	return 1;
}
/* Try to defragment the cache entry. Defragmentation will not be possible
 * if there is a gap in the fragments; if we have bytes 1-100 in one fragment
 * and bytes 201-300 in the second, we must leave those two fragments separate
 * so that the fragment for bytes 101-200 can later be inserted. However,
 * if we have the fragments for bytes 1-100, 101-200, and 201-300, we will
 * catenate them into one new fragment and replace the original fragments
 * with that new fragment.
 *
 * If there are no fragments, return NULL. If there is no fragment with byte 1,
 * return NULL. Otherwise, return the first fragment, whether or not it was
 * possible to fully defragment the entry. */
struct fragment *
get_cache_fragment(struct cache_entry *cached)
{
	struct fragment *first_frag, *adj_frag, *frag, *new_frag;
	off_t new_frag_len;

	if (list_empty(cached->frag))
		return NULL;

	first_frag = cached->frag.next;
	if (first_frag->offset)
		return NULL;

	/* Only one fragment so no defragmentation is needed */
	if (list_is_singleton(cached->frag))
		return first_frag;

	/* Find the first pair of fragments with a gap in between. Only
	 * fragments up to the first gap can be defragmented. */
	for (adj_frag = first_frag->next; adj_frag != (void *) &cached->frag;
	     adj_frag = adj_frag->next) {
		long gap = adj_frag->offset
			   - (adj_frag->prev->offset + adj_frag->prev->length);

		if (gap > 0) break;
		if (gap == 0) continue;

		INTERNAL("fragments overlap");
		return NULL;
	}

	/* There is a gap between the first two fragments, so we can't
	 * defragment anything. */
	if (adj_frag == first_frag->next)
		return first_frag;

	/* Calculate the length of the defragmented fragment. */
	for (new_frag_len = 0, frag = first_frag;
	     frag != adj_frag;
	     frag = frag->next)
		new_frag_len += frag->length;

	/* XXX: If the defragmentation fails because of allocation failure,
	 * fall back to return the first fragment and pretend all is well. */
	/* FIXME: Is this terribly brain-dead? It corresponds to the semantic of
	 * the code this extended version of the old defrag_entry() is supposed
	 * to replace. --jonas */
	new_frag = frag_alloc(new_frag_len);
	if (!new_frag)
		return first_frag->length ? first_frag : NULL;

	new_frag->length = new_frag_len;
	new_frag->real_length = new_frag_len;

	for (new_frag_len = 0, frag = first_frag;
	     frag != adj_frag;) {
		struct fragment *tmp = frag;

		memcpy(new_frag->data + new_frag_len, frag->data, frag->length);
		new_frag_len += frag->length;

		frag = frag->next;
		del_from_list(tmp);
		frag_free(tmp);
	}

	add_to_list(cached->frag, new_frag);

	dump_frags(cached, "get_cache_fragment");

	return new_frag;
}
static void
delete_fragment(struct cache_entry *cached, struct fragment *f)
{
	while ((void *) f != &cached->frag) {
		struct fragment *tmp = f->next;

		enlarge_entry(cached, -f->length);
		del_from_list(f);
		frag_free(f);

		f = tmp;
	}
}
static void
truncate_entry(struct cache_entry *cached, off_t offset, int final)
{
	struct fragment *f;

	if (cached->length > offset) {
		cached->length = offset;
		cached->incomplete = 1;
	}

	foreach (f, cached->frag) {
		off_t size = offset - f->offset;

		/* XXX: is zero length fragment really legal here ? --Zas */
		assert(f->length >= 0);

		if (size >= f->length) continue;

		if (size > 0) {
			enlarge_entry(cached, -(f->length - size));
			f->length = size;

			if (final) {
				struct fragment *nf;

				nf = frag_realloc(f, f->length);
				if (nf) {
					nf->prev->next = nf;
					nf->next->prev = nf;
					f = nf;
					f->real_length = f->length;
				}
			}

			f = f->next;
		}

		delete_fragment(cached, f);

		dump_frags(cached, "truncate_entry");
		return;
	}
}
void
free_entry_to(struct cache_entry *cached, off_t offset)
{
	struct fragment *f;

	foreach (f, cached->frag) {
		if (f->offset + f->length <= offset) {
			struct fragment *tmp = f;

			enlarge_entry(cached, -f->length);
			f = f->prev;
			del_from_list(tmp);
			frag_free(tmp);

		} else if (f->offset < offset) {
			off_t size = offset - f->offset;

			enlarge_entry(cached, -size);
			f->length -= size;
			memmove(f->data, f->data + size, f->length);
			f->offset = offset;
		}
	}
}
void
delete_entry_content(struct cache_entry *cached)
{
	enlarge_entry(cached, -cached->data_size);

	while (cached->frag.next != (void *) &cached->frag) {
		struct fragment *f = cached->frag.next;

		del_from_list(f);
		frag_free(f);
	}

	cached->id = id_counter++;
	cached->length = 0;
	cached->incomplete = 1;

	mem_free_set(&cached->last_modified, NULL);
	mem_free_set(&cached->etag, NULL);
}
static void
done_cache_entry(struct cache_entry *cached)
{
	assertm(!is_object_used(cached), "deleting locked cache entry");
	assertm(!is_entry_used(cached), "deleting loading cache entry");

	delete_entry_content(cached);

	if (cached->box_item) done_listbox_item(&cache_browser, cached->box_item);

	if (cached->uri) done_uri(cached->uri);
	if (cached->proxy_uri) done_uri(cached->proxy_uri);
	if (cached->redirect) done_uri(cached->redirect);

	mem_free_if(cached->head);
	mem_free_if(cached->content_type);
	mem_free_if(cached->last_modified);
	mem_free_if(cached->ssl_info);
	mem_free_if(cached->encoding_info);
	mem_free_if(cached->etag);

	mem_free(cached);
}
void
delete_cache_entry(struct cache_entry *cached)
{
	del_from_list(cached);

	done_cache_entry(cached);
}
void
normalize_cache_entry(struct cache_entry *cached, off_t truncate_length)
{
	if (truncate_length < 0)
		return;

	truncate_entry(cached, truncate_length, 1);
	cached->incomplete = 0;
	cached->preformatted = 0;
}
struct uri *
redirect_cache(struct cache_entry *cached, unsigned char *location,
	       int get, int incomplete)
{
	unsigned char *uristring;

	/* XXX: I am a little puzzled whether we should only use the cache
	 * entry's URI if it is valid. Hopefully always using it won't hurt.
	 * Currently we handle directory redirects, where "/" should be
	 * appended, specially; dunno if join_urls() could be made to handle
	 * that. */

	/* XXX: We are assuming here that incomplete will only be zero when
	 * doing these fake redirects whose only purpose is to add an ending
	 * slash *cough* dirseparator to the end of the URI. */
	if (incomplete == 0 && location[0] == '/' && location[1] == 0) {
		/* To be sure use get_uri_string() to get rid of post data */
		uristring = get_uri_string(cached->uri, URI_ORIGINAL);
		if (uristring) add_to_strn(&uristring, location);
	} else {
		uristring = join_urls(cached->uri, location);
	}

	if (!uristring) return NULL;

	/* Only add the post data if the redirect should not use the GET method.
	 * This is tied to the HTTP handling of the 303 and (if the
	 * protocol.http.bugs.broken_302_redirect option is enabled) the 302
	 * status codes. */
	if (cached->uri->post
	    && !cached->redirect_get
	    && !get) {
		/* XXX: Add POST_CHAR and post data assuming URI components
		 * belong to one string. */

		/* To be certain we don't append post data twice in some
		 * conditions... --Zas */
		assert(!strchr(uristring, POST_CHAR));

		add_to_strn(&uristring, cached->uri->post - 1);
	}

	if (cached->redirect) done_uri(cached->redirect);
	cached->redirect = get_uri(uristring, 0);
	cached->redirect_get = get;
	if (incomplete >= 0) cached->incomplete = incomplete;

	mem_free(uristring);

	return cached->redirect;
}
void
garbage_collection(int whole)
{
	struct cache_entry *cached;
	/* We recompute cache_size when scanning cache entries, to ensure
	 * it is in sync. */
	unsigned longlong old_cache_size = 0;
	/* The maximal cache size tolerated by the user. Note that this is only
	 * the size of the "just stored" unused cache entries; used cache
	 * entries are not counted into it. */
	unsigned longlong opt_cache_size = get_opt_long("document.cache.memory.size");
	/* The low-threshold cache size. Basically, when the cache size is
	 * higher than opt_cache_size, we free the cache so that there is no
	 * more than this value in the cache anymore. This is to make sure we
	 * aren't cleaning the cache too frequently when working with a lot of
	 * small cache entries, but rather free more and then let it grow a
	 * little more as well. */
	unsigned longlong gc_cache_size = opt_cache_size * MEMORY_CACHE_GC_PERCENT / 100;
	/* The cache size we aim to reach. */
	unsigned longlong new_cache_size = cache_size;
	/* Whether we've hit a used (unfreeable) entry while collecting
	 * garbage. */
	int obstacle_entry = 0;
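	/* For a rough feel of the thresholds (illustrative numbers only;
	 * MEMORY_CACHE_GC_PERCENT is defined elsewhere): with
	 * document.cache.memory.size set to 1 MiB and a GC percentage of 75,
	 * collection is triggered once unused entries exceed 1 MiB and then
	 * frees entries until at most roughly 768 KiB of unused data remain. */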
778 DBG("gc whole=%d opt_cache_size=%ld gc_cache_size=%ld",
779 whole
, opt_cache_size
,gc_cache_size
);
782 if (!whole
&& cache_size
<= opt_cache_size
) return;

	/* Scanning cache, pass #1:
	 * Weed out the used cache entries from @new_cache_size, so that we
	 * will work only with the unused entries from then on. Also ensure
	 * that @cache_size is in sync. */

	foreach (cached, cache_entries) {
		old_cache_size += cached->data_size;

		if (!is_object_used(cached) && !is_entry_used(cached))
			continue;

		assertm(new_cache_size >= cached->data_size,
			"cache_size (%ld) underflow: subtracting %ld from %ld",
			cache_size, cached->data_size, new_cache_size);

		new_cache_size -= cached->data_size;

		if_assert_failed { new_cache_size = 0; }
	}

	assertm(old_cache_size == cache_size,
		"cache_size out of sync: %ld != (actual) %ld",
		cache_size, old_cache_size);
	if_assert_failed { cache_size = old_cache_size; }

	if (!whole && new_cache_size <= opt_cache_size) return;

	/* Scanning cache, pass #2:
	 * Mark potential targets for destruction, from the oldest to the
	 * newest. */

	foreachback (cached, cache_entries) {
		/* Have we shrunk enough already? */
		if (!whole && new_cache_size <= gc_cache_size)
			goto shrinked_enough;

		/* Skip used cache entries. */
		if (is_object_used(cached) || is_entry_used(cached)) {
			obstacle_entry = 1;
			cached->gc_target = 0;
			continue;
		}

		/* FIXME: Optionally take cached->max_age into consideration,
		 * but that will probably complicate things too much. We'd have
		 * to sort entries so as to prioritize removing the oldest
		 * entries. */

		assertm(new_cache_size >= cached->data_size,
			"cache_size (%ld) underflow: subtracting %ld from %ld",
			cache_size, cached->data_size, new_cache_size);

		/* Mark me for destruction, sir. */
		cached->gc_target = 1;
		new_cache_size -= cached->data_size;

		if_assert_failed { new_cache_size = 0; }
	}

	/* If we'd free the whole cache... */
	assertm(new_cache_size == 0,
		"cache_size (%ld) overflow: %ld",
		cache_size, new_cache_size);
	if_assert_failed { new_cache_size = 0; }

shrinked_enough:

	/* Now turn around and start walking in the opposite direction. */
	cached = cached->next;

	/* Something is strange when we decided all is OK before dropping any
	 * cache entry. */
	if ((void *) cached == &cache_entries) return;

	if (!whole) {
		struct cache_entry *entry;

		/* Scanning cache, pass #3:
		 * Walk back in the cache and unmark the cache entries which
		 * could still fit into the cache. */

		/* This makes sense when the newest entry is HUGE and after it
		 * there's just plenty of tiny entries. By this point, all the
		 * tiny entries would be marked for deletion even though it'd
		 * be enough to free the huge entry. This actually fixes that
		 * situation. */

		for (entry = cached; (void *) entry != &cache_entries; entry = entry->next) {
			unsigned longlong newer_cache_size = new_cache_size + entry->data_size;

			if (newer_cache_size > gc_cache_size)
				continue;

			new_cache_size = newer_cache_size;
			entry->gc_target = 0;
		}
	}

	/* Scanning cache, pass #4:
	 * Destroy the marked entries. So sad, but that's life, bro'. */

	for (; (void *) cached != &cache_entries; ) {
		cached = cached->next;
		if (cached->prev->gc_target)
			delete_cache_entry(cached->prev);
	}

	if ((whole || !obstacle_entry) && cache_size > gc_cache_size) {
		DBG("garbage collection doesn't work, cache size %ld > %ld, "
		    "document.cache.memory.size set to: %ld bytes",
		    cache_size, gc_cache_size,
		    get_opt_long("document.cache.memory.size"));
	}
}