11 #include "bfu/dialog.h"
12 #include "cache/cache.h"
13 #include "cache/dialogs.h"
14 #include "config/options.h"
15 #include "main/main.h"
16 #include "main/object.h"
17 #include "network/connection.h"
18 #include "protocol/protocol.h"
19 #include "protocol/proxy.h"
20 #include "protocol/uri.h"
21 #include "util/error.h"
22 #include "util/memory.h"
23 #include "util/string.h"
25 /* The list of cache entries */
26 static INIT_LIST_HEAD(cache_entries
);
28 static unsigned longlong cache_size
;
29 static int id_counter
= 1;
31 static void truncate_entry(struct cache_entry
*cached
, off_t offset
, int final
);
33 /* Change 0 to 1 to enable cache debugging features (redirect stderr to a file). */
40 #define dump_frag(frag, count) \
42 DBG(" [%d] f=%p offset=%" PRId64 " length=%" PRId64 \
43 " real_length=%" PRId64, \
44 count, frag, frag->offset, frag->length, frag->real_length); \
47 #define dump_frags(entry, comment) \
49 struct fragment *frag; \
52 DBG("%s: url=%s, cache_size=%li", comment, struri(entry->uri), cache_size); \
53 foreach (frag, entry->frag) \
54 dump_frag(frag, ++count); \
58 #define dump_frags(entry, comment)
59 #endif /* DEBUG_CACHE */
68 get_cache_entry_count(void)
70 return list_size(&cache_entries
);
74 get_cache_entry_used_count(void)
76 struct cache_entry
*cached
;
79 foreach (cached
, cache_entries
)
80 i
+= is_object_used(cached
);
86 get_cache_entry_loading_count(void)
88 struct cache_entry
*cached
;
91 foreach (cached
, cache_entries
)
92 i
+= is_entry_used(cached
);
98 find_in_cache(struct uri
*uri
)
100 struct cache_entry
*cached
;
101 int proxy
= (uri
->protocol
== PROTOCOL_PROXY
);
103 foreach (cached
, cache_entries
) {
106 if (!cached
->valid
) continue;
108 c_uri
= proxy
? cached
->proxy_uri
: cached
->uri
;
109 if (!compare_uri(c_uri
, uri
, URI_BASE
))
112 move_to_top_of_list(cache_entries
, cached
);
121 get_cache_entry(struct uri
*uri
)
123 struct cache_entry
*cached
= find_in_cache(uri
);
125 assertm(!uri
->fragment
, "Fragment in URI (%s)", struri(uri
));
127 if (cached
) return cached
;
131 cached
= mem_calloc(1, sizeof(*cached
));
132 if (!cached
) return NULL
;
134 cached
->uri
= get_proxied_uri(uri
);
140 cached
->proxy_uri
= get_proxy_uri(uri
, NULL
);
141 if (!cached
->proxy_uri
) {
142 done_uri(cached
->uri
);
146 cached
->incomplete
= 1;
149 init_list(cached
->frag
);
150 cached
->id
= id_counter
++;
151 object_nolock(cached
, "cache_entry"); /* Debugging purpose. */
153 cached
->box_item
= add_listbox_leaf(&cache_browser
, NULL
, cached
);
155 add_to_list(cache_entries
, cached
);
161 cache_entry_has_expired(struct cache_entry
*cached
)
167 return timeval_cmp(&cached
->max_age
, &now
) <= 0;
171 get_validated_cache_entry(struct uri
*uri
, enum cache_mode cache_mode
)
173 struct cache_entry
*cached
;
175 /* We have to check if something should be reloaded */
176 if (cache_mode
> CACHE_MODE_NORMAL
)
179 /* We only consider complete entries */
180 cached
= find_in_cache(uri
);
181 if (!cached
|| cached
->incomplete
)
184 /* A bit of a gray zone. Delete the entry if the it has the stricktest
185 * cache mode and we don't want the most aggressive mode or we have to
186 * remove the redirect or the entry expired. Please enlighten me.
188 if ((cached
->cache_mode
== CACHE_MODE_NEVER
&& cache_mode
!= CACHE_MODE_ALWAYS
)
189 || (cached
->redirect
&& !get_opt_bool("document.cache.cache_redirects"))
190 || (cached
->expire
&& cache_entry_has_expired(cached
))) {
191 if (!is_object_used(cached
)) delete_cache_entry(cached
);
199 cache_entry_is_valid(struct cache_entry
*cached
)
201 struct cache_entry
*valid_cached
;
203 foreach (valid_cached
, cache_entries
) {
204 if (valid_cached
== cached
)
213 follow_cached_redirects(struct cache_entry
*cached
)
218 if (!cached
->redirect
) {
219 /* XXX: This is not quite true, but does that difference
224 if (++redirects
> MAX_REDIRECTS
) break;
226 cached
= find_in_cache(cached
->redirect
);
233 get_redirected_cache_entry(struct uri
*uri
)
235 struct cache_entry
*cached
= find_in_cache(uri
);
237 return cached
? follow_cached_redirects(cached
) : NULL
;
242 enlarge_entry(struct cache_entry
*cached
, off_t size
)
244 cached
->data_size
+= size
;
245 assertm(cached
->data_size
>= 0,
246 "cache entry data_size underflow: %ld", cached
->data_size
);
247 if_assert_failed
{ cached
->data_size
= 0; }
250 assertm(cache_size
>= 0, "cache_size underflow: %ld", cache_size
);
251 if_assert_failed
{ cache_size
= 0; }
255 #define CACHE_PAD(x) (((x) | 0x3fff) + 1)
257 /* One byte is reserved for data in struct fragment. */
258 #define FRAGSIZE(x) (sizeof(struct fragment) + (x) - 1)
260 /* We store the fragments themselves in a private vault, safely separated from
261 * the rest of memory structures. If we lived in the main libc memory pool, we
262 * would trigger annoying pathological behaviour like artificially enlarging
263 * the memory pool to 50M, then securing it with some stupid cookie record at
264 * the top and then no matter how you flush the cache the data segment is still
267 * Cool, but we don't want that, so fragments (where the big data is stored)
268 * live in their little mmap()ed worlds. There is some overhead, but if we
269 * assume single fragment per cache entry and page size (mmap() allocation
270 * granularity) 4096, for a squad of ten 1kb documents this amounts 30kb.
271 * That's not *that* horrible when you realize that the freshmeat front page
272 * takes 300kb in memory and we usually do not deal with documents so small
273 * that max. 4kb overhead would be visible there.
275 * The alternative would be of course to manage an entire custom memory pool,
276 * but that is feasible only when we are able to resize it efficiently. We
277 * aren't, except on Linux.
279 * Of course for all this to really completely prevent the pathological cases,
280 * we need to stuff the rendered documents in too, because they seem to amount
281 * the major memory bursts. */
283 static struct fragment
*
284 frag_alloc(size_t size
)
286 struct fragment
*f
= mem_mmap_alloc(FRAGSIZE(size
));
289 memset(f
, 0, FRAGSIZE(size
));
293 static struct fragment
*
294 frag_realloc(struct fragment
*f
, size_t size
)
296 return mem_mmap_realloc(f
, FRAGSIZE(f
->real_length
), FRAGSIZE(size
));
300 frag_free(struct fragment
*f
)
302 mem_mmap_free(f
, FRAGSIZE(f
->real_length
));
306 /* Concatenate overlapping fragments. */
308 remove_overlaps(struct cache_entry
*cached
, struct fragment
*f
, int *trunc
)
310 off_t f_end_offset
= f
->offset
+ f
->length
;
312 /* Iterate thru all fragments we still overlap to. */
313 while (list_has_next(cached
->frag
, f
)
314 && f_end_offset
> f
->next
->offset
) {
316 off_t end_offset
= f
->next
->offset
+ f
->next
->length
;
318 if (f_end_offset
< end_offset
) {
319 /* We end before end of the following fragment, though.
320 * So try to append overlapping part of that fragment
322 nf
= frag_realloc(f
, end_offset
- f
->offset
);
328 if (memcmp(f
->data
+ f
->next
->offset
- f
->offset
,
330 f
->offset
+ f
->length
- f
->next
->offset
))
333 memcpy(f
->data
+ f
->length
,
334 f
->next
->data
+ f_end_offset
- f
->next
->offset
,
335 end_offset
- f_end_offset
);
337 enlarge_entry(cached
, end_offset
- f_end_offset
);
338 f
->length
= f
->real_length
= end_offset
- f
->offset
;
342 /* We will just discard this, it's complete subset of
343 * our new fragment. */
344 if (memcmp(f
->data
+ f
->next
->offset
- f
->offset
,
350 /* Remove the fragment, it influences our new one! */
352 enlarge_entry(cached
, -nf
->length
);
358 /* Note that this function is maybe overcommented, but I'm certainly not
359 * unhappy from that. */
361 add_fragment(struct cache_entry
*cached
, off_t offset
,
362 const unsigned char *data
, ssize_t length
)
364 struct fragment
*f
, *nf
;
368 if (!length
) return 0;
370 end_offset
= offset
+ length
;
371 if (cached
->length
< end_offset
)
372 cached
->length
= end_offset
;
374 /* id marks each entry, and change each time it's modified,
375 * used in HTML renderer. */
376 cached
->id
= id_counter
++;
378 /* Possibly insert the new data in the middle of existing fragment. */
379 foreach (f
, cached
->frag
) {
381 off_t f_end_offset
= f
->offset
+ f
->length
;
383 /* No intersection? */
384 if (f
->offset
> offset
) break;
385 if (f_end_offset
< offset
) continue;
387 if (end_offset
> f_end_offset
) {
388 /* Overlap - we end further than original fragment. */
390 if (end_offset
- f
->offset
<= f
->real_length
) {
391 /* We fit here, so let's enlarge it by delta of
392 * old and new end.. */
393 enlarge_entry(cached
, end_offset
- f_end_offset
);
394 /* ..and length is now total length. */
395 f
->length
= end_offset
- f
->offset
;
397 ret
= 1; /* It was enlarged. */
399 /* We will reduce fragment length only to the
400 * starting non-interjecting size and add new
401 * fragment directly after this one. */
402 f
->length
= offset
- f
->offset
;
407 } /* else We are subset of original fragment. */
409 /* Copy the stuff over there. */
410 memcpy(f
->data
+ offset
- f
->offset
, data
, length
);
412 remove_overlaps(cached
, f
, &trunc
);
414 /* We truncate the entry even if the data contents is the
415 * same as what we have in the fragment, because that does
416 * not mean that what is going to follow won't differ, This
417 * is a serious problem when rendering HTML frame with onload
418 * snippets - we "guess" the rest of the document here,
419 * interpret the snippet, then it turns out in the real
420 * document the snippet is different and we are in trouble.
422 * Debugging this took me about 1.5 day (really), the diff with
423 * all the debugging print commands amounted about 20kb (gdb
424 * wasn't much useful since it stalled the download, de facto
425 * eliminating the bad behaviour). */
426 truncate_entry(cached
, end_offset
, 0);
428 dump_frags(cached
, "add_fragment");
433 /* Make up new fragment. */
434 nf
= frag_alloc(CACHE_PAD(length
));
439 nf
->real_length
= CACHE_PAD(length
);
440 memcpy(nf
->data
, data
, length
);
441 add_at_pos(f
->prev
, nf
);
443 enlarge_entry(cached
, length
);
445 remove_overlaps(cached
, nf
, &trunc
);
446 if (trunc
) truncate_entry(cached
, end_offset
, 0);
448 dump_frags(cached
, "add_fragment");
453 /* Try to defragment the cache entry. Defragmentation will not be possible
454 * if there is a gap in the fragments; if we have bytes 1-100 in one fragment
455 * and bytes 201-300 in the second, we must leave those two fragments separate
456 * so that the fragment for bytes 101-200 can later be inserted. However,
457 * if we have the fragments for bytes 1-100, 101-200, and 201-300, we will
458 * catenate them into one new fragment and replace the original fragments
459 * with that new fragment.
461 * If are no fragments, return NULL. If there is no fragment with byte 1,
462 * return NULL. Otherwise, return the first fragment, whether or not it was
463 * possible to fully defragment the entry. */
465 get_cache_fragment(struct cache_entry
*cached
)
467 struct fragment
*first_frag
, *adj_frag
, *frag
, *new_frag
;
470 if (list_empty(cached
->frag
))
473 first_frag
= cached
->frag
.next
;
474 if (first_frag
->offset
)
477 /* Only one fragment so no defragmentation is needed */
478 if (list_is_singleton(cached
->frag
))
481 /* Find the first pair of fragments with a gap in between. Only
482 * fragments up to the first gap can be defragmented. */
483 for (adj_frag
= first_frag
->next
; adj_frag
!= (void *) &cached
->frag
;
484 adj_frag
= adj_frag
->next
) {
485 long gap
= adj_frag
->offset
486 - (adj_frag
->prev
->offset
+ adj_frag
->prev
->length
);
489 if (gap
== 0) continue;
491 INTERNAL("fragments overlap");
495 /* There is a gap between the first two fragments, so we can't
496 * defragment anything. */
497 if (adj_frag
== first_frag
->next
)
500 /* Calculate the length of the defragmented fragment. */
501 for (new_frag_len
= 0, frag
= first_frag
;
504 new_frag_len
+= frag
->length
;
506 /* XXX: If the defragmentation fails because of allocation failure,
507 * fall back to return the first fragment and pretend all is well. */
508 /* FIXME: Is this terribly brain-dead? It corresponds to the semantic of
509 * the code this extended version of the old defrag_entry() is supposed
510 * to replace. --jonas */
511 new_frag
= frag_alloc(new_frag_len
);
513 return first_frag
->length
? first_frag
: NULL
;
515 new_frag
->length
= new_frag_len
;
516 new_frag
->real_length
= new_frag_len
;
518 for (new_frag_len
= 0, frag
= first_frag
;
521 struct fragment
*tmp
= frag
;
523 memcpy(new_frag
->data
+ new_frag_len
, frag
->data
, frag
->length
);
524 new_frag_len
+= frag
->length
;
531 add_to_list(cached
->frag
, new_frag
);
533 dump_frags(cached
, "get_cache_fragment");
539 delete_fragment(struct cache_entry
*cached
, struct fragment
*f
)
541 while ((void *) f
!= &cached
->frag
) {
542 struct fragment
*tmp
= f
->next
;
544 enlarge_entry(cached
, -f
->length
);
552 truncate_entry(struct cache_entry
*cached
, off_t offset
, int final
)
556 if (cached
->length
> offset
) {
557 cached
->length
= offset
;
558 cached
->incomplete
= 1;
561 foreach (f
, cached
->frag
) {
562 off_t size
= offset
- f
->offset
;
564 /* XXX: is zero length fragment really legal here ? --Zas */
565 assert(f
->length
>= 0);
567 if (size
>= f
->length
) continue;
570 enlarge_entry(cached
, -(f
->length
- size
));
576 nf
= frag_realloc(f
, f
->length
);
581 f
->real_length
= f
->length
;
588 delete_fragment(cached
, f
);
590 dump_frags(cached
, "truncate_entry");
596 free_entry_to(struct cache_entry
*cached
, off_t offset
)
600 foreach (f
, cached
->frag
) {
601 if (f
->offset
+ f
->length
<= offset
) {
602 struct fragment
*tmp
= f
;
604 enlarge_entry(cached
, -f
->length
);
608 } else if (f
->offset
< offset
) {
609 off_t size
= offset
- f
->offset
;
611 enlarge_entry(cached
, -size
);
613 memmove(f
->data
, f
->data
+ size
, f
->length
);
620 delete_entry_content(struct cache_entry
*cached
)
622 enlarge_entry(cached
, -cached
->data_size
);
624 while (cached
->frag
.next
!= (void *) &cached
->frag
) {
625 struct fragment
*f
= cached
->frag
.next
;
630 cached
->id
= id_counter
++;
632 cached
->incomplete
= 1;
634 mem_free_set(&cached
->last_modified
, NULL
);
635 mem_free_set(&cached
->etag
, NULL
);
639 done_cache_entry(struct cache_entry
*cached
)
641 assertm(!is_object_used(cached
), "deleting locked cache entry");
642 assertm(!is_entry_used(cached
), "deleting loading cache entry");
644 delete_entry_content(cached
);
646 if (cached
->box_item
) done_listbox_item(&cache_browser
, cached
->box_item
);
648 if (cached
->uri
) done_uri(cached
->uri
);
649 if (cached
->proxy_uri
) done_uri(cached
->proxy_uri
);
650 if (cached
->redirect
) done_uri(cached
->redirect
);
652 mem_free_if(cached
->head
);
653 mem_free_if(cached
->content_type
);
654 mem_free_if(cached
->last_modified
);
655 mem_free_if(cached
->ssl_info
);
656 mem_free_if(cached
->encoding_info
);
657 mem_free_if(cached
->etag
);
663 delete_cache_entry(struct cache_entry
*cached
)
665 del_from_list(cached
);
667 done_cache_entry(cached
);
672 normalize_cache_entry(struct cache_entry
*cached
, off_t truncate_length
)
674 if (truncate_length
< 0)
677 truncate_entry(cached
, truncate_length
, 1);
678 cached
->incomplete
= 0;
679 cached
->preformatted
= 0;
684 redirect_cache(struct cache_entry
*cached
, unsigned char *location
,
685 int get
, int incomplete
)
687 unsigned char *uristring
;
689 /* XXX: I am a little puzzled whether we should only use the cache
690 * entry's URI if it is valid. Hopefully always using it won't hurt.
691 * Currently we handle direction redirects where "/" should be appended
692 * special dunno if join_urls() could be made to handle that.
694 /* XXX: We are assuming here that incomplete will only be zero when
695 * doing these fake redirects which only purpose is to add an ending
696 * slash *cough* dirseparator to the end of the URI. */
697 if (incomplete
== 0 && location
[0] == '/' && location
[1] == 0) {
698 /* To be sure use get_uri_string() to get rid of post data */
699 uristring
= get_uri_string(cached
->uri
, URI_ORIGINAL
);
700 if (uristring
) add_to_strn(&uristring
, location
);
702 uristring
= join_urls(cached
->uri
, location
);
705 if (!uristring
) return NULL
;
707 /* Only add the post data if the redirect should not use GET method.
708 * This is tied to the HTTP handling of the 303 and (if the
709 * protocol.http.bugs.broken_302_redirect is enabled) the 302 status
711 if (cached
->uri
->post
712 && !cached
->redirect_get
714 /* XXX: Add POST_CHAR and post data assuming URI components
715 * belong to one string. */
717 /* To be certain we don't append post data twice in some
718 * conditions... --Zas */
719 assert(!strchr(uristring
, POST_CHAR
));
721 add_to_strn(&uristring
, cached
->uri
->post
- 1);
724 if (cached
->redirect
) done_uri(cached
->redirect
);
725 cached
->redirect
= get_uri(uristring
, 0);
726 cached
->redirect_get
= get
;
727 if (incomplete
>= 0) cached
->incomplete
= incomplete
;
731 return cached
->redirect
;
736 garbage_collection(int whole
)
738 struct cache_entry
*cached
;
739 /* We recompute cache_size when scanning cache entries, to ensure
741 unsigned longlong old_cache_size
= 0;
742 /* The maximal cache size tolerated by user. Note that this is only
743 * size of the "just stored" unused cache entries, used cache entries
744 * are not counted to that. */
745 unsigned longlong opt_cache_size
= get_opt_long("document.cache.memory.size");
746 /* The low-treshold cache size. Basically, when the cache size is
747 * higher than opt_cache_size, we free the cache so that there is no
748 * more than this value in the cache anymore. This is to make sure we
749 * aren't cleaning cache too frequently when working with a lot of
750 * small cache entries but rather free more and then let it grow a
751 * little more as well. */
752 unsigned longlong gc_cache_size
= opt_cache_size
* MEMORY_CACHE_GC_PERCENT
/ 100;
753 /* The cache size we aim to reach. */
754 unsigned longlong new_cache_size
= cache_size
;
756 /* Whether we've hit an used (unfreeable) entry when collecting
758 int obstacle_entry
= 0;
762 DBG("gc whole=%d opt_cache_size=%ld gc_cache_size=%ld",
763 whole
, opt_cache_size
,gc_cache_size
);
766 if (!whole
&& cache_size
<= opt_cache_size
) return;
769 /* Scanning cache, pass #1:
770 * Weed out the used cache entries from @new_cache_size, so that we
771 * will work only with the unused entries from then on. Also ensure
772 * that @cache_size is in sync. */
774 foreach (cached
, cache_entries
) {
775 old_cache_size
+= cached
->data_size
;
777 if (!is_object_used(cached
) && !is_entry_used(cached
))
780 assertm(new_cache_size
>= cached
->data_size
,
781 "cache_size (%ld) underflow: subtracting %ld from %ld",
782 cache_size
, cached
->data_size
, new_cache_size
);
784 new_cache_size
-= cached
->data_size
;
786 if_assert_failed
{ new_cache_size
= 0; }
789 assertm(old_cache_size
== cache_size
,
790 "cache_size out of sync: %ld != (actual) %ld",
791 cache_size
, old_cache_size
);
792 if_assert_failed
{ cache_size
= old_cache_size
; }
794 if (!whole
&& new_cache_size
<= opt_cache_size
) return;
797 /* Scanning cache, pass #2:
798 * Mark potential targets for destruction, from the oldest to the
801 foreachback (cached
, cache_entries
) {
802 /* We would have shrinked enough already? */
803 if (!whole
&& new_cache_size
<= gc_cache_size
)
804 goto shrinked_enough
;
806 /* Skip used cache entries. */
807 if (is_object_used(cached
) || is_entry_used(cached
)) {
811 cached
->gc_target
= 0;
815 /* FIXME: Optionally take cached->max_age into consideration,
816 * but that will probably complicate things too much. We'd have
817 * to sort entries so prioritize removing the oldest entries. */
819 assertm(new_cache_size
>= cached
->data_size
,
820 "cache_size (%ld) underflow: subtracting %ld from %ld",
821 cache_size
, cached
->data_size
, new_cache_size
);
823 /* Mark me for destruction, sir. */
824 cached
->gc_target
= 1;
825 new_cache_size
-= cached
->data_size
;
827 if_assert_failed
{ new_cache_size
= 0; }
830 /* If we'd free the whole cache... */
831 assertm(new_cache_size
== 0,
832 "cache_size (%ld) overflow: %ld",
833 cache_size
, new_cache_size
);
834 if_assert_failed
{ new_cache_size
= 0; }
839 /* Now turn around and start walking in the opposite direction. */
840 cached
= cached
->next
;
842 /* Something is strange when we decided all is ok before dropping any
844 if ((void *) cached
== &cache_entries
) return;
848 struct cache_entry
*entry
;
850 /* Scanning cache, pass #3:
851 * Walk back in the cache and unmark the cache entries which
852 * could still fit into the cache. */
854 /* This makes sense when the newest entry is HUGE and after it,
855 * there's just plenty of tiny entries. By this point, all the
856 * tiny entries would be marked for deletion even though it'd
857 * be enough to free the huge entry. This actually fixes that
860 for (entry
= cached
; (void *) entry
!= &cache_entries
; entry
= entry
->next
) {
861 unsigned longlong newer_cache_size
= new_cache_size
+ entry
->data_size
;
863 if (newer_cache_size
> gc_cache_size
)
866 new_cache_size
= newer_cache_size
;
867 entry
->gc_target
= 0;
872 /* Scanning cache, pass #4:
873 * Destroy the marked entries. So sad, but that's life, bro'. */
875 for (; (void *) cached
!= &cache_entries
; ) {
876 cached
= cached
->next
;
877 if (cached
->prev
->gc_target
)
878 delete_cache_entry(cached
->prev
);
883 if ((whole
|| !obstacle_entry
) && cache_size
> gc_cache_size
) {
884 DBG("garbage collection doesn't work, cache size %ld > %ld, "
885 "document.cache.memory.size set to: %ld bytes",
886 cache_size
, gc_cache_size
,
887 get_opt_long("document.cache.memory.size"));