#include "bfu/dialog.h"
#include "cache/cache.h"
#include "cache/dialogs.h"
#include "config/options.h"
#include "main/main.h"
#include "main/object.h"
#include "network/connection.h"
#include "protocol/protocol.h"
#include "protocol/proxy.h"
#include "protocol/uri.h"
#include "util/error.h"
#include "util/memory.h"
#include "util/string.h"
#include "util/time.h"
/* The list of cache entries */
static INIT_LIST_OF(struct cache_entry, cache_entries);

static unsigned longlong cache_size;
static int id_counter = 1;

static void truncate_entry(struct cache_entry *cached, off_t offset, int final);
/* Change 0 to 1 to enable cache debugging features (redirect stderr to a file). */
#define DEBUG_CACHE 0

#if DEBUG_CACHE
#define dump_frag(frag, count) \
do { \
	DBG(" [%d] f=%p offset=%" OFF_PRINT_FORMAT \
	    " length=%" OFF_PRINT_FORMAT \
	    " real_length=%" OFF_PRINT_FORMAT, \
	    count, frag, (off_print_T) frag->offset, \
	    (off_print_T) frag->length, (off_print_T) frag->real_length); \
} while (0)

#define dump_frags(entry, comment) \
do { \
	struct fragment *frag; \
	int count = 0; \
 \
	DBG("%s: url=%s, cache_size=%li", comment, struri(entry->uri), cache_size); \
	foreach (frag, entry->frag) \
		dump_frag(frag, ++count); \
} while (0)

#else
#define dump_frag(frag, count)
#define dump_frags(entry, comment)
#endif /* DEBUG_CACHE */
int
get_cache_entry_count(void)
{
	return list_size(&cache_entries);
}
int
get_cache_entry_used_count(void)
{
	struct cache_entry *cached;
	int i = 0;

	foreach (cached, cache_entries)
		i += is_object_used(cached);

	return i;
}
int
get_cache_entry_loading_count(void)
{
	struct cache_entry *cached;
	int i = 0;

	foreach (cached, cache_entries)
		i += is_entry_used(cached);

	return i;
}
struct cache_entry *
find_in_cache(struct uri *uri)
{
	struct cache_entry *cached;
	int proxy = (uri->protocol == PROTOCOL_PROXY);

	foreach (cached, cache_entries) {
		struct uri *c_uri;

		if (!cached->valid) continue;

		c_uri = proxy ? cached->proxy_uri : cached->uri;
		if (!compare_uri(c_uri, uri, URI_BASE))
			continue;

		move_to_top_of_list(cache_entries, cached);
		return cached;
	}

	return NULL;
}
struct cache_entry *
get_cache_entry(struct uri *uri)
{
	struct cache_entry *cached = find_in_cache(uri);

	assertm(!uri->fragment, "Fragment in URI (%s)", struri(uri));

	if (cached) return cached;

	cached = mem_calloc(1, sizeof(*cached));
	if (!cached) return NULL;

	cached->uri = get_proxied_uri(uri);

	cached->proxy_uri = get_proxy_uri(uri, NULL);
	if (!cached->proxy_uri) {
		done_uri(cached->uri);
		mem_free(cached);
		return NULL;
	}
	cached->incomplete = 1;

	init_list(cached->frag);
	cached->cache_id = id_counter++;
	object_nolock(cached, "cache_entry"); /* Debugging purpose. */

	cached->box_item = add_listbox_leaf(&cache_browser, NULL, cached);

	add_to_list(cache_entries, cached);

	return cached;
}
static int
cache_entry_has_expired(struct cache_entry *cached)
{
	timeval_T now;

	timeval_now(&now);

	return timeval_cmp(&cached->max_age, &now) <= 0;
}
struct cache_entry *
get_validated_cache_entry(struct uri *uri, enum cache_mode cache_mode)
{
	struct cache_entry *cached;

	/* We have to check if something should be reloaded */
	if (cache_mode > CACHE_MODE_NORMAL)
		return NULL;

	/* We only consider complete entries */
	cached = find_in_cache(uri);
	if (!cached || cached->incomplete)
		return NULL;

	/* A bit of a gray zone. Delete the entry if it has the strictest
	 * cache mode and we don't want the most aggressive mode, or we have
	 * to remove the redirect, or the entry expired. Please enlighten me. */
	if ((cached->cache_mode == CACHE_MODE_NEVER && cache_mode != CACHE_MODE_ALWAYS)
	    || (cached->redirect && !get_opt_bool("document.cache.cache_redirects", NULL))
	    || (cached->expire && cache_entry_has_expired(cached))) {
		if (!is_object_used(cached)) delete_cache_entry(cached);
		return NULL;
	}

	if (cached->cache_mode <= CACHE_MODE_CHECK_IF_MODIFIED
	    && cache_mode <= CACHE_MODE_CHECK_IF_MODIFIED
	    && (cached->last_modified || cached->etag)
	    && get_opt_int("document.cache.revalidation_interval", NULL) >= 0) {
		if (cached->seconds + get_opt_int("document.cache.revalidation_interval", NULL) < time(NULL))
			return NULL;
	}

	return cached;
}
int
cache_entry_is_valid(struct cache_entry *cached)
{
	struct cache_entry *valid_cached;

	foreach (valid_cached, cache_entries) {
		if (valid_cached == cached)
			return 1;
	}

	return 0;
}
struct cache_entry *
follow_cached_redirects(struct cache_entry *cached)
{
	int redirects = 0;

	while (cached) {
		if (!cached->redirect) {
			/* XXX: This is not quite true, but does that difference
			 * matter here? */
			return cached;
		}

		if (++redirects > MAX_REDIRECTS) break;

		cached = find_in_cache(cached->redirect);
	}

	return NULL;
}

struct cache_entry *
get_redirected_cache_entry(struct uri *uri)
{
	struct cache_entry *cached = find_in_cache(uri);

	return cached ? follow_cached_redirects(cached) : NULL;
}
static void
enlarge_entry(struct cache_entry *cached, off_t size)
{
	cached->data_size += size;
	assertm(cached->data_size >= 0,
		"cache entry data_size underflow: %ld", cached->data_size);
	if_assert_failed { cached->data_size = 0; }

	cache_size += size;
	assertm(cache_size >= 0, "cache_size underflow: %ld", cache_size);
	if_assert_failed { cache_size = 0; }
}
#define CACHE_PAD(x) (((x) | 0x3fff) + 1)

/* One byte is reserved for data in struct fragment. */
#define FRAGSIZE(x) (sizeof(struct fragment) + (x) - 1)
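
/* Illustrative arithmetic (not used by the code itself): CACHE_PAD() rounds
 * a requested size up to a 16 KiB step, e.g. CACHE_PAD(1000) == 16384 and
 * CACHE_PAD(16384) == 32768, so a freshly allocated fragment gets slack to
 * grow into before it has to be reallocated. FRAGSIZE(n) is then the size of
 * the whole mmap()ed block for n data bytes: the struct fragment header plus
 * n, minus the one data byte already counted inside the struct. */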

/* We store the fragments themselves in a private vault, safely separated from
 * the rest of the memory structures. If we lived in the main libc memory
 * pool, we would trigger annoying pathological behaviour like artificially
 * enlarging the memory pool to 50M, then securing it with some stupid cookie
 * record at the top, and then no matter how you flush the cache the data
 * segment stays that big.
 *
 * Cool, but we don't want that, so fragments (where the big data is stored)
 * live in their little mmap()ed worlds. There is some overhead, but if we
 * assume a single fragment per cache entry and a page size (mmap() allocation
 * granularity) of 4096, then for a squad of ten 1kb documents this amounts to
 * about 30kb (each 1kb document wastes roughly 3kb of its page). That's not
 * *that* horrible when you realize that the freshmeat front page takes 300kb
 * in memory and we usually do not deal with documents so small that the max.
 * 4kb overhead would be visible there.
 *
 * The alternative would of course be to manage an entire custom memory pool,
 * but that is feasible only when we are able to resize it efficiently. We
 * aren't, except on Linux.
 *
 * Of course, for all this to really prevent the pathological cases completely,
 * we need to stuff the rendered documents in too, because they seem to account
 * for the major memory bursts. */
static struct fragment *
frag_alloc(size_t size)
{
	struct fragment *f = mem_mmap_alloc(FRAGSIZE(size));

	if (!f) return NULL;
	memset(f, 0, FRAGSIZE(size));
	return f;
}
static struct fragment *
frag_realloc(struct fragment *f, size_t size)
{
	return mem_mmap_realloc(f, FRAGSIZE(f->real_length), FRAGSIZE(size));
}
static void
frag_free(struct fragment *f)
{
	mem_mmap_free(f, FRAGSIZE(f->real_length));
}
/* Concatenate overlapping fragments. */
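/* A hypothetical illustration: if f covers bytes [0, 100) and f->next covers
 * [80, 150), the tail [100, 150) of f->next is copied onto the end of f and
 * f->next is removed; if f->next instead covered [80, 90), it is a complete
 * subset of f and is simply dropped. In both cases *trunc is set when the
 * overlapping bytes do not match what f already holds. */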
static void
remove_overlaps(struct cache_entry *cached, struct fragment *f, int *trunc)
{
	off_t f_end_offset = f->offset + f->length;

	/* Iterate through all fragments we still overlap with. */
	while (list_has_next(cached->frag, f)
	       && f_end_offset > f->next->offset) {
		struct fragment *nf;
		off_t end_offset = f->next->offset + f->next->length;

		if (f_end_offset < end_offset) {
			/* We end before the end of the following fragment,
			 * though. So try to append the overlapping part of
			 * that fragment to us. */
			nf = frag_realloc(f, end_offset - f->offset);
			if (nf) {
				nf->prev->next = nf;
				nf->next->prev = nf;
				f = nf;

				if (memcmp(f->data + f->next->offset - f->offset,
					   f->next->data,
					   f->offset + f->length - f->next->offset))
					*trunc = 1;

				memcpy(f->data + f->length,
				       f->next->data + f_end_offset - f->next->offset,
				       end_offset - f_end_offset);

				enlarge_entry(cached, end_offset - f_end_offset);
				f->length = f->real_length = end_offset - f->offset;
			}

		} else {
			/* We will just discard this, it's a complete subset of
			 * our new fragment. */
			if (memcmp(f->data + f->next->offset - f->offset,
				   f->next->data,
				   f->next->length))
				*trunc = 1;
		}

		/* Remove the fragment, it influences our new one! */
		nf = f->next;
		enlarge_entry(cached, -nf->length);
		del_from_list(nf);
		frag_free(nf);
	}
}
/* Note that this function is maybe overcommented, but I'm certainly not
 * unhappy about that. */
int
add_fragment(struct cache_entry *cached, off_t offset,
	     const unsigned char *data, ssize_t length)
{
	struct fragment *f, *nf;
	off_t end_offset;
	int trunc = 0;

	if (!length) return 0;

	end_offset = offset + length;
	if (cached->length < end_offset)
		cached->length = end_offset;

	/* The id marks each entry and changes each time the entry is
	 * modified; it is used by the HTML renderer. */
	cached->cache_id = id_counter++;
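	/* For example (illustrative, not a guarantee of the renderer's exact
	 * strategy): code that cached work keyed on the old cache_id can
	 * compare its stored value against cached->cache_id and redo the
	 * work only when the two no longer match. */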

	/* Possibly insert the new data in the middle of an existing fragment. */
	foreach (f, cached->frag) {
		int ret = 0;
		off_t f_end_offset = f->offset + f->length;

		/* No intersection? */
		if (f->offset > offset) break;
		if (f_end_offset < offset) continue;

		if (end_offset > f_end_offset) {
			/* Overlap - we end further than the original fragment. */

			if (end_offset - f->offset <= f->real_length) {
				/* We fit here, so let's enlarge it by the delta
				 * of the old and new end.. */
				enlarge_entry(cached, end_offset - f_end_offset);
				/* ..and length is now the total length. */
				f->length = end_offset - f->offset;

				ret = 1; /* It was enlarged. */
			} else {
				/* We will reduce the fragment length to just
				 * the starting, non-intersecting part and add
				 * a new fragment directly after this one. */
				f->length = offset - f->offset;
			}

		} /* else We are a subset of the original fragment. */

		/* Copy the stuff over there. */
		memcpy(f->data + offset - f->offset, data, length);

		remove_overlaps(cached, f, &trunc);

		/* We truncate the entry even if the data contents are the
		 * same as what we have in the fragment, because that does
		 * not mean that what is going to follow won't differ. This
		 * is a serious problem when rendering an HTML frame with
		 * onload snippets - we "guess" the rest of the document here,
		 * interpret the snippet, then it turns out in the real
		 * document the snippet is different and we are in trouble.
		 *
		 * Debugging this took me about 1.5 days (really), the diff
		 * with all the debugging print commands amounted to about
		 * 20kb (gdb wasn't much use since it stalled the download,
		 * de facto eliminating the bad behaviour). */
		truncate_entry(cached, end_offset, 0);

		dump_frags(cached, "add_fragment");

		return ret;
	}

	/* Make up a new fragment. */
	nf = frag_alloc(CACHE_PAD(length));
	if (!nf) return -1;

	nf->offset = offset;
	nf->length = length;
	nf->real_length = CACHE_PAD(length);
	memcpy(nf->data, data, length);
	add_at_pos(f->prev, nf);

	enlarge_entry(cached, length);

	remove_overlaps(cached, nf, &trunc);
	if (trunc) truncate_entry(cached, end_offset, 0);

	dump_frags(cached, "add_fragment");

	return 0;
}
/* Try to defragment the cache entry. Defragmentation will not be possible
 * if there is a gap in the fragments; if we have bytes 1-100 in one fragment
 * and bytes 201-300 in the second, we must leave those two fragments separate
 * so that the fragment for bytes 101-200 can later be inserted. However,
 * if we have the fragments for bytes 1-100, 101-200, and 201-300, we will
 * concatenate them into one new fragment and replace the original fragments
 * with that new fragment.
 *
 * If there are no fragments, return NULL. If there is no fragment with byte 1,
 * return NULL. Otherwise, return the first fragment, whether or not it was
 * possible to fully defragment the entry. */
struct fragment *
get_cache_fragment(struct cache_entry *cached)
{
	struct fragment *first_frag, *adj_frag, *frag, *new_frag;
	int new_frag_len;

	if (list_empty(cached->frag))
		return NULL;

	first_frag = cached->frag.next;
	if (first_frag->offset)
		return NULL;

	/* Only one fragment so no defragmentation is needed */
	if (list_is_singleton(cached->frag))
		return first_frag;

	/* Find the first pair of fragments with a gap in between. Only
	 * fragments up to the first gap can be defragmented. */
	for (adj_frag = first_frag->next; adj_frag != (void *) &cached->frag;
	     adj_frag = adj_frag->next) {
		long gap = adj_frag->offset
			   - (adj_frag->prev->offset + adj_frag->prev->length);

		if (gap == 0) continue;
		if (gap > 0) break;

		INTERNAL("fragments overlap");
		return NULL;
	}

	/* There is a gap between the first two fragments, so we can't
	 * defragment anything. */
	if (adj_frag == first_frag->next)
		return first_frag;

	/* Calculate the length of the defragmented fragment. */
	for (new_frag_len = 0, frag = first_frag;
	     frag != adj_frag;
	     frag = frag->next)
		new_frag_len += frag->length;

	/* XXX: If the defragmentation fails because of allocation failure,
	 * fall back to returning the first fragment and pretend all is well. */
	/* FIXME: Is this terribly brain-dead? It corresponds to the semantics
	 * of the code this extended version of the old defrag_entry() is
	 * supposed to replace. --jonas */
	new_frag = frag_alloc(new_frag_len);
	if (!new_frag)
		return first_frag->length ? first_frag : NULL;

	new_frag->length = new_frag_len;
	new_frag->real_length = new_frag_len;

	for (new_frag_len = 0, frag = first_frag;
	     frag != adj_frag;
	     frag = frag->next) {
		struct fragment *tmp = frag;

		memcpy(new_frag->data + new_frag_len, frag->data, frag->length);
		new_frag_len += frag->length;

		frag = frag->prev;
		del_from_list(tmp);
		frag_free(tmp);
	}

	add_to_list(cached->frag, new_frag);

	dump_frags(cached, "get_cache_fragment");

	return new_frag;
}
static void
delete_fragment(struct cache_entry *cached, struct fragment *f)
{
	while ((void *) f != &cached->frag) {
		struct fragment *tmp = f->next;

		enlarge_entry(cached, -f->length);
		del_from_list(f);
		frag_free(f);
		f = tmp;
	}
}
static void
truncate_entry(struct cache_entry *cached, off_t offset, int final)
{
	struct fragment *f;

	if (cached->length > offset) {
		cached->length = offset;
		cached->incomplete = 1;
	}

	foreach (f, cached->frag) {
		off_t size = offset - f->offset;

		/* XXX: is a zero length fragment really legal here? --Zas */
		assert(f->length >= 0);

		if (size >= f->length) continue;

		if (size > 0) {
			struct fragment *nf;

			enlarge_entry(cached, -(f->length - size));
			f->length = size;

			if (final) {
				nf = frag_realloc(f, f->length);
				if (nf) {
					nf->next->prev = nf;
					nf->prev->next = nf;
					f = nf;
					f->real_length = f->length;
				}
			}

			f = f->next;
		}

		delete_fragment(cached, f);

		dump_frags(cached, "truncate_entry");
		return;
	}
}
void
free_entry_to(struct cache_entry *cached, off_t offset)
{
	struct fragment *f;

	foreach (f, cached->frag) {
		if (f->offset + f->length <= offset) {
			struct fragment *tmp = f;

			enlarge_entry(cached, -f->length);
			f = f->prev;
			del_from_list(tmp);
			frag_free(tmp);
		} else if (f->offset < offset) {
			off_t size = offset - f->offset;

			enlarge_entry(cached, -size);
			f->length -= size;
			memmove(f->data, f->data + size, f->length);
			f->offset = offset;
		} else break;
	}
}
void
delete_entry_content(struct cache_entry *cached)
{
	enlarge_entry(cached, -cached->data_size);

	while (cached->frag.next != (void *) &cached->frag) {
		struct fragment *f = cached->frag.next;

		del_from_list(f);
		frag_free(f);
	}
	cached->cache_id = id_counter++;
	cached->length = 0;
	cached->incomplete = 1;

	mem_free_set(&cached->last_modified, NULL);
	mem_free_set(&cached->etag, NULL);
}
static void
done_cache_entry(struct cache_entry *cached)
{
	assertm(!is_object_used(cached), "deleting locked cache entry");
	assertm(!is_entry_used(cached), "deleting loading cache entry");

	delete_entry_content(cached);

	if (cached->box_item) done_listbox_item(&cache_browser, cached->box_item);

	if (cached->uri) done_uri(cached->uri);
	if (cached->proxy_uri) done_uri(cached->proxy_uri);
	if (cached->redirect) done_uri(cached->redirect);

	mem_free_if(cached->head);
	mem_free_if(cached->content_type);
	mem_free_if(cached->last_modified);
	mem_free_if(cached->ssl_info);
	mem_free_if(cached->encoding_info);
	mem_free_if(cached->etag);

	mem_free(cached);
}

void
delete_cache_entry(struct cache_entry *cached)
{
	del_from_list(cached);

	done_cache_entry(cached);
}
void
normalize_cache_entry(struct cache_entry *cached, off_t truncate_length)
{
	if (truncate_length < 0)
		return;

	truncate_entry(cached, truncate_length, 1);
	cached->incomplete = 0;
	cached->preformatted = 0;
	cached->seconds = time(NULL);
}
struct uri *
redirect_cache(struct cache_entry *cached, unsigned char *location,
	       int get, int incomplete)
{
	unsigned char *uristring;

	/* XXX: I am a little puzzled whether we should only use the cache
	 * entry's URI if it is valid. Hopefully always using it won't hurt.
	 * Currently we handle directory redirects where "/" should be
	 * appended as a special case; dunno if join_urls() could be made to
	 * handle that. */

	/* XXX: We are assuming here that incomplete will only be zero when
	 * doing these fake redirects whose only purpose is to add an ending
	 * slash *cough* dirseparator to the end of the URI. */
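	/* E.g. (hypothetical values): with cached->uri "http://host/dir",
	 * location "/" and incomplete == 0, the branch below just produces
	 * "http://host/dir/"; a real redirect location goes through
	 * join_urls() instead. */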
	if (incomplete == 0 && dir_sep(location[0]) && location[1] == 0) {
		/* To be sure, use get_uri_string() to get rid of post data */
		uristring = get_uri_string(cached->uri, URI_ORIGINAL);
		if (uristring) add_to_strn(&uristring, location);
	} else {
		uristring = join_urls(cached->uri, location);
	}

	if (!uristring) return NULL;

	/* Only add the post data if the redirect should not use the GET
	 * method. This is tied to the HTTP handling of the 303 and (if
	 * protocol.http.bugs.broken_302_redirect is enabled) the 302 status
	 * codes. */
	if (cached->uri->post
	    && !cached->redirect_get
	    && !get) {
		/* XXX: Add POST_CHAR and post data assuming URI components
		 * belong to one string. */

		/* To be certain we don't append post data twice in some
		 * conditions... --Zas */
		assert(!strchr(uristring, POST_CHAR));

		add_to_strn(&uristring, cached->uri->post - 1);
	}

	if (cached->redirect) done_uri(cached->redirect);
	cached->redirect = get_uri(uristring, 0);
	cached->redirect_get = get;
	if (incomplete >= 0) cached->incomplete = incomplete;

	mem_free(uristring);

	return cached->redirect;
}
void
garbage_collection(int whole)
{
	struct cache_entry *cached;
	/* We recompute cache_size when scanning cache entries, to ensure
	 * that it stays in sync. */
	unsigned longlong old_cache_size = 0;
	/* The maximal cache size tolerated by the user. Note that this is
	 * only the size of the "just stored" unused cache entries; used
	 * cache entries are not counted into that. */
	unsigned longlong opt_cache_size = get_opt_long("document.cache.memory.size", NULL);
	/* The low-threshold cache size. Basically, when the cache size is
	 * higher than opt_cache_size, we free the cache so that there is no
	 * more than this value in the cache anymore. This is to make sure we
	 * aren't cleaning the cache too frequently when working with a lot of
	 * small cache entries but rather free more and then let it grow a
	 * little more as well. */
	unsigned longlong gc_cache_size = opt_cache_size * MEMORY_CACHE_GC_PERCENT / 100;
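	/* Worked example (illustrative numbers only): with
	 * document.cache.memory.size set to 1048576 bytes and
	 * MEMORY_CACHE_GC_PERCENT at, say, 75, gc_cache_size is 786432, so a
	 * collection keeps freeing unused entries until at most roughly
	 * 768 KiB of them remain. */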
	/* The cache size we aim to reach. */
	unsigned longlong new_cache_size = cache_size;
	/* Whether we've hit a used (unfreeable) entry while collecting
	 * the garbage. */
	int obstacle_entry = 0;

	DBG("gc whole=%d opt_cache_size=%ld gc_cache_size=%ld",
	    whole, opt_cache_size, gc_cache_size);

	if (!whole && cache_size <= opt_cache_size) return;


	/* Scanning cache, pass #1:
	 * Weed out the used cache entries from @new_cache_size, so that we
	 * will work only with the unused entries from then on. Also ensure
	 * that @cache_size is in sync. */

	foreach (cached, cache_entries) {
		old_cache_size += cached->data_size;

		if (!is_object_used(cached) && !is_entry_used(cached))
			continue;

		assertm(new_cache_size >= cached->data_size,
			"cache_size (%ld) underflow: subtracting %ld from %ld",
			cache_size, cached->data_size, new_cache_size);

		new_cache_size -= cached->data_size;

		if_assert_failed { new_cache_size = 0; }
	}

	assertm(old_cache_size == cache_size,
		"cache_size out of sync: %ld != (actual) %ld",
		cache_size, old_cache_size);
	if_assert_failed { cache_size = old_cache_size; }

	if (!whole && new_cache_size <= opt_cache_size) return;


	/* Scanning cache, pass #2:
	 * Mark potential targets for destruction, from the oldest to the
	 * newest. */

	foreachback (cached, cache_entries) {
		/* Have we shrunk enough already? */
		if (!whole && new_cache_size <= gc_cache_size)
			goto shrinked_enough;

		/* Skip used cache entries. */
		if (is_object_used(cached) || is_entry_used(cached)) {
			obstacle_entry = 1;
			cached->gc_target = 0;
			continue;
		}

		/* FIXME: Optionally take cached->max_age into consideration,
		 * but that will probably complicate things too much. We'd
		 * have to sort entries to prioritize removing the oldest
		 * entries. */

		assertm(new_cache_size >= cached->data_size,
			"cache_size (%ld) underflow: subtracting %ld from %ld",
			cache_size, cached->data_size, new_cache_size);

		/* Mark me for destruction, sir. */
		cached->gc_target = 1;
		new_cache_size -= cached->data_size;

		if_assert_failed { new_cache_size = 0; }
	}

	/* If we'd free the whole cache... */
	assertm(new_cache_size == 0,
		"cache_size (%ld) overflow: %ld",
		cache_size, new_cache_size);
	if_assert_failed { new_cache_size = 0; }

shrinked_enough:

	/* Now turn around and start walking in the opposite direction. */
	cached = cached->next;

	/* Something is strange if we decided all is OK before dropping any
	 * cache entries. */
	if ((void *) cached == &cache_entries) return;


	if (!whole) {
		struct cache_entry *entry;

		/* Scanning cache, pass #3:
		 * Walk back in the cache and unmark the cache entries which
		 * could still fit into the cache. */

		/* This makes sense when the newest entry is HUGE and after it,
		 * there's just plenty of tiny entries. By this point, all the
		 * tiny entries would be marked for deletion even though it'd
		 * be enough to free the huge entry. This actually fixes that
		 * behaviour. */

		for (entry = cached; (void *) entry != &cache_entries; entry = entry->next) {
			unsigned longlong newer_cache_size = new_cache_size + entry->data_size;

			if (newer_cache_size > gc_cache_size)
				continue;

			new_cache_size = newer_cache_size;
			entry->gc_target = 0;
		}
	}


	/* Scanning cache, pass #4:
	 * Destroy the marked entries. So sad, but that's life, bro'. */

	for (; (void *) cached != &cache_entries; ) {
		cached = cached->next;
		if (cached->prev->gc_target)
			delete_cache_entry(cached->prev);
	}


	if ((whole || !obstacle_entry) && cache_size > gc_cache_size) {
		DBG("garbage collection doesn't work, cache size %ld > %ld, "
		    "document.cache.memory.size set to: %ld bytes",
		    cache_size, gc_cache_size,
		    get_opt_long("document.cache.memory.size", NULL));
	}
}