make module cache just about everything, but never return items
[httpd-crcsyncproxy.git] / crccache / mod_crccache_client.c
blobfddad209b79f69919fd076c0993e28217f0d7e60
/* Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/* crcsync/crccache apache client module
 *
 * This module is designed to run as a cache server on the local end of a slow
 * internet link. This module uses a CRC running-hash algorithm to reduce
 * data transfer in cached but modified upstream files.
 *
 * CRC algorithm uses the crcsync library created by Rusty Russell
 *
 * Author: Toby Collett (2009)
 * Contributor: Alex Wulms (2009)
 */
32 #include <assert.h>
34 #include <apr-1.0/apr_file_io.h>
35 #include <apr-1.0/apr_strings.h>
36 #include <apr-1.0/apr_base64.h>
37 #include <apr-1.0/apr_lib.h>
38 #include <apr-1.0/apr_date.h>
39 #include "ap_provider.h"
40 #include "util_filter.h"
41 #include "util_script.h"
42 #include "util_charset.h"
43 #include <http_log.h>
44 #include <http_protocol.h>
46 #include "crccache.h"
47 #include "ap_wrapper.h"
48 #include <crcsync/crcsync.h>
49 #include <zlib.h>
51 #include "mod_crccache_client.h"
53 static ap_filter_rec_t *crccache_decode_filter_handle;
55 /* Handles for cache filters, resolved at startup to eliminate
56 * a name-to-function mapping on each request
58 static ap_filter_rec_t *cache_save_filter_handle;
59 static ap_filter_rec_t *cache_save_subreq_filter_handle;
60 static ap_filter_rec_t *cache_out_filter_handle;
61 static ap_filter_rec_t *cache_out_subreq_filter_handle;
62 static ap_filter_rec_t *cache_remove_url_filter_handle;
/*
 * mod_disk_cache: Disk Based HTTP 1.1 Cache.
 *
 * Flow to Find the .data file:
 *   Incoming client requests URI /foo/bar/baz
 *   Generate <hash> off of /foo/bar/baz
 *   Open <hash>.header
 *   Read in <hash>.header file (may contain Format #1 or Format #2)
 *   If format #1 (Contains a list of Vary Headers):
 *      Use each header name (from .header) with our request values (headers_in) to
 *      regenerate <hash> using HeaderName+HeaderValue+.../foo/bar/baz
 *      re-read in <hash>.header (must be format #2)
 *   read in <hash>.data
 *
 * Format #1:
 *   apr_uint32_t format;
 *   apr_time_t expire;
 *   apr_array_t vary_headers (delimited by CRLF)
 *
 * Format #2:
 *   disk_cache_info_t (first sizeof(apr_uint32_t) bytes is the format)
 *   entity name (dobj->name) [length is in disk_cache_info_t->name_len]
 *   r->headers_out (delimited by CRLF)
 *   CRLF
 *   r->headers_in (delimited by CRLF)
 *   CRLF
 */
92 module AP_MODULE_DECLARE_DATA crccache_client_module;
93 APR_OPTIONAL_FN_TYPE(ap_cache_generate_key) *cache_generate_key;
96 static int cache_post_config(apr_pool_t *p, apr_pool_t *plog,
97 apr_pool_t *ptemp, server_rec *s)
99 /* This is the means by which unusual (non-unix) os's may find alternate
100 * means to run a given command (e.g. shebang/registry parsing on Win32)
102 cache_generate_key = APR_RETRIEVE_OPTIONAL_FN(ap_cache_generate_key);
103 if (!cache_generate_key) {
104 cache_generate_key = cache_generate_key_default;
106 return OK;
/*
 * Local static functions
 */
114 static char *header_file(apr_pool_t *p, crccache_client_conf *conf,
115 disk_cache_object_t *dobj, const char *name) {
116 if (!dobj->hashfile) {
117 dobj->hashfile = ap_cache_generate_name(p, conf->dirlevels,
118 conf->dirlength, name);
121 if (dobj->prefix) {
122 return apr_pstrcat(p, dobj->prefix, CACHE_VDIR_SUFFIX, "/",
123 dobj->hashfile, CACHE_HEADER_SUFFIX, NULL);
124 } else {
125 return apr_pstrcat(p, conf->cache_root, "/", dobj->hashfile,
126 CACHE_HEADER_SUFFIX, NULL);
130 static char *data_file(apr_pool_t *p, crccache_client_conf *conf,
131 disk_cache_object_t *dobj, const char *name) {
132 if (!dobj->hashfile) {
133 dobj->hashfile = ap_cache_generate_name(p, conf->dirlevels,
134 conf->dirlength, name);
137 if (dobj->prefix) {
138 return apr_pstrcat(p, dobj->prefix, CACHE_VDIR_SUFFIX, "/",
139 dobj->hashfile, CACHE_DATA_SUFFIX, NULL);
140 } else {
141 return apr_pstrcat(p, conf->cache_root, "/", dobj->hashfile,
142 CACHE_DATA_SUFFIX, NULL);
146 static void mkdir_structure(crccache_client_conf *conf, const char *file,
147 apr_pool_t *pool) {
148 apr_status_t rv;
149 char *p;
151 for (p = (char*) file + conf->cache_root_len + 1;;) {
152 p = strchr(p, '/');
153 if (!p)
154 break;
155 *p = '\0';
157 rv = apr_dir_make(file, APR_UREAD | APR_UWRITE | APR_UEXECUTE, pool);
158 if (rv != APR_SUCCESS && !APR_STATUS_IS_EEXIST(rv)) {
159 /* XXX */
161 *p = '/';
162 ++p;
166 /* htcacheclean may remove directories underneath us.
167 * So, we'll try renaming three times at a cost of 0.002 seconds.
169 static apr_status_t safe_file_rename(crccache_client_conf *conf, const char *src,
170 const char *dest, apr_pool_t *pool) {
171 apr_status_t rv;
173 rv = apr_file_rename(src, dest, pool);
175 if (rv != APR_SUCCESS) {
176 int i;
178 for (i = 0; i < 2 && rv != APR_SUCCESS; i++) {
179 /* 1000 micro-seconds aka 0.001 seconds. */
180 apr_sleep(1000);
182 mkdir_structure(conf, dest, pool);
184 rv = apr_file_rename(src, dest, pool);
188 return rv;
191 static apr_status_t file_cache_el_final(disk_cache_object_t *dobj,
192 request_rec *r) {
193 /* move the data over */
194 if (dobj->tfd) {
195 apr_status_t rv;
197 apr_file_close(dobj->tfd);
199 /* This assumes that the tempfile is on the same file system
200 * as the cache_root. If not, then we need a file copy/move
201 * rather than a rename.
203 rv = apr_file_rename(dobj->tempfile, dobj->datafile, r->pool);
204 if (rv != APR_SUCCESS) {
205 ap_log_error(APLOG_MARK, APLOG_WARNING, rv,r->server, "disk_cache: rename tempfile to datafile failed:"
206 " %s -> %s", dobj->tempfile, dobj->datafile);
207 apr_file_remove(dobj->tempfile, r->pool);
210 dobj->tfd = NULL;
213 return APR_SUCCESS;
216 static apr_status_t file_cache_errorcleanup(disk_cache_object_t *dobj,
217 request_rec *r) {
218 /* Remove the header file and the body file. */
219 apr_file_remove(dobj->hdrsfile, r->pool);
220 apr_file_remove(dobj->datafile, r->pool);
222 /* If we opened the temporary data file, close and remove it. */
223 if (dobj->tfd) {
224 apr_file_close(dobj->tfd);
225 apr_file_remove(dobj->tempfile, r->pool);
226 dobj->tfd = NULL;
229 return APR_SUCCESS;
232 /* These two functions get and put state information into the data
233 * file for an ap_cache_el, this state information will be read
234 * and written transparent to clients of this module
236 static int file_cache_recall_mydata(apr_file_t *fd, cache_info *info,
237 disk_cache_object_t *dobj, request_rec *r) {
238 apr_status_t rv;
239 char *urlbuff;
240 disk_cache_info_t disk_info;
241 apr_size_t len;
243 /* read the data from the cache file */
244 len = sizeof(disk_cache_info_t);
245 rv = apr_file_read_full(fd, &disk_info, len, &len);
246 if (rv != APR_SUCCESS) {
247 return rv;
250 /* Store it away so we can get it later. */
251 dobj->disk_info = disk_info;
253 info->status = disk_info.status;
254 info->date = disk_info.date;
255 info->expire = disk_info.expire;
256 info->request_time = disk_info.request_time;
257 info->response_time = disk_info.response_time;
259 /* Note that we could optimize this by conditionally doing the palloc
260 * depending upon the size. */
261 urlbuff = apr_palloc(r->pool, disk_info.name_len + 1);
262 len = disk_info.name_len;
263 rv = apr_file_read_full(fd, urlbuff, len, &len);
264 if (rv != APR_SUCCESS) {
265 return rv;
267 urlbuff[disk_info.name_len] = '\0';
269 /* check that we have the same URL */
270 /* Would strncmp be correct? */
271 if (strcmp(urlbuff, dobj->name) != 0) {
272 return APR_EGENERAL;
275 return APR_SUCCESS;
278 static const char* regen_key(apr_pool_t *p, apr_table_t *headers,
279 apr_array_header_t *varray, const char *oldkey) {
280 struct iovec *iov;
281 int i, k;
282 int nvec;
283 const char *header;
284 const char **elts;
286 nvec = (varray->nelts * 2) + 1;
287 iov = apr_palloc(p, sizeof(struct iovec) * nvec);
288 elts = (const char **) varray->elts;
290 /* TODO:
291 * - Handle multiple-value headers better. (sort them?)
292 * - Handle Case in-sensitive Values better.
293 * This isn't the end of the world, since it just lowers the cache
294 * hit rate, but it would be nice to fix.
296 * The majority are case insenstive if they are values (encoding etc).
297 * Most of rfc2616 is case insensitive on header contents.
299 * So the better solution may be to identify headers which should be
300 * treated case-sensitive?
301 * HTTP URI's (3.2.3) [host and scheme are insensitive]
302 * HTTP method (5.1.1)
303 * HTTP-date values (3.3.1)
304 * 3.7 Media Types [exerpt]
305 * The type, subtype, and parameter attribute names are case-
306 * insensitive. Parameter values might or might not be case-sensitive,
307 * depending on the semantics of the parameter name.
308 * 4.20 Except [exerpt]
309 * Comparison of expectation values is case-insensitive for unquoted
310 * tokens (including the 100-continue token), and is case-sensitive for
311 * quoted-string expectation-extensions.
314 for (i = 0, k = 0; i < varray->nelts; i++) {
315 header = apr_table_get(headers, elts[i]);
316 if (!header) {
317 header = "";
319 iov[k].iov_base = (char*) elts[i];
320 iov[k].iov_len = strlen(elts[i]);
321 k++;
322 iov[k].iov_base = (char*) header;
323 iov[k].iov_len = strlen(header);
324 k++;
326 iov[k].iov_base = (char*) oldkey;
327 iov[k].iov_len = strlen(oldkey);
328 k++;
330 return apr_pstrcatv(p, iov, k, NULL);
/*
 * qsort() comparator for an array of C strings.
 *
 * fn1 and fn2 each point at an array element, i.e. at a `char *`.  The
 * pointed-to strings are compared with strcmp(), so the result is negative,
 * zero or positive.  The arguments are read through const-qualified
 * pointers; nothing is modified.
 */
static int array_alphasort(const void *fn1, const void *fn2)
{
    const char *const *lhs = fn1;
    const char *const *rhs = fn2;

    return strcmp(*lhs, *rhs);
}
337 static void tokens_to_array(apr_pool_t *p, const char *data,
338 apr_array_header_t *arr) {
339 char *token;
341 while ((token = ap_get_list_item(p, &data)) != NULL) {
342 *((const char **) apr_array_push(arr)) = token;
345 /* Sort it so that "Vary: A, B" and "Vary: B, A" are stored the same. */
346 qsort((void *) arr->elts, arr->nelts, sizeof(char *), array_alphasort);
/*
 * Hook and mod_cache callback functions
 */
352 int create_entity(cache_handle_t *h, request_rec *r, const char *key,
353 apr_off_t len) {
354 crccache_client_conf *conf = ap_get_module_config(r->server->module_config,
355 &crccache_client_module);
356 cache_object_t *obj;
357 disk_cache_object_t *dobj;
359 if (conf->cache_root == NULL) {
360 return DECLINED;
363 /* Allocate and initialize cache_object_t and disk_cache_object_t */
364 h->cache_obj = obj = apr_pcalloc(r->pool, sizeof(*obj));
365 obj->vobj = dobj = apr_pcalloc(r->pool, sizeof(*dobj));
367 obj->key = apr_pstrdup(r->pool, key);
369 dobj->name = obj->key;
370 dobj->prefix = NULL;
371 /* Save the cache root */
372 dobj->root = apr_pstrndup(r->pool, conf->cache_root, conf->cache_root_len);
373 dobj->root_len = conf->cache_root_len;
374 dobj->datafile = data_file(r->pool, conf, dobj, key);
375 dobj->hdrsfile = header_file(r->pool, conf, dobj, key);
376 dobj->tempfile = apr_pstrcat(r->pool, conf->cache_root, AP_TEMPFILE, NULL);
378 return OK;
381 int open_entity(cache_handle_t *h, request_rec *r, const char *key) {
382 apr_uint32_t format;
383 apr_size_t len;
384 const char *nkey;
385 apr_status_t rc;
386 static int error_logged = 0;
387 crccache_client_conf *conf = ap_get_module_config(r->server->module_config,
388 &crccache_client_module);
389 apr_finfo_t finfo;
390 cache_object_t *obj;
391 cache_info *info;
392 disk_cache_object_t *dobj;
393 int flags;
394 h->cache_obj = NULL;
396 /* Look up entity keyed to 'url' */
397 if (conf->cache_root == NULL) {
398 if (!error_logged) {
399 error_logged = 1;
400 ap_log_error(APLOG_MARK, APLOG_ERR, 0, r->server,
401 "disk_cache: Cannot cache files to disk without a CacheRootClient specified.");
403 return DECLINED;
406 /* Create and init the cache object */
407 h->cache_obj = obj = apr_pcalloc(r->pool, sizeof(cache_object_t));
408 obj->vobj = dobj = apr_pcalloc(r->pool, sizeof(disk_cache_object_t));
410 info = &(obj->info);
412 /* Open the headers file */
413 dobj->prefix = NULL;
415 /* Save the cache root */
416 dobj->root = apr_pstrndup(r->pool, conf->cache_root, conf->cache_root_len);
417 dobj->root_len = conf->cache_root_len;
419 dobj->hdrsfile = header_file(r->pool, conf, dobj, key);
420 flags = APR_READ|APR_BINARY|APR_BUFFERED;
421 rc = apr_file_open(&dobj->hfd, dobj->hdrsfile, flags, 0, r->pool);
422 if (rc != APR_SUCCESS) {
423 return DECLINED;
426 /* read the format from the cache file */
427 len = sizeof(format);
428 apr_file_read_full(dobj->hfd, &format, len, &len);
430 if (format == VARY_FORMAT_VERSION) {
431 apr_array_header_t* varray;
432 apr_time_t expire;
434 len = sizeof(expire);
435 apr_file_read_full(dobj->hfd, &expire, len, &len);
437 varray = apr_array_make(r->pool, 5, sizeof(char*));
438 rc = read_array(r, varray, dobj->hfd);
439 if (rc != APR_SUCCESS) {
440 ap_log_error(APLOG_MARK, APLOG_ERR, rc, r->server,
441 "disk_cache: Cannot parse vary header file: %s",
442 dobj->hdrsfile);
443 return DECLINED;
445 apr_file_close(dobj->hfd);
447 nkey = regen_key(r->pool, r->headers_in, varray, key);
449 dobj->hashfile = NULL;
450 dobj->prefix = dobj->hdrsfile;
451 dobj->hdrsfile = header_file(r->pool, conf, dobj, nkey);
453 flags = APR_READ|APR_BINARY|APR_BUFFERED;
454 rc = apr_file_open(&dobj->hfd, dobj->hdrsfile, flags, 0, r->pool);
455 if (rc != APR_SUCCESS) {
456 return DECLINED;
459 else if (format != DISK_FORMAT_VERSION) {
460 ap_log_error(APLOG_MARK, APLOG_ERR, 0, r->server,
461 "cache_disk: File '%s' has a version mismatch. File had version: %d.",
462 dobj->hdrsfile, format);
463 return DECLINED;
465 else {
466 apr_off_t offset = 0;
467 /* This wasn't a Vary Format file, so we must seek to the
468 * start of the file again, so that later reads work.
470 apr_file_seek(dobj->hfd, APR_SET, &offset);
471 nkey = key;
474 obj->key = nkey;
475 dobj->key = nkey;
476 dobj->name = key;
477 dobj->datafile = data_file(r->pool, conf, dobj, nkey);
478 dobj->tempfile = apr_pstrcat(r->pool, conf->cache_root, AP_TEMPFILE, NULL);
480 /* Open the data file */
481 flags = APR_READ|APR_BINARY;
482 #ifdef APR_SENDFILE_ENABLED
483 flags |= APR_SENDFILE_ENABLED;
484 #endif
485 rc = apr_file_open(&dobj->fd, dobj->datafile, flags, 0, r->pool);
486 if (rc != APR_SUCCESS) {
487 /* XXX: Log message */
488 return DECLINED;
491 rc = apr_file_info_get(&finfo, APR_FINFO_SIZE, dobj->fd);
492 if (rc == APR_SUCCESS) {
493 dobj->file_size = finfo.size;
496 /* Read the bytes to setup the cache_info fields */
497 rc = file_cache_recall_mydata(dobj->hfd, info, dobj, r);
498 if (rc != APR_SUCCESS) {
499 /* XXX log message */
500 return DECLINED;
503 /* Initialize the cache_handle callback functions */
504 ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server,
505 "disk_cache: Recalled cached URL info header %s", dobj->name);
506 return OK;
509 int remove_entity(cache_handle_t *h) {
510 /* Null out the cache object pointer so next time we start from scratch */
511 h->cache_obj = NULL;
513 return OK;
516 int remove_url(cache_handle_t *h, apr_pool_t *p) {
517 apr_status_t rc;
518 disk_cache_object_t *dobj;
520 /* Get disk cache object from cache handle */
521 dobj = (disk_cache_object_t *) h->cache_obj->vobj;
522 if (!dobj) {
523 return DECLINED;
526 /* Delete headers file */
527 if (dobj->hdrsfile) {
528 ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, NULL,
529 "disk_cache: Deleting %s from cache.", dobj->hdrsfile);
531 rc = apr_file_remove(dobj->hdrsfile, p);
532 if ((rc != APR_SUCCESS) && !APR_STATUS_IS_ENOENT(rc)) {
533 /* Will only result in an output if httpd is started with -e debug.
534 * For reason see log_error_core for the case s == NULL.
536 ap_log_error(APLOG_MARK, APLOG_DEBUG, rc, NULL,
537 "disk_cache: Failed to delete headers file %s from cache.",
538 dobj->hdrsfile);
539 return DECLINED;
543 /* Delete data file */
544 if (dobj->datafile) {
545 ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, NULL,
546 "disk_cache: Deleting %s from cache.", dobj->datafile);
548 rc = apr_file_remove(dobj->datafile, p);
549 if ((rc != APR_SUCCESS) && !APR_STATUS_IS_ENOENT(rc)) {
550 /* Will only result in an output if httpd is started with -e debug.
551 * For reason see log_error_core for the case s == NULL.
553 ap_log_error(APLOG_MARK, APLOG_DEBUG, rc, NULL,
554 "disk_cache: Failed to delete data file %s from cache.",
555 dobj->datafile);
556 return DECLINED;
560 /* now delete directories as far as possible up to our cache root */
561 if (dobj->root) {
562 const char *str_to_copy;
564 str_to_copy = dobj->hdrsfile ? dobj->hdrsfile : dobj->datafile;
565 if (str_to_copy) {
566 char *dir, *slash, *q;
568 dir = apr_pstrdup(p, str_to_copy);
570 /* remove filename */
571 slash = strrchr(dir, '/');
572 *slash = '\0';
575 * now walk our way back to the cache root, delete everything
576 * in the way as far as possible
578 * Note: due to the way we constructed the file names in
579 * header_file and data_file, we are guaranteed that the
580 * cache_root is suffixed by at least one '/' which will be
581 * turned into a terminating null by this loop. Therefore,
582 * we won't either delete or go above our cache root.
584 for (q = dir + dobj->root_len; *q; ) {
585 ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, NULL,
586 "disk_cache: Deleting directory %s from cache",
587 dir);
589 rc = apr_dir_remove(dir, p);
590 if (rc != APR_SUCCESS && !APR_STATUS_IS_ENOENT(rc)) {
591 break;
593 slash = strrchr(q, '/');
594 *slash = '\0';
599 return OK;
602 apr_status_t read_array(request_rec *r, apr_array_header_t* arr,
603 apr_file_t *file) {
604 char w[MAX_STRING_LEN];
605 int p;
606 apr_status_t rv;
608 while (1) {
609 rv = apr_file_gets(w, MAX_STRING_LEN - 1, file);
610 if (rv != APR_SUCCESS) {
611 ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r,
612 "Premature end of vary array.");
613 return rv;
616 p = strlen(w);
617 if (p> 0 && w[p - 1] == '\n') {
618 if (p> 1 && w[p - 2] == CR) {
619 w[p - 2] = '\0';
621 else {
622 w[p - 1] = '\0';
626 /* If we've finished reading the array, break out of the loop. */
627 if (w[0] == '\0') {
628 break;
631 *((const char **) apr_array_push(arr)) = apr_pstrdup(r->pool, w);
634 return APR_SUCCESS;
637 static apr_status_t store_array(apr_file_t *fd, apr_array_header_t* arr) {
638 int i;
639 apr_status_t rv;
640 struct iovec iov[2];
641 apr_size_t amt;
642 const char **elts;
644 elts = (const char **) arr->elts;
646 for (i = 0; i < arr->nelts; i++) {
647 iov[0].iov_base = (char*) elts[i];
648 iov[0].iov_len = strlen(elts[i]);
649 iov[1].iov_base = CRLF;
650 iov[1].iov_len = sizeof(CRLF) - 1;
652 rv = apr_file_writev(fd, (const struct iovec *) &iov, 2,
653 &amt);
654 if (rv != APR_SUCCESS) {
655 return rv;
659 iov[0].iov_base = CRLF;
660 iov[0].iov_len = sizeof(CRLF) - 1;
662 return apr_file_writev(fd, (const struct iovec *) &iov, 1,
663 &amt);
666 apr_status_t read_table(cache_handle_t *handle, request_rec *r,
667 apr_table_t *table, apr_file_t *file) {
668 char w[MAX_STRING_LEN];
669 char *l;
670 int p;
671 apr_status_t rv;
673 while (1) {
675 /* ### What about APR_EOF? */
676 rv = apr_file_gets(w, MAX_STRING_LEN - 1, file);
677 if (rv != APR_SUCCESS) {
678 ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r,
679 "Premature end of cache headers.");
680 return rv;
683 /* Delete terminal (CR?)LF */
685 p = strlen(w);
686 /* Indeed, the host's '\n':
687 '\012' for UNIX; '\015' for MacOS; '\025' for OS/390
688 -- whatever the script generates.
690 if (p> 0 && w[p - 1] == '\n') {
691 if (p> 1 && w[p - 2] == CR) {
692 w[p - 2] = '\0';
694 else {
695 w[p - 1] = '\0';
699 /* If we've finished reading the headers, break out of the loop. */
700 if (w[0] == '\0') {
701 break;
704 #if APR_CHARSET_EBCDIC
705 /* Chances are that we received an ASCII header text instead of
706 * the expected EBCDIC header lines. Try to auto-detect:
708 if (!(l = strchr(w, ':'))) {
709 int maybeASCII = 0, maybeEBCDIC = 0;
710 unsigned char *cp, native;
711 apr_size_t inbytes_left, outbytes_left;
713 for (cp = w; *cp != '\0'; ++cp) {
714 native = apr_xlate_conv_byte(ap_hdrs_from_ascii, *cp);
715 if (apr_isprint(*cp) && !apr_isprint(native))
716 ++maybeEBCDIC;
717 if (!apr_isprint(*cp) && apr_isprint(native))
718 ++maybeASCII;
720 if (maybeASCII> maybeEBCDIC) {
721 ap_log_error(APLOG_MARK, APLOG_ERR, 0, r->server,
722 "CGI Interface Error: Script headers apparently ASCII: (CGI = %s)",
723 r->filename);
724 inbytes_left = outbytes_left = cp - w;
725 apr_xlate_conv_buffer(ap_hdrs_from_ascii,
726 w, &inbytes_left, w, &outbytes_left);
729 #endif /*APR_CHARSET_EBCDIC*/
731 /* if we see a bogus header don't ignore it. Shout and scream */
732 if (!(l = strchr(w, ':'))) {
733 return APR_EGENERAL;
736 *l++ = '\0';
737 while (*l && apr_isspace(*l)) {
738 ++l;
741 apr_table_add(table, w, l);
744 return APR_SUCCESS;
748 * Clean-up memory used by helper libraries, that don't know about apr_palloc
749 * and that (probably) use classical malloc/free
751 apr_status_t deflate_ctx_cleanup(void *data)
753 crccache_client_ctx *ctx = (crccache_client_ctx *)data;
755 if (ctx != NULL)
757 if (ctx->decompression_state != DECOMPRESSION_ENDED)
759 inflateEnd(ctx->decompression_stream);
760 ctx->decompression_state = DECOMPRESSION_ENDED;
763 return APR_SUCCESS;
768 * Reads headers from a buffer and returns an array of headers.
769 * Returns NULL on file error
770 * This routine tries to deal with too long lines and continuation lines.
771 * @@@: XXX: FIXME: currently the headers are passed thru un-merged.
772 * Is that okay, or should they be collapsed where possible?
774 apr_status_t recall_headers(cache_handle_t *h, request_rec *r) {
775 const char *data;
776 apr_size_t len;
777 apr_bucket *e;
778 unsigned i;
779 int z_RC;
781 disk_cache_object_t *dobj = (disk_cache_object_t *) h->cache_obj->vobj;
783 /* This case should not happen... */
784 if (!dobj->hfd) {
785 /* XXX log message */
786 return APR_NOTFOUND;
789 h->req_hdrs = apr_table_make(r->pool, 20);
790 h->resp_hdrs = apr_table_make(r->pool, 20);
792 /* Call routine to read the header lines/status line */
793 read_table(h, r, h->resp_hdrs, dobj->hfd);
794 read_table(h, r, h->req_hdrs, dobj->hfd);
796 // TODO: We only really want to add our block hashes if the cache is not fresh
797 // TODO: We could achieve that by adding a filter here on sending the request
798 // and then doing all of this in the filter 'JIT'
799 e = apr_bucket_file_create(dobj->fd, 0, (apr_size_t) dobj->file_size, r->pool,
800 r->connection->bucket_alloc);
802 /* read */
803 apr_bucket_read(e, &data, &len, APR_BLOCK_READ);
805 // this will be rounded down, but thats okay
806 // TODO: I think that we should just add % to the trailing block, otherwise our extra block
807 // is always limited to max of BLOCK_COUNT size.
808 size_t blocksize = len/FULL_BLOCK_COUNT;
809 size_t tail_block_size = len % FULL_BLOCK_COUNT;
810 size_t block_count_including_final_block = FULL_BLOCK_COUNT + (tail_block_size != 0);
811 // sanity check for very small files
812 if (blocksize> 4)
814 //ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"crccache: %d blocks of %ld bytes",FULL_BLOCK_COUNT,blocksize);
816 crccache_client_ctx * ctx;
817 ctx = apr_pcalloc(r->pool, sizeof(*ctx));
818 ctx->bb = apr_brigade_create(r->pool, r->connection->bucket_alloc);
819 ctx->block_size = blocksize;
820 ctx->tail_block_size = tail_block_size;
821 ctx->state = DECODING_NEW_SECTION;
822 ctx->cached_bucket = e;
824 // Setup inflate for decompressing non-matched literal data
825 ctx->decompression_stream = apr_palloc(r->pool, sizeof(*(ctx->decompression_stream)));
826 ctx->decompression_stream->zalloc = Z_NULL;
827 ctx->decompression_stream->zfree = Z_NULL;
828 ctx->decompression_stream->opaque = Z_NULL;
829 ctx->decompression_stream->avail_in = 0;
830 ctx->decompression_stream->next_in = Z_NULL;
831 z_RC = inflateInit(ctx->decompression_stream);
832 if (z_RC != Z_OK)
834 ap_log_error(APLOG_MARK, APLOG_WARNING, 0, r->server,
835 "Can not initialize decompression engine, return code: %d", z_RC);
836 return APR_SUCCESS;
838 ctx->decompression_state = DECOMPRESSION_INITIALIZED;
840 // Register a cleanup function to cleanup internal libz resources
841 apr_pool_cleanup_register(r->pool, ctx, deflate_ctx_cleanup,
842 apr_pool_cleanup_null);
844 // All OK to go for the crcsync decoding: add the headers
845 // and set-up the decoding filter
847 // add one for base 64 overflow and null terminator
848 char hash_set[HASH_HEADER_SIZE+1];
850 uint64_t crcs[block_count_including_final_block];
851 crc_of_blocks(data, len, blocksize, HASH_SIZE, crcs);
853 // swap to network byte order
854 for (i = 0; i < block_count_including_final_block;++i)
856 htobe64(crcs[i]);
859 apr_base64_encode (hash_set, (char *)crcs, block_count_including_final_block*sizeof(crcs[0]));
860 hash_set[HASH_HEADER_SIZE] = '\0';
861 //apr_bucket_delete(e);
863 // TODO; bit of a safety margin here, could calculate exact size
864 const int block_header_max_size = HASH_HEADER_SIZE+32;
865 char block_header_txt[block_header_max_size];
866 snprintf(block_header_txt, block_header_max_size,"fs=%zu, h=%s",len,hash_set);
867 apr_table_set(r->headers_in, BLOCK_HEADER, block_header_txt);
868 // TODO: do we want to cache the hashes here?
870 // initialise the context for our sha1 digest of the unencoded response
871 EVP_MD_CTX_init(&ctx->mdctx);
872 const EVP_MD *md = EVP_sha1();
873 EVP_DigestInit_ex(&ctx->mdctx, md, NULL);
875 // we want to add a filter here so that we can decode the response.
876 // we need access to the original cached data when we get the response as
877 // we need that to fill in the matched blocks.
878 ap_add_output_filter_handle(crccache_decode_filter_handle,
879 ctx, r, r->connection);
881 // TODO: why is hfd file only closed in this case?
882 apr_file_close(dobj->hfd);
884 ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server,
885 "crccache_client: Recalled headers for URL %s", dobj->name);
886 return APR_SUCCESS;
889 apr_status_t recall_body(cache_handle_t *h, apr_pool_t *p,
890 apr_bucket_brigade *bb) {
891 apr_bucket *e;
892 disk_cache_object_t *dobj = (disk_cache_object_t*) h->cache_obj->vobj;
894 e = apr_bucket_file_create(dobj->fd, 0, (apr_size_t) dobj->file_size, p,
895 bb->bucket_alloc);
897 APR_BRIGADE_INSERT_HEAD(bb, e);
898 e = apr_bucket_eos_create(bb->bucket_alloc);
899 APR_BRIGADE_INSERT_TAIL(bb, e);
901 ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, NULL, "crccache_client: Recalled body for URL %s", dobj->name);
902 return APR_SUCCESS;
905 apr_status_t store_table(apr_file_t *fd, apr_table_t *table) {
906 int i;
907 apr_status_t rv;
908 struct iovec iov[4];
909 apr_size_t amt;
910 apr_table_entry_t *elts;
912 elts = (apr_table_entry_t *) apr_table_elts(table)->elts;
913 for (i = 0; i < apr_table_elts(table)->nelts; ++i) {
914 if (elts[i].key != NULL) {
915 iov[0].iov_base = elts[i].key;
916 iov[0].iov_len = strlen(elts[i].key);
917 iov[1].iov_base = ": ";
918 iov[1].iov_len = sizeof(": ") - 1;
919 iov[2].iov_base = elts[i].val;
920 iov[2].iov_len = strlen(elts[i].val);
921 iov[3].iov_base = CRLF;
922 iov[3].iov_len = sizeof(CRLF) - 1;
924 rv = apr_file_writev(fd, (const struct iovec *) &iov, 4,
925 &amt);
926 if (rv != APR_SUCCESS) {
927 return rv;
931 iov[0].iov_base = CRLF;
932 iov[0].iov_len = sizeof(CRLF) - 1;
933 rv = apr_file_writev(fd, (const struct iovec *) &iov, 1,
934 &amt);
935 return rv;
938 apr_status_t store_headers(cache_handle_t *h, request_rec *r,
939 cache_info *info) {
940 crccache_client_conf *conf = ap_get_module_config(r->server->module_config,
941 &crccache_client_module);
943 apr_status_t rv;
944 apr_size_t amt;
945 disk_cache_object_t *dobj = (disk_cache_object_t*) h->cache_obj->vobj;
947 disk_cache_info_t disk_info;
948 struct iovec iov[2];
950 /* This is flaky... we need to manage the cache_info differently */
951 h->cache_obj->info = *info;
953 if (r->headers_out) {
954 const char *tmp;
956 tmp = apr_table_get(r->headers_out, "Vary");
958 if (tmp) {
959 apr_array_header_t* varray;
960 apr_uint32_t format = VARY_FORMAT_VERSION;
962 /* If we were initially opened as a vary format, rollback
963 * that internal state for the moment so we can recreate the
964 * vary format hints in the appropriate directory.
966 if (dobj->prefix) {
967 dobj->hdrsfile = dobj->prefix;
968 dobj->prefix = NULL;
971 mkdir_structure(conf, dobj->hdrsfile, r->pool);
973 rv = apr_file_mktemp(&dobj->tfd, dobj->tempfile,
974 APR_CREATE | APR_WRITE | APR_BINARY | APR_EXCL,
975 r->pool);
977 if (rv != APR_SUCCESS) {
978 return rv;
981 amt = sizeof(format);
982 apr_file_write(dobj->tfd, &format, &amt);
984 amt = sizeof(info->expire);
985 apr_file_write(dobj->tfd, &info->expire, &amt);
987 varray = apr_array_make(r->pool, 6, sizeof(char*));
988 tokens_to_array(r->pool, tmp, varray);
990 store_array(dobj->tfd, varray);
992 apr_file_close(dobj->tfd);
994 dobj->tfd = NULL;
996 rv = safe_file_rename(conf, dobj->tempfile, dobj->hdrsfile,
997 r->pool);
998 if (rv != APR_SUCCESS) {
999 ap_log_error(APLOG_MARK, APLOG_WARNING, rv, r->server,
1000 "disk_cache: rename tempfile to varyfile failed: %s -> %s",
1001 dobj->tempfile, dobj->hdrsfile);
1002 apr_file_remove(dobj->tempfile, r->pool);
1003 return rv;
1006 dobj->tempfile = apr_pstrcat(r->pool, conf->cache_root, AP_TEMPFILE, NULL);
1007 tmp = regen_key(r->pool, r->headers_in, varray, dobj->name);
1008 dobj->prefix = dobj->hdrsfile;
1009 dobj->hashfile = NULL;
1010 dobj->datafile = data_file(r->pool, conf, dobj, tmp);
1011 dobj->hdrsfile = header_file(r->pool, conf, dobj, tmp);
1016 rv = apr_file_mktemp(&dobj->hfd, dobj->tempfile,
1017 APR_CREATE | APR_WRITE | APR_BINARY |
1018 APR_BUFFERED | APR_EXCL, r->pool);
1020 if (rv != APR_SUCCESS) {
1021 return rv;
1024 disk_info.format = DISK_FORMAT_VERSION;
1025 disk_info.date = info->date;
1026 disk_info.expire = info->expire;
1027 disk_info.entity_version = dobj->disk_info.entity_version++;
1028 disk_info.request_time = info->request_time;
1029 disk_info.response_time = info->response_time;
1030 disk_info.status = info->status;
1032 disk_info.name_len = strlen(dobj->name);
1034 iov[0].iov_base = (void*)&disk_info;
1035 iov[0].iov_len = sizeof(disk_cache_info_t);
1036 iov[1].iov_base = (void*)dobj->name;
1037 iov[1].iov_len = disk_info.name_len;
1039 rv = apr_file_writev(dobj->hfd, (const struct iovec *) &iov, 2, &amt);
1040 if (rv != APR_SUCCESS) {
1041 return rv;
1044 if (r->headers_out) {
1045 apr_table_t *headers_out;
1047 headers_out = ap_cache_cacheable_hdrs_out(r->pool, r->headers_out,
1048 r->server);
1050 if (!apr_table_get(headers_out, "Content-Type")
1051 && r->content_type) {
1052 apr_table_setn(headers_out, "Content-Type",
1053 ap_make_content_type(r, r->content_type));
1056 headers_out = apr_table_overlay(r->pool, headers_out,
1057 r->err_headers_out);
1058 rv = store_table(dobj->hfd, headers_out);
1059 if (rv != APR_SUCCESS) {
1060 return rv;
1064 /* Parse the vary header and dump those fields from the headers_in. */
1065 /* FIXME: Make call to the same thing cache_select calls to crack Vary. */
1066 if (r->headers_in) {
1067 apr_table_t *headers_in;
1069 headers_in = ap_cache_cacheable_hdrs_out(r->pool, r->headers_in,
1070 r->server);
1071 rv = store_table(dobj->hfd, headers_in);
1072 if (rv != APR_SUCCESS) {
1073 return rv;
1077 apr_file_close(dobj->hfd); /* flush and close */
1079 /* Remove old file with the same name. If remove fails, then
1080 * perhaps we need to create the directory tree where we are
1081 * about to write the new headers file.
1083 rv = apr_file_remove(dobj->hdrsfile, r->pool);
1084 if (rv != APR_SUCCESS) {
1085 mkdir_structure(conf, dobj->hdrsfile, r->pool);
1088 rv = safe_file_rename(conf, dobj->tempfile, dobj->hdrsfile, r->pool);
1089 if (rv != APR_SUCCESS) {
1090 ap_log_error(APLOG_MARK, APLOG_WARNING, rv, r->server,
1091 "disk_cache: rename tempfile to hdrsfile failed: %s -> %s",
1092 dobj->tempfile, dobj->hdrsfile);
1093 apr_file_remove(dobj->tempfile, r->pool);
1094 return rv;
1097 dobj->tempfile = apr_pstrcat(r->pool, conf->cache_root, AP_TEMPFILE, NULL);
1099 ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server,
1100 "disk_cache: Stored headers for URL %s", dobj->name);
1101 return APR_SUCCESS;
1104 apr_status_t store_body(cache_handle_t *h, request_rec *r,
1105 apr_bucket_brigade *bb) {
1106 apr_bucket *e;
1107 apr_status_t rv;
1109 disk_cache_object_t *dobj = (disk_cache_object_t *) h->cache_obj->vobj;
1110 crccache_client_conf *conf = ap_get_module_config(r->server->module_config,
1111 &crccache_client_module);
1113 /* We write to a temp file and then atomically rename the file over
1114 * in file_cache_el_final().
1116 if (!dobj->tfd) {
1117 rv = apr_file_mktemp(&dobj->tfd, dobj->tempfile, APR_CREATE | APR_WRITE
1118 | APR_BINARY | APR_BUFFERED | APR_EXCL, r->pool);
1119 if (rv != APR_SUCCESS) {
1120 return rv;
1122 dobj->file_size = 0;
1125 for (e = APR_BRIGADE_FIRST(bb); e != APR_BRIGADE_SENTINEL(bb); e = APR_BUCKET_NEXT(e)) {
1126 const char *str;
1127 apr_size_t length, written;
1128 rv = apr_bucket_read(e, &str, &length, APR_BLOCK_READ);
1129 if (rv != APR_SUCCESS) {
1130 ap_log_error(APLOG_MARK, APLOG_ERR, 0, r->server,
1131 "cache_disk: Error when reading bucket for URL %s",
1132 h->cache_obj->key);
1133 /* Remove the intermediate cache file and return non-APR_SUCCESS */
1134 file_cache_errorcleanup(dobj, r);
1135 return rv;
1137 rv = apr_file_write_full(dobj->tfd, str, length, &written);
1138 if (rv != APR_SUCCESS) {
1139 ap_log_error(APLOG_MARK, APLOG_ERR, 0, r->server,
1140 "cache_disk: Error when writing cache file for URL %s",
1141 h->cache_obj->key);
1142 /* Remove the intermediate cache file and return non-APR_SUCCESS */
1143 file_cache_errorcleanup(dobj, r);
1144 return rv;
1146 dobj->file_size += written;
1147 if (dobj->file_size> conf->maxfs) {
1148 ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server,
1149 "cache_disk: URL %s failed the size check "
1150 "(%" APR_OFF_T_FMT " > %" APR_OFF_T_FMT ")",
1151 h->cache_obj->key, dobj->file_size, conf->maxfs);
1152 /* Remove the intermediate cache file and return non-APR_SUCCESS */
1153 file_cache_errorcleanup(dobj, r);
1154 return APR_EGENERAL;
1158 /* Was this the final bucket? If yes, close the temp file and perform
1159 * sanity checks.
1161 if (APR_BUCKET_IS_EOS(APR_BRIGADE_LAST(bb))) {
1162 if (r->connection->aborted || r->no_cache) {
1163 ap_log_error(APLOG_MARK, APLOG_INFO, 0, r->server,
1164 "disk_cache: Discarding body for URL %s "
1165 "because connection has been aborted.",
1166 h->cache_obj->key);
1167 /* Remove the intermediate cache file and return non-APR_SUCCESS */
1168 file_cache_errorcleanup(dobj, r);
1169 return APR_EGENERAL;
1171 if (dobj->file_size < conf->minfs) {
1172 ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server,
1173 "cache_disk: URL %s failed the size check "
1174 "(%" APR_OFF_T_FMT " < %" APR_OFF_T_FMT ")",
1175 h->cache_obj->key, dobj->file_size, conf->minfs);
1176 /* Remove the intermediate cache file and return non-APR_SUCCESS */
1177 file_cache_errorcleanup(dobj, r);
1178 return APR_EGENERAL;
1181 /* All checks were fine. Move tempfile to final destination */
1182 /* Link to the perm file, and close the descriptor */
1183 file_cache_el_final(dobj, r);
1184 ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server,
1185 "disk_cache: Body for URL %s cached.", dobj->name);
1188 return APR_SUCCESS;
1192 * CACHE_DECODE filter
1193 * ----------------
1195 * Deliver cached content (headers and body) up the stack.
1197 static int crccache_decode_filter(ap_filter_t *f, apr_bucket_brigade *bb) {
1198 apr_bucket *e;
1199 request_rec *r = f->r;
1200 // TODO: set up context type struct
1201 crccache_client_ctx *ctx = f->ctx;
1203 // if this is the first pass in decoding we should check the headers etc
1204 // and fix up those headers that we modified as part of the encoding
1205 if (ctx->headers_checked == 0)
1207 ctx->headers_checked = 1;
1209 ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,
1210 "CRCSYNC retuned status code (%d)", r->status);
1212 // TODO: make this work if we have multiple encodings
1213 const char * content_encoding;
1214 content_encoding = apr_table_get(r->headers_out, ENCODING_HEADER);
1215 if (content_encoding == NULL || strcmp(CRCCACHE_ENCODING, content_encoding)
1216 != 0) {
1217 ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,
1218 "CRCSYNC not decoding, content encoding bad (%s)", content_encoding?content_encoding:"NULL");
1219 ap_remove_output_filter(f);
1220 return ap_pass_brigade(f->next, bb);
1222 // TODO: Remove crcsync from the content encoding header
1224 // TODO: we should only set the status back to 200 if there are no
1225 // other instance codings used
1226 //r->status = 200;
1227 //r->status_line = "200 OK";
1230 // TODO: Fix up the etag as well
1235 /* Do nothing if asked to filter nothing. */
1236 if (APR_BRIGADE_EMPTY(bb)) {
1237 return ap_pass_brigade(f->next, bb);
1240 /* We require that we have a context already, otherwise we dont have our cached file
1241 * to fill in the gaps with.
1243 if (!ctx) {
1244 ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,
1245 "No context available %s", r->uri);
1246 ap_remove_output_filter(f);
1247 return ap_pass_brigade(f->next, bb);
1250 while (!APR_BRIGADE_EMPTY(bb))
1252 const char *data;
1253 apr_size_t len;
1255 e = APR_BRIGADE_FIRST(bb);
1257 if (APR_BUCKET_IS_EOS(e)) {
1259 /* Remove EOS from the old list, and insert into the new. */
1260 APR_BUCKET_REMOVE(e);
1261 APR_BRIGADE_INSERT_TAIL(ctx->bb, e);
1263 /* This filter is done once it has served up its content */
1264 ap_remove_output_filter(f);
1266 // TODO: check strong hash here
1267 unsigned md_len;
1268 unsigned char md_value[EVP_MAX_MD_SIZE];
1269 EVP_DigestFinal_ex(&ctx->mdctx, md_value, &md_len);
1270 EVP_MD_CTX_cleanup(&ctx->mdctx);
1272 if (memcmp(md_value, ctx->md_value_rx, 20) != 0)
1274 // TODO: Actually signal this to the user
1275 ap_log_error_wrapper(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"CRCSYNC-DECODE HASH CHECK FAILED");
1277 else
1279 ap_log_error_wrapper(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"CRCSYNC-DECODE HASH CHECK PASSED");
1282 /* Okay, we've seen the EOS.
1283 * Time to pass it along down the chain.
1285 return ap_pass_brigade(f->next, ctx->bb);
1288 if (APR_BUCKET_IS_FLUSH(e)) {
1289 apr_status_t rv;
1291 /* Remove flush bucket from old brigade anf insert into the new. */
1292 APR_BUCKET_REMOVE(e);
1293 APR_BRIGADE_INSERT_TAIL(ctx->bb, e);
1294 rv = ap_pass_brigade(f->next, ctx->bb);
1295 if (rv != APR_SUCCESS) {
1296 return rv;
1298 continue;
1301 if (APR_BUCKET_IS_METADATA(e)) {
1303 * Remove meta data bucket from old brigade and insert into the
1304 * new.
1306 APR_BUCKET_REMOVE(e);
1307 APR_BRIGADE_INSERT_TAIL(ctx->bb, e);
1308 continue;
1311 /* read */
1312 apr_bucket_read(e, &data, &len, APR_BLOCK_READ);
1313 //ap_log_error_wrapper(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"CRCSYNC-DECODE read %zd bytes",len);
1315 apr_size_t consumed_bytes = 0;
1316 while (consumed_bytes < len)
1318 //ap_log_error_wrapper(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"CRCSYNC-DECODE remaining %zd bytes",len - consumed_bytes);
1319 // no guaruntee that our buckets line up with our encoding sections
1320 // so we need a processing state machine stored in our context
1321 switch (ctx->state)
1323 case DECODING_NEW_SECTION:
1325 // check if we have a compressed section or a block section
1326 if (data[consumed_bytes] == ENCODING_COMPRESSED)
1327 ctx->state = DECODING_COMPRESSED;
1328 else if (data[consumed_bytes] == ENCODING_BLOCK)
1329 ctx->state = DECODING_BLOCK_HEADER;
1330 else if (data[consumed_bytes] == ENCODING_LITERAL)
1331 ctx->state = DECODING_LITERAL;
1332 else if (data[consumed_bytes] == ENCODING_HASH)
1334 ctx->state = DECODING_HASH;
1335 ctx->md_value_rx_count = 0;
1337 else
1339 ap_log_error(APLOG_MARK, APLOG_ERR, APR_SUCCESS, r->server,
1340 "CRCSYNC-DECODE, unknown section %d(%c)",data[consumed_bytes],data[consumed_bytes]);
1341 apr_brigade_cleanup(bb);
1342 return APR_EGENERAL;
1344 //ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"CRCSYNC-DECODE found a new section %d",ctx->state);
1345 consumed_bytes++;
1346 break;
1348 case DECODING_BLOCK_HEADER:
1350 unsigned char block_number = data[consumed_bytes];
1351 consumed_bytes++;
1352 ctx->state = DECODING_NEW_SECTION;
1354 // TODO: Output the indicated block here
1355 size_t current_block_size = block_number < FULL_BLOCK_COUNT ? ctx->block_size : ctx->tail_block_size;
1356 ap_log_error_wrapper(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,
1357 "CRCSYNC-DECODE block section, block %d, size %zu" ,block_number, current_block_size);
1359 char * buf = apr_palloc(r->pool, current_block_size);
1360 const char * source_data;
1361 size_t source_len;
1362 apr_bucket_read(ctx->cached_bucket, &source_data, &source_len, APR_BLOCK_READ);
1363 assert(block_number < (FULL_BLOCK_COUNT + (ctx->tail_block_size != 0)));
1364 memcpy(buf,&source_data[block_number*ctx->block_size],current_block_size);
1365 // update our sha1 hash
1366 EVP_DigestUpdate(&ctx->mdctx, buf, current_block_size);
1367 apr_bucket * b = apr_bucket_pool_create(buf, current_block_size, r->pool, f->c->bucket_alloc);
1368 APR_BRIGADE_INSERT_TAIL(ctx->bb, b);
1369 break;
1371 case DECODING_HASH:
1373 unsigned avail_in = len - consumed_bytes;
1374 // 20 bytes for an SHA1 hash
1375 unsigned needed = MIN(20-ctx->md_value_rx_count, avail_in);
1376 memcpy(&ctx->md_value_rx[ctx->md_value_rx_count], &data[consumed_bytes],needed);
1377 ctx->md_value_rx_count+=needed;
1378 consumed_bytes += needed;
1379 if (ctx->md_value_rx_count == 20)
1381 ctx->state = DECODING_NEW_SECTION;
1383 break;
1385 case DECODING_COMPRESSED:
1387 unsigned char decompressed_data_buf[30000];
1388 int z_RC;
1389 z_stream *strm = ctx->decompression_stream;
1390 strm->avail_in = len - consumed_bytes;
1391 strm->next_in = (Bytef *)(data + consumed_bytes);
1392 // ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server, "CRCSYNC-DECODE inflating %d bytes", strm.avail_in);
1393 // ap_log_hex(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server, strm.next_in, strm.avail_in);
1394 do {
1395 strm->avail_out = sizeof(decompressed_data_buf);
1396 strm->next_out = decompressed_data_buf;
1397 uInt avail_in_pre_inflate = strm->avail_in;
1398 z_RC = inflate(strm, Z_NO_FLUSH);
1399 if (z_RC == Z_NEED_DICT || z_RC == Z_DATA_ERROR || z_RC == Z_MEM_ERROR)
1401 ap_log_error(APLOG_MARK, APLOG_ERR, APR_EGENERAL, r->server, "CRCSYNC-DECODE inflate error: %d", z_RC);
1402 apr_brigade_cleanup(bb);
1403 return APR_EGENERAL;
1405 int have = sizeof(decompressed_data_buf) - strm->avail_out;
1406 ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,
1407 "CRCSYNC-DECODE inflate rslt %d, consumed %d, produced %d",
1408 z_RC, avail_in_pre_inflate - strm->avail_in, have);
1409 if (have)
1411 // write output data
1412 char * buf = apr_palloc(r->pool, have);
1413 memcpy(buf,decompressed_data_buf,have);
1414 EVP_DigestUpdate(&ctx->mdctx, buf, have);
1415 apr_bucket * b = apr_bucket_pool_create(buf, have, r->pool, f->c->bucket_alloc);
1416 APR_BRIGADE_INSERT_TAIL(ctx->bb, b);
1418 } while (strm->avail_out == 0);
1419 consumed_bytes = len - strm->avail_in;
1420 if (z_RC == Z_STREAM_END)
1422 ctx->state = DECODING_NEW_SECTION;
1423 inflateReset(strm);
1425 break;
1427 default:
1429 ap_log_error(APLOG_MARK, APLOG_ERR, APR_SUCCESS, r->server,
1430 "CRCSYNC-DECODE, unknown state %d, terminating transaction",ctx->state);
1431 apr_brigade_cleanup(bb);
1432 return APR_EGENERAL; // TODO: figure out how to pass the error on to the client
1435 APR_BUCKET_REMOVE(e);
1439 apr_brigade_cleanup(bb);
1440 return APR_SUCCESS;
1443 static void *create_config(apr_pool_t *p, server_rec *s) {
1444 crccache_client_conf *conf = apr_pcalloc(p, sizeof(crccache_client_conf));
1445 /* array of URL prefixes for which caching is enabled */
1446 conf->cacheenable = apr_array_make(p, 10, sizeof(struct cache_enable));
1447 /* array of URL prefixes for which caching is enabled */
1448 conf->cacheenable = apr_array_make(p, 10, sizeof(struct cache_enable));
1449 /* array of URL prefixes for which caching is disabled */
1450 conf->cachedisable = apr_array_make(p, 10, sizeof(struct cache_disable));
1451 /* maximum time to cache a document */
1452 conf->maxex = DEFAULT_CACHE_MAXEXPIRE;
1453 conf->maxex_set = 0;
1454 conf->minex = DEFAULT_CACHE_MINEXPIRE;
1455 conf->minex_set = 0;
1456 /* default time to cache a document */
1457 conf->defex = DEFAULT_CACHE_EXPIRE;
1458 conf->defex_set = 0;
1459 /* factor used to estimate Expires date from LastModified date */
1460 conf->factor = DEFAULT_CACHE_LMFACTOR;
1461 conf->factor_set = 0;
1462 conf->no_last_mod_ignore_set = 0;
1463 conf->no_last_mod_ignore = 0;
1464 conf->ignorecachecontrol = 0;
1465 conf->ignorecachecontrol_set = 0;
1466 conf->store_private = 0;
1467 conf->store_private_set = 0;
1468 conf->store_nostore = 0;
1469 conf->store_nostore_set = 0;
1470 /* array of headers that should not be stored in cache */
1471 conf->ignore_headers = apr_array_make(p, 10, sizeof(char *));
1472 conf->ignore_headers_set = CACHE_IGNORE_HEADERS_UNSET;
1473 /* flag indicating that query-string should be ignored when caching */
1474 conf->ignorequerystring = 0;
1475 conf->ignorequerystring_set = 0;
1477 /* XXX: Set default values */
1478 conf->dirlevels = DEFAULT_DIRLEVELS;
1479 conf->dirlength = DEFAULT_DIRLENGTH;
1480 conf->maxfs = DEFAULT_MAX_FILE_SIZE;
1481 conf->minfs = DEFAULT_MIN_FILE_SIZE;
1483 conf->cache_root = NULL;
1484 conf->cache_root_len = 0;
1486 return conf;
1490 * mod_disk_cache configuration directives handlers.
1492 static const char *set_cache_root(cmd_parms *parms, void *in_struct_ptr,
1493 const char *arg) {
1494 crccache_client_conf *conf = ap_get_module_config(parms->server->module_config,
1495 &crccache_client_module);
1496 conf->cache_root = arg;
1497 conf->cache_root_len = strlen(arg);
1498 /* TODO: canonicalize cache_root and strip off any trailing slashes */
1500 return NULL;
1504 * Consider eliminating the next two directives in favor of
1505 * Ian's prime number hash...
1506 * key = hash_fn( r->uri)
1507 * filename = "/key % prime1 /key %prime2/key %prime3"
1509 static const char *set_cache_dirlevels(cmd_parms *parms, void *in_struct_ptr,
1510 const char *arg) {
1511 crccache_client_conf *conf = ap_get_module_config(parms->server->module_config,
1512 &crccache_client_module);
1513 int val = atoi(arg);
1514 if (val < 1)
1515 return "CacheDirLevelsClient value must be an integer greater than 0";
1516 if (val * conf->dirlength > CACHEFILE_LEN)
1517 return "CacheDirLevelsClient*CacheDirLengthClient value must not be higher than 20";
1518 conf->dirlevels = val;
1519 return NULL;
1521 static const char *set_cache_dirlength(cmd_parms *parms, void *in_struct_ptr,
1522 const char *arg) {
1523 crccache_client_conf *conf = ap_get_module_config(parms->server->module_config,
1524 &crccache_client_module);
1525 int val = atoi(arg);
1526 if (val < 1)
1527 return "CacheDirLengthClient value must be an integer greater than 0";
1528 if (val * conf->dirlevels > CACHEFILE_LEN)
1529 return "CacheDirLevelsClient*CacheDirLengthClient value must not be higher than 20";
1531 conf->dirlength = val;
1532 return NULL;
1535 static const char *set_cache_minfs(cmd_parms *parms, void *in_struct_ptr,
1536 const char *arg) {
1537 crccache_client_conf *conf = ap_get_module_config(parms->server->module_config,
1538 &crccache_client_module);
1540 if (apr_strtoff(&conf->minfs, arg, NULL, 0) != APR_SUCCESS || conf->minfs
1541 < 0) {
1542 return "CacheMinFileSizeClient argument must be a non-negative integer representing the min size of a file to cache in bytes.";
1544 return NULL;
1547 static const char *set_cache_maxfs(cmd_parms *parms, void *in_struct_ptr,
1548 const char *arg) {
1549 crccache_client_conf *conf = ap_get_module_config(parms->server->module_config,
1550 &crccache_client_module);
1551 if (apr_strtoff(&conf->maxfs, arg, NULL, 0) != APR_SUCCESS || conf->maxfs
1552 < 0) {
1553 return "CacheMaxFileSizeClient argument must be a non-negative integer representing the max size of a file to cache in bytes.";
1555 return NULL;
1558 static const char *add_crc_client_enable(cmd_parms *parms, void *dummy,
1559 const char *type,
1560 const char *url)
1562 crccache_client_conf *conf;
1563 struct cache_enable *new;
1565 if (*type == '/') {
1566 return apr_psprintf(parms->pool,
1567 "provider (%s) starts with a '/'. Are url and provider switched?",
1568 type);
1571 conf =
1572 (crccache_client_conf *)ap_get_module_config(parms->server->module_config,
1573 &crccache_client_module);
1574 new = apr_array_push(conf->cacheenable);
1575 new->type = type;
1576 if (apr_uri_parse(parms->pool, url, &(new->url))) {
1577 return NULL;
1579 if (new->url.path) {
1580 new->pathlen = strlen(new->url.path);
1581 } else {
1582 new->pathlen = 1;
1583 new->url.path = "/";
1585 return NULL;
1588 static const command_rec disk_cache_cmds[] =
1590 AP_INIT_TAKE2("CRCClientEnable", add_crc_client_enable, NULL, RSRC_CONF, "A cache type and partial URL prefix below which caching is enabled"),
1591 AP_INIT_TAKE1("CacheRootClient", set_cache_root, NULL, RSRC_CONF,"The directory to store cache files"),
1592 AP_INIT_TAKE1("CacheDirLevelsClient", set_cache_dirlevels, NULL, RSRC_CONF, "The number of levels of subdirectories in the cache"),
1593 AP_INIT_TAKE1("CacheDirLengthClient", set_cache_dirlength, NULL, RSRC_CONF, "The number of characters in subdirectory names"),
1594 AP_INIT_TAKE1("CacheMinFileSizeClient", set_cache_minfs, NULL, RSRC_CONF, "The minimum file size to cache a document"),
1595 AP_INIT_TAKE1("CacheMaxFileSizeClient", set_cache_maxfs, NULL, RSRC_CONF, "The maximum file size to cache a document"),
1596 { NULL }
1599 int ap_run_insert_filter(request_rec *r);
1601 int cache_url_handler(request_rec *r, int lookup)
1603 apr_status_t rv;
1604 const char *auth;
1605 cache_request_rec *cache;
1606 crccache_client_conf *conf;
1607 apr_bucket_brigade *out;
1608 ap_filter_t *next;
1609 ap_filter_rec_t *cache_out_handle;
1611 /* Delay initialization until we know we are handling a GET */
1612 if (r->method_number != M_GET) {
1613 return DECLINED;
1616 conf = (crccache_client_conf *) ap_get_module_config(r->server->module_config,
1617 &crccache_client_module);
1619 /* make space for the per request config */
1620 cache = (cache_request_rec *) ap_get_module_config(r->request_config,
1621 &crccache_client_module);
1622 if (!cache) {
1623 cache = apr_pcalloc(r->pool, sizeof(cache_request_rec));
1624 ap_set_module_config(r->request_config, &crccache_client_module, cache);
1628 * Are we allowed to serve cached info at all?
1631 /* find certain cache controlling headers */
1632 auth = apr_table_get(r->headers_in, "Authorization");
1634 /* First things first - does the request allow us to return
1635 * cached information at all? If not, just decline the request.
1637 if (auth) {
1638 return DECLINED;
1642 * Try to serve this request from the cache.
1644 * If no existing cache file (DECLINED)
1645 * add cache_save filter
1646 * If cached file (OK)
1647 * clear filter stack
1648 * add cache_out filter
1649 * return OK
1651 rv = cache_select(r);
1652 if (rv != OK) {
1653 if (rv == DECLINED) {
1654 if (!lookup) {
1657 * Add cache_save filter to cache this request. Choose
1658 * the correct filter by checking if we are a subrequest
1659 * or not.
1661 if (r->main) {
1662 ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS,
1663 r->server,
1664 "Adding CACHE_SAVE_SUBREQ filter for %s",
1665 r->uri);
1666 ap_add_output_filter_handle(cache_save_subreq_filter_handle,
1667 NULL, r, r->connection);
1669 else {
1670 ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS,
1671 r->server, "Adding CACHE_SAVE filter for %s",
1672 r->uri);
1673 ap_add_output_filter_handle(cache_save_filter_handle,
1674 NULL, r, r->connection);
1677 ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,
1678 "Adding CACHE_REMOVE_URL filter for %s",
1679 r->uri);
1681 /* Add cache_remove_url filter to this request to remove a
1682 * stale cache entry if needed. Also put the current cache
1683 * request rec in the filter context, as the request that
1684 * is available later during running the filter maybe
1685 * different due to an internal redirect.
1687 cache->remove_url_filter =
1688 ap_add_output_filter_handle(cache_remove_url_filter_handle,
1689 cache, r, r->connection);
1691 else {
1692 if (cache->stale_headers) {
1693 ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS,
1694 r->server, "Restoring request headers for %s",
1695 r->uri);
1697 r->headers_in = cache->stale_headers;
1700 /* Delete our per-request configuration. */
1701 ap_set_module_config(r->request_config, &crccache_client_module, NULL);
1704 else {
1705 /* error */
1706 ap_log_error(APLOG_MARK, APLOG_ERR, rv, r->server,
1707 "cache: error returned while checking for cached "
1708 "file by cache");
1710 return DECLINED;
1713 /* if we are a lookup, we are exiting soon one way or another; Restore
1714 * the headers. */
1715 if (lookup) {
1716 if (cache->stale_headers) {
1717 ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,
1718 "Restoring request headers.");
1719 r->headers_in = cache->stale_headers;
1722 /* Delete our per-request configuration. */
1723 ap_set_module_config(r->request_config, &crccache_client_module, NULL);
1726 rv = ap_meets_conditions(r);
1727 if (rv != OK) {
1728 /* If we are a lookup, we have to return DECLINED as we have no
1729 * way of knowing if we will be able to serve the content.
1731 if (lookup) {
1732 return DECLINED;
1735 /* Return cached status. */
1736 return rv;
1739 /* If we're a lookup, we can exit now instead of serving the content. */
1740 if (lookup) {
1741 return OK;
1744 /* Serve up the content */
1746 /* We are in the quick handler hook, which means that no output
1747 * filters have been set. So lets run the insert_filter hook.
1749 ap_run_insert_filter(r);
1752 * Add cache_out filter to serve this request. Choose
1753 * the correct filter by checking if we are a subrequest
1754 * or not.
1756 if (r->main) {
1757 cache_out_handle = cache_out_subreq_filter_handle;
1759 else {
1760 cache_out_handle = cache_out_filter_handle;
1762 ap_add_output_filter_handle(cache_out_handle, NULL, r, r->connection);
1765 * Remove all filters that are before the cache_out filter. This ensures
1766 * that we kick off the filter stack with our cache_out filter being the
1767 * first in the chain. This make sense because we want to restore things
1768 * in the same manner as we saved them.
1769 * There may be filters before our cache_out filter, because
1771 * 1. We call ap_set_content_type during cache_select. This causes
1772 * Content-Type specific filters to be added.
1773 * 2. We call the insert_filter hook. This causes filters e.g. like
1774 * the ones set with SetOutputFilter to be added.
1776 next = r->output_filters;
1777 while (next && (next->frec != cache_out_handle)) {
1778 ap_remove_output_filter(next);
1779 next = next->next;
1782 /* kick off the filter stack */
1783 out = apr_brigade_create(r->pool, r->connection->bucket_alloc);
1784 rv = ap_pass_brigade(r->output_filters, out);
1785 if (rv != APR_SUCCESS) {
1786 if (rv != AP_FILTER_ERROR) {
1787 ap_log_error(APLOG_MARK, APLOG_ERR, rv, r->server,
1788 "cache: error returned while trying to return "
1789 "cached data");
1791 return rv;
1794 return OK;
1800 * CACHE_OUT filter
1801 * ----------------
1803 * Deliver cached content (headers and body) up the stack.
1805 int cache_out_filter(ap_filter_t *f, apr_bucket_brigade *bb)
1807 request_rec *r = f->r;
1808 cache_request_rec *cache;
1810 cache = (cache_request_rec *) ap_get_module_config(r->request_config,
1811 &crccache_client_module);
1813 if (!cache) {
1814 /* user likely configured CACHE_OUT manually; they should use mod_cache
1815 * configuration to do that */
1816 ap_log_error(APLOG_MARK, APLOG_ERR, 0, r->server,
1817 "CACHE_OUT enabled unexpectedly");
1818 ap_remove_output_filter(f);
1819 return ap_pass_brigade(f->next, bb);
1822 ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,
1823 "cache: running CACHE_OUT filter");
1825 /* restore status of cached response */
1826 /* XXX: This exposes a bug in mem_cache, since it does not
1827 * restore the status into it's handle. */
1828 r->status = cache->handle->cache_obj->info.status;
1830 /* recall_headers() was called in cache_select() */
1831 recall_body(cache->handle, r->pool, bb);
1833 /* This filter is done once it has served up its content */
1834 ap_remove_output_filter(f);
1836 ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,
1837 "cache: serving %s", r->uri);
1838 return ap_pass_brigade(f->next, bb);
1843 * CACHE_SAVE filter
1844 * ---------------
1846 * Decide whether or not this content should be cached.
1847 * If we decide no it should not:
1848 * remove the filter from the chain
1849 * If we decide yes it should:
1850 * Have we already started saving the response?
1851 * If we have started, pass the data to the storage manager via store_body
1852 * Otherwise:
1853 * Check to see if we *can* save this particular response.
1854 * If we can, call cache_create_entity() and save the headers and body
1855 * Finally, pass the data to the next filter (the network or whatever)
1858 int cache_save_filter(ap_filter_t *f, apr_bucket_brigade *in)
1860 int rv = !OK;
1861 request_rec *r = f->r;
1862 cache_request_rec *cache;
1863 crccache_client_conf *conf;
1864 //const char *cc_out, *cl;
1865 const char *cl;
1866 const char *exps, /* *lastmods,*/ *dates;//, *etag;
1867 apr_time_t exp, date,/* lastmod,*/ now;
1868 apr_off_t size;
1869 cache_info *info = NULL;
1870 char *reason;
1871 apr_pool_t *p;
1873 conf = (crccache_client_conf *) ap_get_module_config(r->server->module_config,
1874 &crccache_client_module);
1876 /* Setup cache_request_rec */
1877 cache = (cache_request_rec *) ap_get_module_config(r->request_config,
1878 &crccache_client_module);
1879 if (!cache) {
1880 /* user likely configured CACHE_SAVE manually; they should really use
1881 * mod_cache configuration to do that
1883 cache = apr_pcalloc(r->pool, sizeof(cache_request_rec));
1884 ap_set_module_config(r->request_config, &crccache_client_module, cache);
1887 reason = NULL;
1888 p = r->pool;
1890 * Pass Data to Cache
1891 * ------------------
1892 * This section passes the brigades into the cache modules, but only
1893 * if the setup section (see below) is complete.
1895 if (cache->block_response) {
1896 /* We've already sent down the response and EOS. So, ignore
1897 * whatever comes now.
1899 return APR_SUCCESS;
1902 /* have we already run the cachability check and set up the
1903 * cached file handle?
1905 if (cache->in_checked) {
1906 /* pass the brigades into the cache, then pass them
1907 * up the filter stack
1909 rv = store_body(cache->handle, r, in);
1910 if (rv != APR_SUCCESS) {
1911 ap_log_error(APLOG_MARK, APLOG_DEBUG, rv, r->server,
1912 "cache: Cache provider's store_body failed!");
1913 ap_remove_output_filter(f);
1915 return ap_pass_brigade(f->next, in);
1919 * Setup Data in Cache
1920 * -------------------
1921 * This section opens the cache entity and sets various caching
1922 * parameters, and decides whether this URL should be cached at
1923 * all. This section is run before the above section.
1926 /* read expiry date; if a bad date, then leave it so the client can
1927 * read it
1929 exps = apr_table_get(r->err_headers_out, "Expires");
1930 if (exps == NULL) {
1931 exps = apr_table_get(r->headers_out, "Expires");
1933 if (exps != NULL) {
1934 if (APR_DATE_BAD == (exp = apr_date_parse_http(exps))) {
1935 exps = NULL;
1938 else {
1939 exp = APR_DATE_BAD;
1941 // we dont care about these
1942 #if 0
1943 /* read the last-modified date; if the date is bad, then delete it */
1944 lastmods = apr_table_get(r->err_headers_out, "Last-Modified");
1945 if (lastmods == NULL) {
1946 lastmods = apr_table_get(r->headers_out, "Last-Modified");
1948 if (lastmods != NULL) {
1949 lastmod = apr_date_parse_http(lastmods);
1950 if (lastmod == APR_DATE_BAD) {
1951 lastmods = NULL;
1954 else {
1955 lastmod = APR_DATE_BAD;
1958 /* read the etag and cache-control from the entity */
1959 etag = apr_table_get(r->err_headers_out, "Etag");
1960 if (etag == NULL) {
1961 etag = apr_table_get(r->headers_out, "Etag");
1963 cc_out = apr_table_get(r->err_headers_out, "Cache-Control");
1964 if (cc_out == NULL) {
1965 cc_out = apr_table_get(r->headers_out, "Cache-Control");
1967 #endif
1969 * what responses should we not cache?
1971 * At this point we decide based on the response headers whether it
1972 * is appropriate _NOT_ to cache the data from the server. There are
1973 * a whole lot of conditions that prevent us from caching this data.
1974 * They are tested here one by one to be clear and unambiguous.
1976 if (r->status != HTTP_OK && r->status != HTTP_NON_AUTHORITATIVE
1977 && r->status != HTTP_MULTIPLE_CHOICES
1978 && r->status != HTTP_MOVED_PERMANENTLY
1979 && r->status != HTTP_NOT_MODIFIED) {
1980 /* RFC2616 13.4 we are allowed to cache 200, 203, 206, 300, 301 or 410
1981 * We don't cache 206, because we don't (yet) cache partial responses.
1982 * We include 304 Not Modified here too as this is the origin server
1983 * telling us to serve the cached copy.
1985 #if 0
1986 if (exps != NULL || cc_out != NULL) {
1987 /* We are also allowed to cache any response given that it has a
1988 * valid Expires or Cache Control header. If we find a either of
1989 * those here, we pass request through the rest of the tests. From
1990 * the RFC:
1992 * A response received with any other status code (e.g. status
1993 * codes 302 and 307) MUST NOT be returned in a reply to a
1994 * subsequent request unless there are cache-control directives or
1995 * another header(s) that explicitly allow it. For example, these
1996 * include the following: an Expires header (section 14.21); a
1997 * "max-age", "s-maxage", "must-revalidate", "proxy-revalidate",
1998 * "public" or "private" cache-control directive (section 14.9).
2001 else {
2002 reason = apr_psprintf(p, "Response status %d", r->status);
2004 #endif
2007 if (reason) {
2008 /* noop */
2010 #if 0
2011 else if (exps != NULL && exp == APR_DATE_BAD) {
2012 /* if a broken Expires header is present, don't cache it */
2013 reason = apr_pstrcat(p, "Broken expires header: ", exps, NULL);
2015 else if (exp != APR_DATE_BAD && exp < r->request_time)
2017 /* if a Expires header is in the past, don't cache it */
2018 reason = "Expires header already expired, not cacheable";
2020 else if (!conf->ignorequerystring && r->parsed_uri.query && exps == NULL &&
2021 !ap_cache_liststr(NULL, cc_out, "max-age", NULL)) {
2022 /* if a query string is present but no explicit expiration time,
2023 * don't cache it (RFC 2616/13.9 & 13.2.1)
2025 reason = "Query string present but no explicit expiration time";
2027 #endif
2028 else if (r->status == HTTP_NOT_MODIFIED &&
2029 !cache->handle && !cache->stale_handle) {
2030 /* if the server said 304 Not Modified but we have no cache
2031 * file - pass this untouched to the user agent, it's not for us.
2033 reason = "HTTP Status 304 Not Modified";
2035 #if 0
2036 else if (r->status == HTTP_OK && lastmods == NULL && etag == NULL
2037 && (exps == NULL) && (conf->no_last_mod_ignore ==0)) {
2038 /* 200 OK response from HTTP/1.0 and up without Last-Modified,
2039 * Etag, or Expires headers.
2041 /* Note: mod-include clears last_modified/expires/etags - this
2042 * is why we have an optional function for a key-gen ;-)
2044 reason = "No Last-Modified, Etag, or Expires headers";
2046 #endif
2047 else if (r->header_only && !cache->stale_handle) {
2048 /* Forbid HEAD requests unless we have it cached already */
2049 reason = "HTTP HEAD request";
2051 #if 0
2052 else if (!conf->store_nostore &&
2053 ap_cache_liststr(NULL, cc_out, "no-store", NULL)) {
2054 /* RFC2616 14.9.2 Cache-Control: no-store response
2055 * indicating do not cache, or stop now if you are
2056 * trying to cache it.
2058 /* FIXME: The Cache-Control: no-store could have come in on a 304,
2059 * FIXME: while the original request wasn't conditional. IOW, we
2060 * FIXME: made the the request conditional earlier to revalidate
2061 * FIXME: our cached response.
2063 reason = "Cache-Control: no-store present";
2065 else if (!conf->store_private &&
2066 ap_cache_liststr(NULL, cc_out, "private", NULL)) {
2067 /* RFC2616 14.9.1 Cache-Control: private response
2068 * this object is marked for this user's eyes only. Behave
2069 * as a tunnel.
2071 /* FIXME: See above (no-store) */
2072 reason = "Cache-Control: private present";
2074 else if (apr_table_get(r->headers_in, "Authorization") != NULL
2075 && !(ap_cache_liststr(NULL, cc_out, "s-maxage", NULL)
2076 || ap_cache_liststr(NULL, cc_out, "must-revalidate", NULL)
2077 || ap_cache_liststr(NULL, cc_out, "public", NULL))) {
2078 /* RFC2616 14.8 Authorisation:
2079 * if authorisation is included in the request, we don't cache,
2080 * but we can cache if the following exceptions are true:
2081 * 1) If Cache-Control: s-maxage is included
2082 * 2) If Cache-Control: must-revalidate is included
2083 * 3) If Cache-Control: public is included
2085 reason = "Authorization required";
2088 else if (ap_cache_liststr(NULL,
2089 apr_table_get(r->headers_out, "Vary"),
2090 "*", NULL)) {
2091 reason = "Vary header contains '*'";
2093 else if (apr_table_get(r->subprocess_env, "no-cache") != NULL) {
2094 reason = "environment variable 'no-cache' is set";
2096 else if (r->no_cache) {
2097 /* or we've been asked not to cache it above */
2098 reason = "r->no_cache present";
2100 #endif
2101 if (reason) {
2102 ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server,
2103 "cache: %s not cached. Reason: %s", r->unparsed_uri,
2104 reason);
2106 /* remove this filter from the chain */
2107 ap_remove_output_filter(f);
2109 /* ship the data up the stack */
2110 return ap_pass_brigade(f->next, in);
2113 /* Make it so that we don't execute this path again. */
2114 cache->in_checked = 1;
2116 /* Set the content length if known.
2118 cl = apr_table_get(r->err_headers_out, "Content-Length");
2119 if (cl == NULL) {
2120 cl = apr_table_get(r->headers_out, "Content-Length");
2122 if (cl) {
2123 char *errp;
2124 if (apr_strtoff(&size, cl, &errp, 10) || *errp || size < 0) {
2125 cl = NULL; /* parse error, see next 'if' block */
2129 if (!cl) {
2130 /* if we don't get the content-length, see if we have all the
2131 * buckets and use their length to calculate the size
2133 apr_bucket *e;
2134 int all_buckets_here=0;
2135 int unresolved_length = 0;
2136 size=0;
2137 for (e = APR_BRIGADE_FIRST(in);
2138 e != APR_BRIGADE_SENTINEL(in);
2139 e = APR_BUCKET_NEXT(e))
2141 if (APR_BUCKET_IS_EOS(e)) {
2142 all_buckets_here=1;
2143 break;
2145 if (APR_BUCKET_IS_FLUSH(e)) {
2146 unresolved_length = 1;
2147 continue;
2149 if (e->length == (apr_size_t)-1) {
2150 break;
2152 size += e->length;
2154 if (!all_buckets_here) {
2155 size = -1;
2159 /* It's safe to cache the response.
2161 * There are two possiblities at this point:
2162 * - cache->handle == NULL. In this case there is no previously
2163 * cached entity anywhere on the system. We must create a brand
2164 * new entity and store the response in it.
2165 * - cache->stale_handle != NULL. In this case there is a stale
2166 * entity in the system which needs to be replaced by new
2167 * content (unless the result was 304 Not Modified, which means
2168 * the cached entity is actually fresh, and we should update
2169 * the headers).
2172 /* Did we have a stale cache entry that really is stale?
2174 * Note that for HEAD requests, we won't get the body, so for a stale
2175 * HEAD request, we don't remove the entity - instead we let the
2176 * CACHE_REMOVE_URL filter remove the stale item from the cache.
2178 if (cache->stale_handle) {
2179 if (r->status == HTTP_NOT_MODIFIED) {
2180 /* Oh, hey. It isn't that stale! Yay! */
2181 cache->handle = cache->stale_handle;
2182 info = &cache->handle->cache_obj->info;
2183 rv = OK;
2185 else if (!r->header_only) {
2186 /* Oh, well. Toss it. */
2187 remove_entity(cache->stale_handle);
2188 /* Treat the request as if it wasn't conditional. */
2189 cache->stale_handle = NULL;
2191 * Restore the original request headers as they may be needed
2192 * by further output filters like the byterange filter to make
2193 * the correct decisions.
2195 r->headers_in = cache->stale_headers;
2199 /* no cache handle, create a new entity only for non-HEAD requests */
2200 if (!cache->handle && !r->header_only) {
2201 rv = cache_create_entity(r, size);
2202 info = apr_pcalloc(r->pool, sizeof(cache_info));
2203 /* We only set info->status upon the initial creation. */
2204 info->status = r->status;
2207 if (rv != OK) {
2208 /* Caching layer declined the opportunity to cache the response */
2209 ap_remove_output_filter(f);
2210 return ap_pass_brigade(f->next, in);
2213 ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server,
2214 "cache: Caching url: %s", r->unparsed_uri);
2216 /* We are actually caching this response. So it does not
2217 * make sense to remove this entity any more.
2219 ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server,
2220 "cache: Removing CACHE_REMOVE_URL filter.");
2221 ap_remove_output_filter(cache->remove_url_filter);
2224 * We now want to update the cache file header information with
2225 * the new date, last modified, expire and content length and write
2226 * it away to our cache file. First, we determine these values from
2227 * the response, using heuristics if appropriate.
2229 * In addition, we make HTTP/1.1 age calculations and write them away
2230 * too.
2233 /* Read the date. Generate one if one is not supplied */
2234 dates = apr_table_get(r->err_headers_out, "Date");
2235 if (dates == NULL) {
2236 dates = apr_table_get(r->headers_out, "Date");
2238 if (dates != NULL) {
2239 info->date = apr_date_parse_http(dates);
2241 else {
2242 info->date = APR_DATE_BAD;
2245 now = apr_time_now();
2246 if (info->date == APR_DATE_BAD) { /* No, or bad date */
2247 /* no date header (or bad header)! */
2248 info->date = now;
2250 date = info->date;
2252 /* set response_time for HTTP/1.1 age calculations */
2253 info->response_time = now;
2255 /* get the request time */
2256 info->request_time = r->request_time;
2257 #if 0
2258 /* check last-modified date */
2259 if (lastmod != APR_DATE_BAD && lastmod > date) {
2260 /* if it's in the future, then replace by date */
2261 lastmod = date;
2262 lastmods = dates;
2263 ap_log_error(APLOG_MARK, APLOG_DEBUG, 0,
2264 r->server,
2265 "cache: Last modified is in the future, "
2266 "replacing with now");
2269 /* if no expiry date then
2270 * if Cache-Control: max-age
2271 * expiry date = date + max-age
2272 * else if lastmod
2273 * expiry date = date + min((date - lastmod) * factor, maxexpire)
2274 * else
2275 * expire date = date + defaultexpire
2277 if (exp == APR_DATE_BAD) {
2278 char *max_age_val;
2280 if (ap_cache_liststr(r->pool, cc_out, "max-age", &max_age_val) &&
2281 max_age_val != NULL) {
2282 apr_int64_t x;
2284 errno = 0;
2285 x = apr_atoi64(max_age_val);
2286 if (errno) {
2287 x = conf->defex;
2289 else {
2290 x = x * MSEC_ONE_SEC;
2292 if (x < conf->minex) {
2293 x = conf->minex;
2295 if (x > conf->maxex) {
2296 x = conf->maxex;
2298 exp = date + x;
2300 else if ((lastmod != APR_DATE_BAD) && (lastmod < date)) {
2301 /* if lastmod == date then you get 0*conf->factor which results in
2302 * an expiration time of now. This causes some problems with
2303 * freshness calculations, so we choose the else path...
2305 apr_time_t x = (apr_time_t) ((date - lastmod) * conf->factor);
2307 if (x < conf->minex) {
2308 x = conf->minex;
2310 if (x > conf->maxex) {
2311 x = conf->maxex;
2313 exp = date + x;
2315 else {
2316 exp = date + conf->defex;
2319 #endif
2320 info->expire = exp;
2322 /* We found a stale entry which wasn't really stale. */
2323 if (cache->stale_handle) {
2324 /* Load in the saved status and clear the status line. */
2325 r->status = info->status;
2326 r->status_line = NULL;
2328 /* RFC 2616 10.3.5 states that entity headers are not supposed
2329 * to be in the 304 response. Therefore, we need to combine the
2330 * response headers with the cached headers *before* we update
2331 * the cached headers.
2333 * However, before doing that, we need to first merge in
2334 * err_headers_out and we also need to strip any hop-by-hop
2335 * headers that might have snuck in.
2337 r->headers_out = ap_cache_cacheable_headers_out(r);
2339 /* Merge in our cached headers. However, keep any updated values. */
2340 ap_cache_accept_headers(cache->handle, r, 1);
2343 /* Write away header information to cache. It is possible that we are
2344 * trying to update headers for an entity which has already been cached.
2346 * This may fail, due to an unwritable cache area. E.g. filesystem full,
2347 * permissions problems or a read-only (re)mount. This must be handled
2348 * later.
2350 rv = store_headers(cache->handle, r, info);
2352 /* Did we just update the cached headers on a revalidated response?
2354 * If so, we can now decide what to serve to the client. This is done in
2355 * the same way as with a regular response, but conditions are now checked
2356 * against the cached or merged response headers.
2358 if (cache->stale_handle) {
2359 apr_bucket_brigade *bb;
2360 apr_bucket *bkt;
2361 int status;
2363 bb = apr_brigade_create(r->pool, r->connection->bucket_alloc);
2365 /* Restore the original request headers and see if we need to
2366 * return anything else than the cached response (ie. the original
2367 * request was conditional).
2369 r->headers_in = cache->stale_headers;
2370 status = ap_meets_conditions(r);
2371 if (status != OK) {
2372 r->status = status;
2374 bkt = apr_bucket_flush_create(bb->bucket_alloc);
2375 APR_BRIGADE_INSERT_TAIL(bb, bkt);
2377 else {
2378 recall_body(cache->handle, r->pool, bb);
2381 cache->block_response = 1;
2383 /* Before returning we need to handle the possible case of an
2384 * unwritable cache. Rather than leaving the entity in the cache
2385 * and having it constantly re-validated, now that we have recalled
2386 * the body it is safe to try and remove the url from the cache.
2388 if (rv != APR_SUCCESS) {
2389 ap_log_error(APLOG_MARK, APLOG_DEBUG, rv, r->server,
2390 "cache: updating headers with store_headers failed. "
2391 "Removing cached url.");
2393 rv = remove_url(cache->stale_handle, r->pool);
2394 if (rv != OK) {
2395 /* Probably a mod_disk_cache cache area has been (re)mounted
2396 * read-only, or that there is a permissions problem.
2398 ap_log_error(APLOG_MARK, APLOG_DEBUG, rv, r->server,
2399 "cache: attempt to remove url from cache unsuccessful.");
2403 return ap_pass_brigade(f->next, bb);
2406 if(rv != APR_SUCCESS) {
2407 ap_log_error(APLOG_MARK, APLOG_DEBUG, rv, r->server,
2408 "cache: store_headers failed");
2409 ap_remove_output_filter(f);
2411 return ap_pass_brigade(f->next, in);
2414 rv = store_body(cache->handle, r, in);
2415 if (rv != APR_SUCCESS) {
2416 ap_log_error(APLOG_MARK, APLOG_DEBUG, rv, r->server,
2417 "cache: store_body failed");
2418 ap_remove_output_filter(f);
2421 return ap_pass_brigade(f->next, in);
2426 * CACHE_REMOVE_URL filter
2427 * ---------------
2429 * This filter gets added in the quick handler every time the CACHE_SAVE filter
2430 * gets inserted. Its purpose is to remove a confirmed stale cache entry from
2431 * the cache.
2433 * CACHE_REMOVE_URL has to be a protocol filter to ensure that it is run even if
2434 * the response is a canned error message, which removes the content filters
2435 * and thus the CACHE_SAVE filter from the chain.
2437 * CACHE_REMOVE_URL expects cache request rec within its context because the
2438 * request this filter runs on can be different from the one whose cache entry
2439 * should be removed, due to internal redirects.
2441 * Note that CACHE_SAVE (as a content-set filter, hence run before the
2442 * protocol filters) will remove this filter if it decides to cache the file.
2443 * Therefore, if this filter is left in, it must mean we need to toss any
2444 * existing files.
2446 int cache_remove_url_filter(ap_filter_t *f, apr_bucket_brigade *in)
2448 request_rec *r = f->r;
2449 cache_request_rec *cache;
2451 /* Setup cache_request_rec */
2452 cache = (cache_request_rec *) f->ctx;
2454 if (!cache) {
2455 /* user likely configured CACHE_REMOVE_URL manually; they should really
2456 * use mod_cache configuration to do that. So:
2457 * 1. Remove ourselves
2458 * 2. Do nothing and bail out
2460 ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server,
2461 "cache: CACHE_REMOVE_URL enabled unexpectedly");
2462 ap_remove_output_filter(f);
2463 return ap_pass_brigade(f->next, in);
2465 /* Now remove this cache entry from the cache */
2466 cache_remove_url(cache, r->pool);
2468 /* remove ourselves */
2469 ap_remove_output_filter(f);
2470 return ap_pass_brigade(f->next, in);
2474 /*static const cache_provider crccache_client_provider = { &remove_entity,
2475 &store_headers, &store_body, &recall_headers, &recall_body,
2476 &create_entity, &open_entity, &remove_url, };
2478 static void disk_cache_register_hook(apr_pool_t *p) {
2479 ap_log_error(APLOG_MARK, APLOG_INFO, 0, NULL,
2480 "Registering crccache client module, (C) 2009, Toby Collett");
2482 /* cache initializer */
2483 /* cache handler */
2484 ap_hook_quick_handler(cache_url_handler, NULL, NULL, APR_HOOK_FIRST);
2485 /* cache filters
2486 * XXX The cache filters need to run right after the handlers and before
2487 * any other filters. Consider creating AP_FTYPE_CACHE for this purpose.
2489 * Depending on the type of request (subrequest / main request) they
2490 * need to be run before AP_FTYPE_CONTENT_SET / after AP_FTYPE_CONTENT_SET
2491 * filters. Thus create two filter handles for each type:
2492 * cache_save_filter_handle / cache_out_filter_handle to be used by
2493 * main requests and
2494 * cache_save_subreq_filter_handle / cache_out_subreq_filter_handle
2495 * to be run by subrequest
2498 * CACHE_SAVE must go into the filter chain after a possible DEFLATE
2499 * filter to ensure that the compressed content is stored.
2500 * Incrementing filter type by 1 ensures his happens.
2502 cache_save_filter_handle =
2503 ap_register_output_filter("CACHE_SAVE",
2504 cache_save_filter,
2505 NULL,
2506 AP_FTYPE_CONTENT_SET+1);
2508 * CACHE_SAVE_SUBREQ must go into the filter chain before SUBREQ_CORE to
2509 * handle subrequsts. Decrementing filter type by 1 ensures this
2510 * happens.
2512 cache_save_subreq_filter_handle =
2513 ap_register_output_filter("CACHE_SAVE_SUBREQ",
2514 cache_save_filter,
2515 NULL,
2516 AP_FTYPE_CONTENT_SET-1);
2518 * CACHE_OUT must go into the filter chain after a possible DEFLATE
2519 * filter to ensure that already compressed cache objects do not
2520 * get compressed again. Incrementing filter type by 1 ensures
2521 * his happens.
2523 cache_out_filter_handle =
2524 ap_register_output_filter("CACHE_OUT",
2525 cache_out_filter,
2526 NULL,
2527 AP_FTYPE_CONTENT_SET+1);
2529 * CACHE_OUT_SUBREQ must go into the filter chain before SUBREQ_CORE to
2530 * handle subrequsts. Decrementing filter type by 1 ensures this
2531 * happens.
2533 cache_out_subreq_filter_handle =
2534 ap_register_output_filter("CACHE_OUT_SUBREQ",
2535 cache_out_filter,
2536 NULL,
2537 AP_FTYPE_CONTENT_SET-1);
2538 /* CACHE_REMOVE_URL has to be a protocol filter to ensure that is
2539 * run even if the response is a canned error message, which
2540 * removes the content filters.
2542 cache_remove_url_filter_handle =
2543 ap_register_output_filter("CACHE_REMOVE_URL",
2544 cache_remove_url_filter,
2545 NULL,
2546 AP_FTYPE_PROTOCOL);
2548 /* cache initializer */
2549 // ap_register_provider(p, CACHE_PROVIDER_GROUP, "crccache_client", "0",
2550 // &crccache_client_provider);
2552 * CACHE_OUT must go into the filter chain after a possible DEFLATE
2553 * filter to ensure that already compressed cache objects do not
2554 * get compressed again. Incrementing filter type by 1 ensures
2555 * his happens.
2557 crccache_decode_filter_handle = ap_register_output_filter(
2558 "CRCCACHE_DECODE", crccache_decode_filter, NULL,
2559 AP_FTYPE_CONTENT_SET + 1);
2561 ap_hook_post_config(cache_post_config, NULL, NULL, APR_HOOK_REALLY_FIRST);
2565 module AP_MODULE_DECLARE_DATA crccache_client_module = {
2566 STANDARD20_MODULE_STUFF, NULL, /* create per-directory config structure */
2567 NULL , /* merge per-directory config structures */
2568 create_config, /* create per-server config structure */
2569 NULL , /* merge per-server config structures */
2570 disk_cache_cmds, /* command apr_table_t */
2571 disk_cache_register_hook /* register hooks */