reimplement uncompressed literal support
[httpd-crcsyncproxy.git] / crccache / mod_crccache_client.c
blobf00374760ccac127020289e9a305c179e39fce58
1 /* Licensed to the Apache Software Foundation (ASF) under one or more
2 * contributor license agreements. See the NOTICE file distributed with
3 * this work for additional information regarding copyright ownership.
4 * The ASF licenses this file to You under the Apache License, Version 2.0
5 * (the "License"); you may not use this file except in compliance with
6 * the License. You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 /* crcsync/crccache apache client module
19 * This module is designed to run as a cache server on the local end of a slow
20 * internet link. This module uses a crc running hash algorithm to reduce
21 * data transfer in cached but modified upstream files.
23 * CRC algorithm uses the crcsync library created by Rusty Russell
25 * Author: Toby Collett (2009)
26 * Contributor: Alex Wulms (2009)
32 #include <assert.h>
34 #include <apr-1.0/apr_file_io.h>
35 #include <apr-1.0/apr_strings.h>
36 #include <apr-1.0/apr_base64.h>
37 #include <apr-1.0/apr_lib.h>
38 #include <apr-1.0/apr_date.h>
39 #include "ap_provider.h"
40 #include "util_filter.h"
41 #include "util_script.h"
42 #include "util_charset.h"
43 #include <http_log.h>
44 #include <http_protocol.h>
46 #include "crccache.h"
47 #include "ap_wrapper.h"
48 #include <crcsync/crcsync.h>
49 #include <crc/crc.h>
50 #include <zlib.h>
52 #include "mod_crccache_client.h"
54 static ap_filter_rec_t *crccache_decode_filter_handle;
56 /* Handles for cache filters, resolved at startup to eliminate
57 * a name-to-function mapping on each request
59 static ap_filter_rec_t *cache_save_filter_handle;
60 static ap_filter_rec_t *cache_save_subreq_filter_handle;
61 static ap_filter_rec_t *cache_out_filter_handle;
62 static ap_filter_rec_t *cache_out_subreq_filter_handle;
63 static ap_filter_rec_t *cache_remove_url_filter_handle;
66 * mod_disk_cache: Disk Based HTTP 1.1 Cache.
68 * Flow to Find the .data file:
69 * Incoming client requests URI /foo/bar/baz
70 * Generate <hash> off of /foo/bar/baz
71 * Open <hash>.header
72 * Read in <hash>.header file (may contain Format #1 or Format #2)
73 * If format #1 (Contains a list of Vary Headers):
74 * Use each header name (from .header) with our request values (headers_in) to
75 * regenerate <hash> using HeaderName+HeaderValue+.../foo/bar/baz
76 * re-read in <hash>.header (must be format #2)
77 * read in <hash>.data
79 * Format #1:
80 * apr_uint32_t format;
81 * apr_time_t expire;
82 * apr_array_t vary_headers (delimited by CRLF)
84 * Format #2:
85 * disk_cache_info_t (first sizeof(apr_uint32_t) bytes is the format)
86 * entity name (dobj->name) [length is in disk_cache_info_t->name_len]
87 * r->headers_out (delimited by CRLF)
88 * CRLF
89 * r->headers_in (delimited by CRLF)
90 * CRLF
93 module AP_MODULE_DECLARE_DATA crccache_client_module;
94 APR_OPTIONAL_FN_TYPE(ap_cache_generate_key) *cache_generate_key;
97 static int cache_post_config(apr_pool_t *p, apr_pool_t *plog,
98 apr_pool_t *ptemp, server_rec *s)
100 /* This is the means by which unusual (non-unix) os's may find alternate
101 * means to run a given command (e.g. shebang/registry parsing on Win32)
103 cache_generate_key = APR_RETRIEVE_OPTIONAL_FN(ap_cache_generate_key);
104 if (!cache_generate_key) {
105 cache_generate_key = cache_generate_key_default;
107 return OK;
112 * Local static functions
115 static char *header_file(apr_pool_t *p, crccache_client_conf *conf,
116 disk_cache_object_t *dobj, const char *name) {
117 if (!dobj->hashfile) {
118 dobj->hashfile = ap_cache_generate_name(p, conf->dirlevels,
119 conf->dirlength, name);
122 if (dobj->prefix) {
123 return apr_pstrcat(p, dobj->prefix, CACHE_VDIR_SUFFIX, "/",
124 dobj->hashfile, CACHE_HEADER_SUFFIX, NULL);
125 } else {
126 return apr_pstrcat(p, conf->cache_root, "/", dobj->hashfile,
127 CACHE_HEADER_SUFFIX, NULL);
131 static char *data_file(apr_pool_t *p, crccache_client_conf *conf,
132 disk_cache_object_t *dobj, const char *name) {
133 if (!dobj->hashfile) {
134 dobj->hashfile = ap_cache_generate_name(p, conf->dirlevels,
135 conf->dirlength, name);
138 if (dobj->prefix) {
139 return apr_pstrcat(p, dobj->prefix, CACHE_VDIR_SUFFIX, "/",
140 dobj->hashfile, CACHE_DATA_SUFFIX, NULL);
141 } else {
142 return apr_pstrcat(p, conf->cache_root, "/", dobj->hashfile,
143 CACHE_DATA_SUFFIX, NULL);
147 static void mkdir_structure(crccache_client_conf *conf, const char *file,
148 apr_pool_t *pool) {
149 apr_status_t rv;
150 char *p;
152 for (p = (char*) file + conf->cache_root_len + 1;;) {
153 p = strchr(p, '/');
154 if (!p)
155 break;
156 *p = '\0';
158 rv = apr_dir_make(file, APR_UREAD | APR_UWRITE | APR_UEXECUTE, pool);
159 if (rv != APR_SUCCESS && !APR_STATUS_IS_EEXIST(rv)) {
160 /* XXX */
162 *p = '/';
163 ++p;
167 /* htcacheclean may remove directories underneath us.
168 * So, we'll try renaming three times at a cost of 0.002 seconds.
170 static apr_status_t safe_file_rename(crccache_client_conf *conf, const char *src,
171 const char *dest, apr_pool_t *pool) {
172 apr_status_t rv;
174 rv = apr_file_rename(src, dest, pool);
176 if (rv != APR_SUCCESS) {
177 int i;
179 for (i = 0; i < 2 && rv != APR_SUCCESS; i++) {
180 /* 1000 micro-seconds aka 0.001 seconds. */
181 apr_sleep(1000);
183 mkdir_structure(conf, dest, pool);
185 rv = apr_file_rename(src, dest, pool);
189 return rv;
192 static apr_status_t file_cache_el_final(disk_cache_object_t *dobj,
193 request_rec *r) {
194 /* move the data over */
195 if (dobj->tfd) {
196 apr_status_t rv;
198 apr_file_close(dobj->tfd);
200 /* This assumes that the tempfile is on the same file system
201 * as the cache_root. If not, then we need a file copy/move
202 * rather than a rename.
204 rv = apr_file_rename(dobj->tempfile, dobj->datafile, r->pool);
205 if (rv != APR_SUCCESS) {
206 ap_log_error(APLOG_MARK, APLOG_WARNING, rv,r->server, "disk_cache: rename tempfile to datafile failed:"
207 " %s -> %s", dobj->tempfile, dobj->datafile);
208 apr_file_remove(dobj->tempfile, r->pool);
211 dobj->tfd = NULL;
214 return APR_SUCCESS;
217 static apr_status_t file_cache_errorcleanup(disk_cache_object_t *dobj,
218 request_rec *r) {
219 /* Remove the header file and the body file. */
220 apr_file_remove(dobj->hdrsfile, r->pool);
221 apr_file_remove(dobj->datafile, r->pool);
223 /* If we opened the temporary data file, close and remove it. */
224 if (dobj->tfd) {
225 apr_file_close(dobj->tfd);
226 apr_file_remove(dobj->tempfile, r->pool);
227 dobj->tfd = NULL;
230 return APR_SUCCESS;
233 /* These two functions get and put state information into the data
234 * file for an ap_cache_el, this state information will be read
235 * and written transparent to clients of this module
237 static int file_cache_recall_mydata(apr_file_t *fd, cache_info *info,
238 disk_cache_object_t *dobj, request_rec *r) {
239 apr_status_t rv;
240 char *urlbuff;
241 disk_cache_info_t disk_info;
242 apr_size_t len;
244 /* read the data from the cache file */
245 len = sizeof(disk_cache_info_t);
246 rv = apr_file_read_full(fd, &disk_info, len, &len);
247 if (rv != APR_SUCCESS) {
248 return rv;
251 /* Store it away so we can get it later. */
252 dobj->disk_info = disk_info;
254 info->status = disk_info.status;
255 info->date = disk_info.date;
256 info->expire = disk_info.expire;
257 info->request_time = disk_info.request_time;
258 info->response_time = disk_info.response_time;
260 /* Note that we could optimize this by conditionally doing the palloc
261 * depending upon the size. */
262 urlbuff = apr_palloc(r->pool, disk_info.name_len + 1);
263 len = disk_info.name_len;
264 rv = apr_file_read_full(fd, urlbuff, len, &len);
265 if (rv != APR_SUCCESS) {
266 return rv;
268 urlbuff[disk_info.name_len] = '\0';
270 /* check that we have the same URL */
271 /* Would strncmp be correct? */
272 if (strcmp(urlbuff, dobj->name) != 0) {
273 return APR_EGENERAL;
276 return APR_SUCCESS;
279 static const char* regen_key(apr_pool_t *p, apr_table_t *headers,
280 apr_array_header_t *varray, const char *oldkey) {
281 struct iovec *iov;
282 int i, k;
283 int nvec;
284 const char *header;
285 const char **elts;
287 nvec = (varray->nelts * 2) + 1;
288 iov = apr_palloc(p, sizeof(struct iovec) * nvec);
289 elts = (const char **) varray->elts;
291 /* TODO:
292 * - Handle multiple-value headers better. (sort them?)
293 * - Handle Case in-sensitive Values better.
294 * This isn't the end of the world, since it just lowers the cache
295 * hit rate, but it would be nice to fix.
297 * The majority are case insenstive if they are values (encoding etc).
298 * Most of rfc2616 is case insensitive on header contents.
300 * So the better solution may be to identify headers which should be
301 * treated case-sensitive?
302 * HTTP URI's (3.2.3) [host and scheme are insensitive]
303 * HTTP method (5.1.1)
304 * HTTP-date values (3.3.1)
305 * 3.7 Media Types [exerpt]
306 * The type, subtype, and parameter attribute names are case-
307 * insensitive. Parameter values might or might not be case-sensitive,
308 * depending on the semantics of the parameter name.
309 * 4.20 Except [exerpt]
310 * Comparison of expectation values is case-insensitive for unquoted
311 * tokens (including the 100-continue token), and is case-sensitive for
312 * quoted-string expectation-extensions.
315 for (i = 0, k = 0; i < varray->nelts; i++) {
316 header = apr_table_get(headers, elts[i]);
317 if (!header) {
318 header = "";
320 iov[k].iov_base = (char*) elts[i];
321 iov[k].iov_len = strlen(elts[i]);
322 k++;
323 iov[k].iov_base = (char*) header;
324 iov[k].iov_len = strlen(header);
325 k++;
327 iov[k].iov_base = (char*) oldkey;
328 iov[k].iov_len = strlen(oldkey);
329 k++;
331 return apr_pstrcatv(p, iov, k, NULL);
/*
 * qsort() comparator for an array of C strings: each argument points to an
 * array element, i.e. to a 'char *'. Orders the strings with strcmp().
 *
 * The elements are only read, so the casts keep the const qualifier of the
 * incoming pointers instead of discarding it.
 */
static int array_alphasort(const void *fn1, const void *fn2)
{
    const char *lhs = *(const char *const *) fn1;
    const char *rhs = *(const char *const *) fn2;

    return strcmp(lhs, rhs);
}
338 static void tokens_to_array(apr_pool_t *p, const char *data,
339 apr_array_header_t *arr) {
340 char *token;
342 while ((token = ap_get_list_item(p, &data)) != NULL) {
343 *((const char **) apr_array_push(arr)) = token;
346 /* Sort it so that "Vary: A, B" and "Vary: B, A" are stored the same. */
347 qsort((void *) arr->elts, arr->nelts, sizeof(char *), array_alphasort);
351 * Hook and mod_cache callback functions
353 int create_entity(cache_handle_t *h, request_rec *r, const char *key,
354 apr_off_t len) {
355 crccache_client_conf *conf = ap_get_module_config(r->server->module_config,
356 &crccache_client_module);
357 cache_object_t *obj;
358 disk_cache_object_t *dobj;
360 if (conf->cache_root == NULL) {
361 return DECLINED;
364 /* Allocate and initialize cache_object_t and disk_cache_object_t */
365 h->cache_obj = obj = apr_pcalloc(r->pool, sizeof(*obj));
366 obj->vobj = dobj = apr_pcalloc(r->pool, sizeof(*dobj));
368 obj->key = apr_pstrdup(r->pool, key);
370 dobj->name = obj->key;
371 dobj->prefix = NULL;
372 /* Save the cache root */
373 dobj->root = apr_pstrndup(r->pool, conf->cache_root, conf->cache_root_len);
374 dobj->root_len = conf->cache_root_len;
375 dobj->datafile = data_file(r->pool, conf, dobj, key);
376 dobj->hdrsfile = header_file(r->pool, conf, dobj, key);
377 dobj->tempfile = apr_pstrcat(r->pool, conf->cache_root, AP_TEMPFILE, NULL);
379 return OK;
382 int open_entity(cache_handle_t *h, request_rec *r, const char *key) {
383 apr_uint32_t format;
384 apr_size_t len;
385 const char *nkey;
386 apr_status_t rc;
387 static int error_logged = 0;
388 crccache_client_conf *conf = ap_get_module_config(r->server->module_config,
389 &crccache_client_module);
390 apr_finfo_t finfo;
391 cache_object_t *obj;
392 cache_info *info;
393 disk_cache_object_t *dobj;
394 int flags;
395 h->cache_obj = NULL;
397 /* Look up entity keyed to 'url' */
398 if (conf->cache_root == NULL) {
399 if (!error_logged) {
400 error_logged = 1;
401 ap_log_error(APLOG_MARK, APLOG_ERR, 0, r->server,
402 "disk_cache: Cannot cache files to disk without a CacheRootClient specified.");
404 return DECLINED;
407 /* Create and init the cache object */
408 h->cache_obj = obj = apr_pcalloc(r->pool, sizeof(cache_object_t));
409 obj->vobj = dobj = apr_pcalloc(r->pool, sizeof(disk_cache_object_t));
411 info = &(obj->info);
413 /* Open the headers file */
414 dobj->prefix = NULL;
416 /* Save the cache root */
417 dobj->root = apr_pstrndup(r->pool, conf->cache_root, conf->cache_root_len);
418 dobj->root_len = conf->cache_root_len;
420 dobj->hdrsfile = header_file(r->pool, conf, dobj, key);
421 flags = APR_READ|APR_BINARY|APR_BUFFERED;
422 rc = apr_file_open(&dobj->hfd, dobj->hdrsfile, flags, 0, r->pool);
423 if (rc != APR_SUCCESS) {
424 return DECLINED;
427 /* read the format from the cache file */
428 len = sizeof(format);
429 apr_file_read_full(dobj->hfd, &format, len, &len);
431 if (format == VARY_FORMAT_VERSION) {
432 apr_array_header_t* varray;
433 apr_time_t expire;
435 len = sizeof(expire);
436 apr_file_read_full(dobj->hfd, &expire, len, &len);
438 varray = apr_array_make(r->pool, 5, sizeof(char*));
439 rc = read_array(r, varray, dobj->hfd);
440 if (rc != APR_SUCCESS) {
441 ap_log_error(APLOG_MARK, APLOG_ERR, rc, r->server,
442 "disk_cache: Cannot parse vary header file: %s",
443 dobj->hdrsfile);
444 return DECLINED;
446 apr_file_close(dobj->hfd);
448 nkey = regen_key(r->pool, r->headers_in, varray, key);
450 dobj->hashfile = NULL;
451 dobj->prefix = dobj->hdrsfile;
452 dobj->hdrsfile = header_file(r->pool, conf, dobj, nkey);
454 flags = APR_READ|APR_BINARY|APR_BUFFERED;
455 rc = apr_file_open(&dobj->hfd, dobj->hdrsfile, flags, 0, r->pool);
456 if (rc != APR_SUCCESS) {
457 return DECLINED;
460 else if (format != DISK_FORMAT_VERSION) {
461 ap_log_error(APLOG_MARK, APLOG_ERR, 0, r->server,
462 "cache_disk: File '%s' has a version mismatch. File had version: %d.",
463 dobj->hdrsfile, format);
464 return DECLINED;
466 else {
467 apr_off_t offset = 0;
468 /* This wasn't a Vary Format file, so we must seek to the
469 * start of the file again, so that later reads work.
471 apr_file_seek(dobj->hfd, APR_SET, &offset);
472 nkey = key;
475 obj->key = nkey;
476 dobj->key = nkey;
477 dobj->name = key;
478 dobj->datafile = data_file(r->pool, conf, dobj, nkey);
479 dobj->tempfile = apr_pstrcat(r->pool, conf->cache_root, AP_TEMPFILE, NULL);
481 /* Open the data file */
482 flags = APR_READ|APR_BINARY;
483 #ifdef APR_SENDFILE_ENABLED
484 flags |= APR_SENDFILE_ENABLED;
485 #endif
486 rc = apr_file_open(&dobj->fd, dobj->datafile, flags, 0, r->pool);
487 if (rc != APR_SUCCESS) {
488 /* XXX: Log message */
489 return DECLINED;
492 rc = apr_file_info_get(&finfo, APR_FINFO_SIZE, dobj->fd);
493 if (rc == APR_SUCCESS) {
494 dobj->file_size = finfo.size;
497 /* Read the bytes to setup the cache_info fields */
498 rc = file_cache_recall_mydata(dobj->hfd, info, dobj, r);
499 if (rc != APR_SUCCESS) {
500 /* XXX log message */
501 return DECLINED;
504 /* Initialize the cache_handle callback functions */
505 ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server,
506 "disk_cache: Recalled cached URL info header %s", dobj->name);
507 return OK;
510 int remove_entity(cache_handle_t *h) {
511 /* Null out the cache object pointer so next time we start from scratch */
512 h->cache_obj = NULL;
514 return OK;
517 int remove_url(cache_handle_t *h, apr_pool_t *p) {
518 apr_status_t rc;
519 disk_cache_object_t *dobj;
521 /* Get disk cache object from cache handle */
522 dobj = (disk_cache_object_t *) h->cache_obj->vobj;
523 if (!dobj) {
524 return DECLINED;
527 /* Delete headers file */
528 if (dobj->hdrsfile) {
529 ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, NULL,
530 "disk_cache: Deleting %s from cache.", dobj->hdrsfile);
532 rc = apr_file_remove(dobj->hdrsfile, p);
533 if ((rc != APR_SUCCESS) && !APR_STATUS_IS_ENOENT(rc)) {
534 /* Will only result in an output if httpd is started with -e debug.
535 * For reason see log_error_core for the case s == NULL.
537 ap_log_error(APLOG_MARK, APLOG_DEBUG, rc, NULL,
538 "disk_cache: Failed to delete headers file %s from cache.",
539 dobj->hdrsfile);
540 return DECLINED;
544 /* Delete data file */
545 if (dobj->datafile) {
546 ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, NULL,
547 "disk_cache: Deleting %s from cache.", dobj->datafile);
549 rc = apr_file_remove(dobj->datafile, p);
550 if ((rc != APR_SUCCESS) && !APR_STATUS_IS_ENOENT(rc)) {
551 /* Will only result in an output if httpd is started with -e debug.
552 * For reason see log_error_core for the case s == NULL.
554 ap_log_error(APLOG_MARK, APLOG_DEBUG, rc, NULL,
555 "disk_cache: Failed to delete data file %s from cache.",
556 dobj->datafile);
557 return DECLINED;
561 /* now delete directories as far as possible up to our cache root */
562 if (dobj->root) {
563 const char *str_to_copy;
565 str_to_copy = dobj->hdrsfile ? dobj->hdrsfile : dobj->datafile;
566 if (str_to_copy) {
567 char *dir, *slash, *q;
569 dir = apr_pstrdup(p, str_to_copy);
571 /* remove filename */
572 slash = strrchr(dir, '/');
573 *slash = '\0';
576 * now walk our way back to the cache root, delete everything
577 * in the way as far as possible
579 * Note: due to the way we constructed the file names in
580 * header_file and data_file, we are guaranteed that the
581 * cache_root is suffixed by at least one '/' which will be
582 * turned into a terminating null by this loop. Therefore,
583 * we won't either delete or go above our cache root.
585 for (q = dir + dobj->root_len; *q; ) {
586 ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, NULL,
587 "disk_cache: Deleting directory %s from cache",
588 dir);
590 rc = apr_dir_remove(dir, p);
591 if (rc != APR_SUCCESS && !APR_STATUS_IS_ENOENT(rc)) {
592 break;
594 slash = strrchr(q, '/');
595 *slash = '\0';
600 return OK;
603 apr_status_t read_array(request_rec *r, apr_array_header_t* arr,
604 apr_file_t *file) {
605 char w[MAX_STRING_LEN];
606 int p;
607 apr_status_t rv;
609 while (1) {
610 rv = apr_file_gets(w, MAX_STRING_LEN - 1, file);
611 if (rv != APR_SUCCESS) {
612 ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r,
613 "Premature end of vary array.");
614 return rv;
617 p = strlen(w);
618 if (p> 0 && w[p - 1] == '\n') {
619 if (p> 1 && w[p - 2] == CR) {
620 w[p - 2] = '\0';
622 else {
623 w[p - 1] = '\0';
627 /* If we've finished reading the array, break out of the loop. */
628 if (w[0] == '\0') {
629 break;
632 *((const char **) apr_array_push(arr)) = apr_pstrdup(r->pool, w);
635 return APR_SUCCESS;
638 static apr_status_t store_array(apr_file_t *fd, apr_array_header_t* arr) {
639 int i;
640 apr_status_t rv;
641 struct iovec iov[2];
642 apr_size_t amt;
643 const char **elts;
645 elts = (const char **) arr->elts;
647 for (i = 0; i < arr->nelts; i++) {
648 iov[0].iov_base = (char*) elts[i];
649 iov[0].iov_len = strlen(elts[i]);
650 iov[1].iov_base = CRLF;
651 iov[1].iov_len = sizeof(CRLF) - 1;
653 rv = apr_file_writev(fd, (const struct iovec *) &iov, 2,
654 &amt);
655 if (rv != APR_SUCCESS) {
656 return rv;
660 iov[0].iov_base = CRLF;
661 iov[0].iov_len = sizeof(CRLF) - 1;
663 return apr_file_writev(fd, (const struct iovec *) &iov, 1,
664 &amt);
667 apr_status_t read_table(cache_handle_t *handle, request_rec *r,
668 apr_table_t *table, apr_file_t *file) {
669 char w[MAX_STRING_LEN];
670 char *l;
671 int p;
672 apr_status_t rv;
674 while (1) {
676 /* ### What about APR_EOF? */
677 rv = apr_file_gets(w, MAX_STRING_LEN - 1, file);
678 if (rv != APR_SUCCESS) {
679 ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r,
680 "Premature end of cache headers.");
681 return rv;
684 /* Delete terminal (CR?)LF */
686 p = strlen(w);
687 /* Indeed, the host's '\n':
688 '\012' for UNIX; '\015' for MacOS; '\025' for OS/390
689 -- whatever the script generates.
691 if (p> 0 && w[p - 1] == '\n') {
692 if (p> 1 && w[p - 2] == CR) {
693 w[p - 2] = '\0';
695 else {
696 w[p - 1] = '\0';
700 /* If we've finished reading the headers, break out of the loop. */
701 if (w[0] == '\0') {
702 break;
705 #if APR_CHARSET_EBCDIC
706 /* Chances are that we received an ASCII header text instead of
707 * the expected EBCDIC header lines. Try to auto-detect:
709 if (!(l = strchr(w, ':'))) {
710 int maybeASCII = 0, maybeEBCDIC = 0;
711 unsigned char *cp, native;
712 apr_size_t inbytes_left, outbytes_left;
714 for (cp = w; *cp != '\0'; ++cp) {
715 native = apr_xlate_conv_byte(ap_hdrs_from_ascii, *cp);
716 if (apr_isprint(*cp) && !apr_isprint(native))
717 ++maybeEBCDIC;
718 if (!apr_isprint(*cp) && apr_isprint(native))
719 ++maybeASCII;
721 if (maybeASCII> maybeEBCDIC) {
722 ap_log_error(APLOG_MARK, APLOG_ERR, 0, r->server,
723 "CGI Interface Error: Script headers apparently ASCII: (CGI = %s)",
724 r->filename);
725 inbytes_left = outbytes_left = cp - w;
726 apr_xlate_conv_buffer(ap_hdrs_from_ascii,
727 w, &inbytes_left, w, &outbytes_left);
730 #endif /*APR_CHARSET_EBCDIC*/
732 /* if we see a bogus header don't ignore it. Shout and scream */
733 if (!(l = strchr(w, ':'))) {
734 return APR_EGENERAL;
737 *l++ = '\0';
738 while (*l && apr_isspace(*l)) {
739 ++l;
742 apr_table_add(table, w, l);
745 return APR_SUCCESS;
749 * Clean-up memory used by helper libraries, that don't know about apr_palloc
750 * and that (probably) use classical malloc/free
752 apr_status_t deflate_ctx_cleanup(void *data)
754 crccache_client_ctx *ctx = (crccache_client_ctx *)data;
756 if (ctx != NULL)
758 if (ctx->decompression_state != DECOMPRESSION_ENDED)
760 inflateEnd(ctx->decompression_stream);
761 ctx->decompression_state = DECOMPRESSION_ENDED;
764 return APR_SUCCESS;
769 * Reads headers from a buffer and returns an array of headers.
770 * Returns NULL on file error
771 * This routine tries to deal with too long lines and continuation lines.
772 * @@@: XXX: FIXME: currently the headers are passed thru un-merged.
773 * Is that okay, or should they be collapsed where possible?
775 apr_status_t recall_headers(cache_handle_t *h, request_rec *r) {
776 const char *data;
777 apr_size_t len;
778 apr_bucket *e;
779 unsigned i;
780 int z_RC;
782 disk_cache_object_t *dobj = (disk_cache_object_t *) h->cache_obj->vobj;
784 /* This case should not happen... */
785 if (!dobj->hfd) {
786 /* XXX log message */
787 return APR_NOTFOUND;
790 h->req_hdrs = apr_table_make(r->pool, 20);
791 h->resp_hdrs = apr_table_make(r->pool, 20);
793 /* Call routine to read the header lines/status line */
794 read_table(h, r, h->resp_hdrs, dobj->hfd);
795 read_table(h, r, h->req_hdrs, dobj->hfd);
797 // TODO: We only really want to add our block hashes if the cache is not fresh
798 // TODO: We could achieve that by adding a filter here on sending the request
799 // and then doing all of this in the filter 'JIT'
800 e = apr_bucket_file_create(dobj->fd, 0, (apr_size_t) dobj->file_size, r->pool,
801 r->connection->bucket_alloc);
803 /* read */
804 apr_bucket_read(e, &data, &len, APR_BLOCK_READ);
806 // this will be rounded down, but thats okay
807 // TODO: I think that we should just add % to the trailing block, otherwise our extra block
808 // is always limited to max of BLOCK_COUNT size.
809 size_t blocksize = len/FULL_BLOCK_COUNT;
810 size_t tail_block_size = blocksize + len % FULL_BLOCK_COUNT;
811 size_t block_count_including_final_block = FULL_BLOCK_COUNT;// + (tail_block_size != 0);
812 // sanity check for very small files
813 if (blocksize> 4)
815 //ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"crccache: %d blocks of %ld bytes",FULL_BLOCK_COUNT,blocksize);
817 crccache_client_ctx * ctx;
818 ctx = apr_pcalloc(r->pool, sizeof(*ctx));
819 ctx->bb = apr_brigade_create(r->pool, r->connection->bucket_alloc);
820 ctx->block_size = blocksize;
821 ctx->tail_block_size = tail_block_size;
822 ctx->state = DECODING_NEW_SECTION;
823 ctx->cached_bucket = e;
825 // Setup inflate for decompressing non-matched literal data
826 ctx->decompression_stream = apr_palloc(r->pool, sizeof(*(ctx->decompression_stream)));
827 ctx->decompression_stream->zalloc = Z_NULL;
828 ctx->decompression_stream->zfree = Z_NULL;
829 ctx->decompression_stream->opaque = Z_NULL;
830 ctx->decompression_stream->avail_in = 0;
831 ctx->decompression_stream->next_in = Z_NULL;
832 z_RC = inflateInit(ctx->decompression_stream);
833 if (z_RC != Z_OK)
835 ap_log_error(APLOG_MARK, APLOG_WARNING, 0, r->server,
836 "Can not initialize decompression engine, return code: %d", z_RC);
837 return APR_SUCCESS;
839 ctx->decompression_state = DECOMPRESSION_INITIALIZED;
841 // Register a cleanup function to cleanup internal libz resources
842 apr_pool_cleanup_register(r->pool, ctx, deflate_ctx_cleanup,
843 apr_pool_cleanup_null);
845 // All OK to go for the crcsync decoding: add the headers
846 // and set-up the decoding filter
848 // add one for base 64 overflow and null terminator
849 char hash_set[HASH_HEADER_SIZE+1];
851 uint64_t crcs[block_count_including_final_block];
852 //crc_of_blocks(data, len, blocksize, HASH_SIZE, crcs);
853 for (i = 0; i < FULL_BLOCK_COUNT - 1; i++) {
854 crcs[i] = crc64_iso(0, &data[i*blocksize], blocksize);
856 crcs[FULL_BLOCK_COUNT] = crc64_iso(0, &data[(FULL_BLOCK_COUNT-1)*blocksize], tail_block_size);
858 // swap to network byte order
859 for (i = 0; i < block_count_including_final_block;++i)
861 htobe64(crcs[i]);
864 apr_base64_encode (hash_set, (char *)crcs, block_count_including_final_block*sizeof(crcs[0]));
865 hash_set[HASH_HEADER_SIZE] = '\0';
866 //apr_bucket_delete(e);
868 // TODO; bit of a safety margin here, could calculate exact size
869 const int block_header_max_size = HASH_HEADER_SIZE+32;
870 char block_header_txt[block_header_max_size];
871 snprintf(block_header_txt, block_header_max_size,"fs=%zu, h=%s",len,hash_set);
872 apr_table_set(r->headers_in, BLOCK_HEADER, block_header_txt);
873 // TODO: do we want to cache the hashes here?
875 // initialise the context for our sha1 digest of the unencoded response
876 EVP_MD_CTX_init(&ctx->mdctx);
877 const EVP_MD *md = EVP_sha1();
878 EVP_DigestInit_ex(&ctx->mdctx, md, NULL);
880 // we want to add a filter here so that we can decode the response.
881 // we need access to the original cached data when we get the response as
882 // we need that to fill in the matched blocks.
883 ap_add_output_filter_handle(crccache_decode_filter_handle,
884 ctx, r, r->connection);
886 // TODO: why is hfd file only closed in this case?
887 apr_file_close(dobj->hfd);
889 ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server,
890 "crccache_client: Recalled headers for URL %s", dobj->name);
891 return APR_SUCCESS;
894 apr_status_t recall_body(cache_handle_t *h, apr_pool_t *p,
895 apr_bucket_brigade *bb) {
896 apr_bucket *e;
897 disk_cache_object_t *dobj = (disk_cache_object_t*) h->cache_obj->vobj;
899 e = apr_bucket_file_create(dobj->fd, 0, (apr_size_t) dobj->file_size, p,
900 bb->bucket_alloc);
902 APR_BRIGADE_INSERT_HEAD(bb, e);
903 e = apr_bucket_eos_create(bb->bucket_alloc);
904 APR_BRIGADE_INSERT_TAIL(bb, e);
906 ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, NULL, "crccache_client: Recalled body for URL %s", dobj->name);
907 return APR_SUCCESS;
910 apr_status_t store_table(apr_file_t *fd, apr_table_t *table) {
911 int i;
912 apr_status_t rv;
913 struct iovec iov[4];
914 apr_size_t amt;
915 apr_table_entry_t *elts;
917 elts = (apr_table_entry_t *) apr_table_elts(table)->elts;
918 for (i = 0; i < apr_table_elts(table)->nelts; ++i) {
919 if (elts[i].key != NULL) {
920 iov[0].iov_base = elts[i].key;
921 iov[0].iov_len = strlen(elts[i].key);
922 iov[1].iov_base = ": ";
923 iov[1].iov_len = sizeof(": ") - 1;
924 iov[2].iov_base = elts[i].val;
925 iov[2].iov_len = strlen(elts[i].val);
926 iov[3].iov_base = CRLF;
927 iov[3].iov_len = sizeof(CRLF) - 1;
929 rv = apr_file_writev(fd, (const struct iovec *) &iov, 4,
930 &amt);
931 if (rv != APR_SUCCESS) {
932 return rv;
936 iov[0].iov_base = CRLF;
937 iov[0].iov_len = sizeof(CRLF) - 1;
938 rv = apr_file_writev(fd, (const struct iovec *) &iov, 1,
939 &amt);
940 return rv;
943 apr_status_t store_headers(cache_handle_t *h, request_rec *r,
944 cache_info *info) {
945 crccache_client_conf *conf = ap_get_module_config(r->server->module_config,
946 &crccache_client_module);
948 apr_status_t rv;
949 apr_size_t amt;
950 disk_cache_object_t *dobj = (disk_cache_object_t*) h->cache_obj->vobj;
952 disk_cache_info_t disk_info;
953 struct iovec iov[2];
955 /* This is flaky... we need to manage the cache_info differently */
956 h->cache_obj->info = *info;
958 if (r->headers_out) {
959 const char *tmp;
961 tmp = apr_table_get(r->headers_out, "Vary");
963 if (tmp) {
964 apr_array_header_t* varray;
965 apr_uint32_t format = VARY_FORMAT_VERSION;
967 /* If we were initially opened as a vary format, rollback
968 * that internal state for the moment so we can recreate the
969 * vary format hints in the appropriate directory.
971 if (dobj->prefix) {
972 dobj->hdrsfile = dobj->prefix;
973 dobj->prefix = NULL;
976 mkdir_structure(conf, dobj->hdrsfile, r->pool);
978 rv = apr_file_mktemp(&dobj->tfd, dobj->tempfile,
979 APR_CREATE | APR_WRITE | APR_BINARY | APR_EXCL,
980 r->pool);
982 if (rv != APR_SUCCESS) {
983 return rv;
986 amt = sizeof(format);
987 apr_file_write(dobj->tfd, &format, &amt);
989 amt = sizeof(info->expire);
990 apr_file_write(dobj->tfd, &info->expire, &amt);
992 varray = apr_array_make(r->pool, 6, sizeof(char*));
993 tokens_to_array(r->pool, tmp, varray);
995 store_array(dobj->tfd, varray);
997 apr_file_close(dobj->tfd);
999 dobj->tfd = NULL;
1001 rv = safe_file_rename(conf, dobj->tempfile, dobj->hdrsfile,
1002 r->pool);
1003 if (rv != APR_SUCCESS) {
1004 ap_log_error(APLOG_MARK, APLOG_WARNING, rv, r->server,
1005 "disk_cache: rename tempfile to varyfile failed: %s -> %s",
1006 dobj->tempfile, dobj->hdrsfile);
1007 apr_file_remove(dobj->tempfile, r->pool);
1008 return rv;
1011 dobj->tempfile = apr_pstrcat(r->pool, conf->cache_root, AP_TEMPFILE, NULL);
1012 tmp = regen_key(r->pool, r->headers_in, varray, dobj->name);
1013 dobj->prefix = dobj->hdrsfile;
1014 dobj->hashfile = NULL;
1015 dobj->datafile = data_file(r->pool, conf, dobj, tmp);
1016 dobj->hdrsfile = header_file(r->pool, conf, dobj, tmp);
1021 rv = apr_file_mktemp(&dobj->hfd, dobj->tempfile,
1022 APR_CREATE | APR_WRITE | APR_BINARY |
1023 APR_BUFFERED | APR_EXCL, r->pool);
1025 if (rv != APR_SUCCESS) {
1026 return rv;
1029 disk_info.format = DISK_FORMAT_VERSION;
1030 disk_info.date = info->date;
1031 disk_info.expire = info->expire;
1032 disk_info.entity_version = dobj->disk_info.entity_version++;
1033 disk_info.request_time = info->request_time;
1034 disk_info.response_time = info->response_time;
1035 disk_info.status = info->status;
1037 disk_info.name_len = strlen(dobj->name);
1039 iov[0].iov_base = (void*)&disk_info;
1040 iov[0].iov_len = sizeof(disk_cache_info_t);
1041 iov[1].iov_base = (void*)dobj->name;
1042 iov[1].iov_len = disk_info.name_len;
1044 rv = apr_file_writev(dobj->hfd, (const struct iovec *) &iov, 2, &amt);
1045 if (rv != APR_SUCCESS) {
1046 return rv;
1049 if (r->headers_out) {
1050 apr_table_t *headers_out;
1052 headers_out = ap_cache_cacheable_hdrs_out(r->pool, r->headers_out,
1053 r->server);
1055 if (!apr_table_get(headers_out, "Content-Type")
1056 && r->content_type) {
1057 apr_table_setn(headers_out, "Content-Type",
1058 ap_make_content_type(r, r->content_type));
1061 headers_out = apr_table_overlay(r->pool, headers_out,
1062 r->err_headers_out);
1063 rv = store_table(dobj->hfd, headers_out);
1064 if (rv != APR_SUCCESS) {
1065 return rv;
1069 /* Parse the vary header and dump those fields from the headers_in. */
1070 /* FIXME: Make call to the same thing cache_select calls to crack Vary. */
1071 if (r->headers_in) {
1072 apr_table_t *headers_in;
1074 headers_in = ap_cache_cacheable_hdrs_out(r->pool, r->headers_in,
1075 r->server);
1076 rv = store_table(dobj->hfd, headers_in);
1077 if (rv != APR_SUCCESS) {
1078 return rv;
1082 apr_file_close(dobj->hfd); /* flush and close */
1084 /* Remove old file with the same name. If remove fails, then
1085 * perhaps we need to create the directory tree where we are
1086 * about to write the new headers file.
1088 rv = apr_file_remove(dobj->hdrsfile, r->pool);
1089 if (rv != APR_SUCCESS) {
1090 mkdir_structure(conf, dobj->hdrsfile, r->pool);
1093 rv = safe_file_rename(conf, dobj->tempfile, dobj->hdrsfile, r->pool);
1094 if (rv != APR_SUCCESS) {
1095 ap_log_error(APLOG_MARK, APLOG_WARNING, rv, r->server,
1096 "disk_cache: rename tempfile to hdrsfile failed: %s -> %s",
1097 dobj->tempfile, dobj->hdrsfile);
1098 apr_file_remove(dobj->tempfile, r->pool);
1099 return rv;
1102 dobj->tempfile = apr_pstrcat(r->pool, conf->cache_root, AP_TEMPFILE, NULL);
1104 ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server,
1105 "disk_cache: Stored headers for URL %s", dobj->name);
1106 return APR_SUCCESS;
1109 apr_status_t store_body(cache_handle_t *h, request_rec *r,
1110 apr_bucket_brigade *bb) {
1111 apr_bucket *e;
1112 apr_status_t rv;
1114 disk_cache_object_t *dobj = (disk_cache_object_t *) h->cache_obj->vobj;
1115 crccache_client_conf *conf = ap_get_module_config(r->server->module_config,
1116 &crccache_client_module);
1118 /* We write to a temp file and then atomically rename the file over
1119 * in file_cache_el_final().
1121 if (!dobj->tfd) {
1122 rv = apr_file_mktemp(&dobj->tfd, dobj->tempfile, APR_CREATE | APR_WRITE
1123 | APR_BINARY | APR_BUFFERED | APR_EXCL, r->pool);
1124 if (rv != APR_SUCCESS) {
1125 return rv;
1127 dobj->file_size = 0;
1130 for (e = APR_BRIGADE_FIRST(bb); e != APR_BRIGADE_SENTINEL(bb); e = APR_BUCKET_NEXT(e)) {
1131 const char *str;
1132 apr_size_t length, written;
1133 rv = apr_bucket_read(e, &str, &length, APR_BLOCK_READ);
1134 if (rv != APR_SUCCESS) {
1135 ap_log_error(APLOG_MARK, APLOG_ERR, 0, r->server,
1136 "cache_disk: Error when reading bucket for URL %s",
1137 h->cache_obj->key);
1138 /* Remove the intermediate cache file and return non-APR_SUCCESS */
1139 file_cache_errorcleanup(dobj, r);
1140 return rv;
1142 rv = apr_file_write_full(dobj->tfd, str, length, &written);
1143 if (rv != APR_SUCCESS) {
1144 ap_log_error(APLOG_MARK, APLOG_ERR, 0, r->server,
1145 "cache_disk: Error when writing cache file for URL %s",
1146 h->cache_obj->key);
1147 /* Remove the intermediate cache file and return non-APR_SUCCESS */
1148 file_cache_errorcleanup(dobj, r);
1149 return rv;
1151 dobj->file_size += written;
1152 if (dobj->file_size> conf->maxfs) {
1153 ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server,
1154 "cache_disk: URL %s failed the size check "
1155 "(%" APR_OFF_T_FMT " > %" APR_OFF_T_FMT ")",
1156 h->cache_obj->key, dobj->file_size, conf->maxfs);
1157 /* Remove the intermediate cache file and return non-APR_SUCCESS */
1158 file_cache_errorcleanup(dobj, r);
1159 return APR_EGENERAL;
1163 /* Was this the final bucket? If yes, close the temp file and perform
1164 * sanity checks.
1166 if (APR_BUCKET_IS_EOS(APR_BRIGADE_LAST(bb))) {
1167 if (r->connection->aborted || r->no_cache) {
1168 ap_log_error(APLOG_MARK, APLOG_INFO, 0, r->server,
1169 "disk_cache: Discarding body for URL %s "
1170 "because connection has been aborted.",
1171 h->cache_obj->key);
1172 /* Remove the intermediate cache file and return non-APR_SUCCESS */
1173 file_cache_errorcleanup(dobj, r);
1174 return APR_EGENERAL;
1176 if (dobj->file_size < conf->minfs) {
1177 ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server,
1178 "cache_disk: URL %s failed the size check "
1179 "(%" APR_OFF_T_FMT " < %" APR_OFF_T_FMT ")",
1180 h->cache_obj->key, dobj->file_size, conf->minfs);
1181 /* Remove the intermediate cache file and return non-APR_SUCCESS */
1182 file_cache_errorcleanup(dobj, r);
1183 return APR_EGENERAL;
1186 /* All checks were fine. Move tempfile to final destination */
1187 /* Link to the perm file, and close the descriptor */
1188 file_cache_el_final(dobj, r);
1189 ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server,
1190 "disk_cache: Body for URL %s cached.", dobj->name);
1193 return APR_SUCCESS;
1197 * CACHE_DECODE filter
1198 * ----------------
1200 * Deliver cached content (headers and body) up the stack.
1202 static int crccache_decode_filter(ap_filter_t *f, apr_bucket_brigade *bb) {
1203 apr_bucket *e;
1204 request_rec *r = f->r;
1205 // TODO: set up context type struct
1206 crccache_client_ctx *ctx = f->ctx;
1208 // if this is the first pass in decoding we should check the headers etc
1209 // and fix up those headers that we modified as part of the encoding
1210 if (ctx->headers_checked == 0)
1212 ctx->headers_checked = 1;
1214 ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,
1215 "CRCSYNC retuned status code (%d)", r->status);
1217 // TODO: make this work if we have multiple encodings
1218 const char * content_encoding;
1219 content_encoding = apr_table_get(r->headers_out, ENCODING_HEADER);
1220 if (content_encoding == NULL || strcmp(CRCCACHE_ENCODING, content_encoding)
1221 != 0) {
1222 ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,
1223 "CRCSYNC not decoding, content encoding bad (%s)", content_encoding?content_encoding:"NULL");
1224 ap_remove_output_filter(f);
1225 return ap_pass_brigade(f->next, bb);
1227 // TODO: Remove crcsync from the content encoding header
1228 // TODO: Remove If-block from the headers
1229 // TODO: Fix up the etag as well
1234 /* Do nothing if asked to filter nothing. */
1235 if (APR_BRIGADE_EMPTY(bb)) {
1236 return ap_pass_brigade(f->next, bb);
1239 /* We require that we have a context already, otherwise we dont have our cached file
1240 * to fill in the gaps with.
1242 if (!ctx) {
1243 ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,
1244 "No context available %s", r->uri);
1245 ap_remove_output_filter(f);
1246 return ap_pass_brigade(f->next, bb);
1249 while (!APR_BRIGADE_EMPTY(bb))
1251 const char *data;
1252 apr_size_t len;
1254 e = APR_BRIGADE_FIRST(bb);
1256 if (APR_BUCKET_IS_EOS(e)) {
1258 /* Remove EOS from the old list, and insert into the new. */
1259 APR_BUCKET_REMOVE(e);
1260 APR_BRIGADE_INSERT_TAIL(ctx->bb, e);
1262 /* This filter is done once it has served up its content */
1263 ap_remove_output_filter(f);
1265 // check strong hash here
1266 unsigned md_len;
1267 unsigned char md_value[EVP_MAX_MD_SIZE];
1268 EVP_DigestFinal_ex(&ctx->mdctx, md_value, &md_len);
1269 EVP_MD_CTX_cleanup(&ctx->mdctx);
1271 if (memcmp(md_value, ctx->md_value_rx, 20) != 0)
1273 ap_log_error_wrapper(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"CRCSYNC-DECODE HASH CHECK FAILED");
1274 apr_brigade_cleanup(bb);
1275 return APR_EGENERAL;
1277 else
1279 ap_log_error_wrapper(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"CRCSYNC-DECODE HASH CHECK PASSED");
1282 /* Okay, we've seen the EOS.
1283 * Time to pass it along down the chain.
1285 return ap_pass_brigade(f->next, ctx->bb);
1288 if (APR_BUCKET_IS_FLUSH(e)) {
1289 apr_status_t rv;
1291 /* Remove flush bucket from old brigade anf insert into the new. */
1292 APR_BUCKET_REMOVE(e);
1293 APR_BRIGADE_INSERT_TAIL(ctx->bb, e);
1294 rv = ap_pass_brigade(f->next, ctx->bb);
1295 if (rv != APR_SUCCESS) {
1296 return rv;
1298 continue;
1301 if (APR_BUCKET_IS_METADATA(e)) {
1303 * Remove meta data bucket from old brigade and insert into the
1304 * new.
1306 APR_BUCKET_REMOVE(e);
1307 APR_BRIGADE_INSERT_TAIL(ctx->bb, e);
1308 continue;
1311 /* read */
1312 apr_bucket_read(e, &data, &len, APR_BLOCK_READ);
1313 //ap_log_error_wrapper(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"CRCSYNC-DECODE read %zd bytes",len);
1315 apr_size_t consumed_bytes = 0;
1316 while (consumed_bytes < len)
1318 //ap_log_error_wrapper(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"CRCSYNC-DECODE remaining %zd bytes",len - consumed_bytes);
1319 // no guaruntee that our buckets line up with our encoding sections
1320 // so we need a processing state machine stored in our context
1321 switch (ctx->state)
1323 case DECODING_NEW_SECTION:
1325 // check if we have a compressed section or a block section
1326 if (data[consumed_bytes] == ENCODING_COMPRESSED)
1327 ctx->state = DECODING_COMPRESSED;
1328 else if (data[consumed_bytes] == ENCODING_BLOCK)
1329 ctx->state = DECODING_BLOCK_HEADER;
1330 else if (data[consumed_bytes] == ENCODING_LITERAL)
1332 ctx->state = DECODING_LITERAL_SIZE;
1333 ctx->partial_literal = NULL;
1334 ctx->rx_count = 0;
1336 else if (data[consumed_bytes] == ENCODING_HASH)
1338 ctx->state = DECODING_HASH;
1339 ctx->rx_count = 0;
1341 else
1343 ap_log_error(APLOG_MARK, APLOG_ERR, APR_SUCCESS, r->server,
1344 "CRCSYNC-DECODE, unknown section %d(%c)",data[consumed_bytes],data[consumed_bytes]);
1345 apr_brigade_cleanup(bb);
1346 return APR_EGENERAL;
1348 //ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"CRCSYNC-DECODE found a new section %d",ctx->state);
1349 consumed_bytes++;
1350 break;
1352 case DECODING_BLOCK_HEADER:
1354 unsigned char block_number = data[consumed_bytes];
1355 consumed_bytes++;
1356 ctx->state = DECODING_NEW_SECTION;
1358 // TODO: Output the indicated block here
1359 size_t current_block_size = block_number < FULL_BLOCK_COUNT ? ctx->block_size : ctx->tail_block_size;
1360 ap_log_error_wrapper(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,
1361 "CRCSYNC-DECODE block section, block %d, size %zu" ,block_number, current_block_size);
1363 char * buf = apr_palloc(r->pool, current_block_size);
1364 const char * source_data;
1365 size_t source_len;
1366 apr_bucket_read(ctx->cached_bucket, &source_data, &source_len, APR_BLOCK_READ);
1367 assert(block_number < (FULL_BLOCK_COUNT + (ctx->tail_block_size != 0)));
1368 memcpy(buf,&source_data[block_number*ctx->block_size],current_block_size);
1369 // update our sha1 hash
1370 EVP_DigestUpdate(&ctx->mdctx, buf, current_block_size);
1371 apr_bucket * b = apr_bucket_pool_create(buf, current_block_size, r->pool, f->c->bucket_alloc);
1372 APR_BRIGADE_INSERT_TAIL(ctx->bb, b);
1373 break;
1375 case DECODING_LITERAL_SIZE:
1377 unsigned avail_in = len - consumed_bytes;
1378 // if we havent got the full int then store the data for later
1379 if (avail_in < 4 || ctx->rx_count != 0)
1381 if (ctx->partial_literal == NULL)
1383 ctx->partial_literal = apr_palloc(r->pool, 4);
1385 unsigned len_to_copy = MIN(4-ctx->rx_count, avail_in);
1386 memcpy(&ctx->partial_literal[ctx->rx_count], &data[consumed_bytes],len_to_copy);
1387 ctx->rx_count += len_to_copy;
1388 consumed_bytes += len_to_copy;
1390 if (ctx->rx_count == 4)
1392 ctx->literal_size = ntohl(*(unsigned*)ctx->partial_literal);
1393 ctx->rx_count = 0;
1395 else
1397 break;
1400 else
1402 ctx->literal_size = ntohl(*(unsigned*)&data[consumed_bytes]);
1403 consumed_bytes += 4;
1405 ctx->partial_literal = apr_palloc(r->pool, ctx->literal_size);
1406 ctx->state = DECODING_LITERAL_BODY;
1407 break;
1409 case DECODING_LITERAL_BODY:
1411 unsigned avail_in = len - consumed_bytes;
1412 unsigned len_to_copy = MIN(ctx->literal_size-ctx->rx_count, avail_in);
1413 memcpy(&ctx->partial_literal[ctx->rx_count], &data[consumed_bytes],len_to_copy);
1414 ctx->rx_count += len_to_copy;
1415 consumed_bytes += len_to_copy;
1417 if (ctx->rx_count == ctx->literal_size)
1419 EVP_DigestUpdate(&ctx->mdctx, ctx->partial_literal, ctx->literal_size);
1420 apr_bucket * b = apr_bucket_pool_create((char*)ctx->partial_literal, ctx->literal_size, r->pool, f->c->bucket_alloc);
1421 APR_BRIGADE_INSERT_TAIL(ctx->bb, b);
1422 ctx->state = DECODING_NEW_SECTION;
1425 break;
1427 case DECODING_HASH:
1429 unsigned avail_in = len - consumed_bytes;
1430 // 20 bytes for an SHA1 hash
1431 unsigned needed = MIN(20-ctx->rx_count, avail_in);
1432 memcpy(&ctx->md_value_rx[ctx->rx_count], &data[consumed_bytes],needed);
1433 ctx->rx_count+=needed;
1434 consumed_bytes += needed;
1435 if (ctx->rx_count == 20)
1437 ctx->state = DECODING_NEW_SECTION;
1439 break;
1441 case DECODING_COMPRESSED:
1443 unsigned char decompressed_data_buf[30000];
1444 int z_RC;
1445 z_stream *strm = ctx->decompression_stream;
1446 strm->avail_in = len - consumed_bytes;
1447 strm->next_in = (Bytef *)(data + consumed_bytes);
1448 // ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server, "CRCSYNC-DECODE inflating %d bytes", strm.avail_in);
1449 // ap_log_hex(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server, strm.next_in, strm.avail_in);
1450 do {
1451 strm->avail_out = sizeof(decompressed_data_buf);
1452 strm->next_out = decompressed_data_buf;
1453 uInt avail_in_pre_inflate = strm->avail_in;
1454 z_RC = inflate(strm, Z_NO_FLUSH);
1455 if (z_RC == Z_NEED_DICT || z_RC == Z_DATA_ERROR || z_RC == Z_MEM_ERROR)
1457 ap_log_error(APLOG_MARK, APLOG_ERR, APR_EGENERAL, r->server, "CRCSYNC-DECODE inflate error: %d", z_RC);
1458 apr_brigade_cleanup(bb);
1459 return APR_EGENERAL;
1461 int have = sizeof(decompressed_data_buf) - strm->avail_out;
1462 ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,
1463 "CRCSYNC-DECODE inflate rslt %d, consumed %d, produced %d",
1464 z_RC, avail_in_pre_inflate - strm->avail_in, have);
1465 if (have)
1467 // write output data
1468 char * buf = apr_palloc(r->pool, have);
1469 memcpy(buf,decompressed_data_buf,have);
1470 EVP_DigestUpdate(&ctx->mdctx, buf, have);
1471 apr_bucket * b = apr_bucket_pool_create(buf, have, r->pool, f->c->bucket_alloc);
1472 APR_BRIGADE_INSERT_TAIL(ctx->bb, b);
1474 } while (strm->avail_out == 0);
1475 consumed_bytes = len - strm->avail_in;
1476 if (z_RC == Z_STREAM_END)
1478 ctx->state = DECODING_NEW_SECTION;
1479 inflateReset(strm);
1481 break;
1483 default:
1485 ap_log_error(APLOG_MARK, APLOG_ERR, APR_SUCCESS, r->server,
1486 "CRCSYNC-DECODE, unknown state %d, terminating transaction",ctx->state);
1487 apr_brigade_cleanup(bb);
1488 return APR_EGENERAL; // TODO: figure out how to pass the error on to the client
1491 APR_BUCKET_REMOVE(e);
1495 apr_brigade_cleanup(bb);
1496 return APR_SUCCESS;
1499 static void *create_config(apr_pool_t *p, server_rec *s) {
1500 crccache_client_conf *conf = apr_pcalloc(p, sizeof(crccache_client_conf));
1501 /* array of URL prefixes for which caching is enabled */
1502 conf->cacheenable = apr_array_make(p, 10, sizeof(struct cache_enable));
1503 /* array of URL prefixes for which caching is enabled */
1504 conf->cacheenable = apr_array_make(p, 10, sizeof(struct cache_enable));
1505 /* array of URL prefixes for which caching is disabled */
1506 conf->cachedisable = apr_array_make(p, 10, sizeof(struct cache_disable));
1507 /* maximum time to cache a document */
1508 conf->maxex = DEFAULT_CACHE_MAXEXPIRE;
1509 conf->maxex_set = 0;
1510 conf->minex = DEFAULT_CACHE_MINEXPIRE;
1511 conf->minex_set = 0;
1512 /* default time to cache a document */
1513 conf->defex = DEFAULT_CACHE_EXPIRE;
1514 conf->defex_set = 0;
1515 /* factor used to estimate Expires date from LastModified date */
1516 conf->factor = DEFAULT_CACHE_LMFACTOR;
1517 conf->factor_set = 0;
1518 conf->no_last_mod_ignore_set = 0;
1519 conf->no_last_mod_ignore = 0;
1520 conf->ignorecachecontrol = 0;
1521 conf->ignorecachecontrol_set = 0;
1522 conf->store_private = 0;
1523 conf->store_private_set = 0;
1524 conf->store_nostore = 0;
1525 conf->store_nostore_set = 0;
1526 /* array of headers that should not be stored in cache */
1527 conf->ignore_headers = apr_array_make(p, 10, sizeof(char *));
1528 conf->ignore_headers_set = CACHE_IGNORE_HEADERS_UNSET;
1529 /* flag indicating that query-string should be ignored when caching */
1530 conf->ignorequerystring = 0;
1531 conf->ignorequerystring_set = 0;
1533 /* XXX: Set default values */
1534 conf->dirlevels = DEFAULT_DIRLEVELS;
1535 conf->dirlength = DEFAULT_DIRLENGTH;
1536 conf->maxfs = DEFAULT_MAX_FILE_SIZE;
1537 conf->minfs = DEFAULT_MIN_FILE_SIZE;
1539 conf->cache_root = NULL;
1540 conf->cache_root_len = 0;
1542 return conf;
1546 * mod_disk_cache configuration directives handlers.
1548 static const char *set_cache_root(cmd_parms *parms, void *in_struct_ptr,
1549 const char *arg) {
1550 crccache_client_conf *conf = ap_get_module_config(parms->server->module_config,
1551 &crccache_client_module);
1552 conf->cache_root = arg;
1553 conf->cache_root_len = strlen(arg);
1554 /* TODO: canonicalize cache_root and strip off any trailing slashes */
1556 return NULL;
1560 * Consider eliminating the next two directives in favor of
1561 * Ian's prime number hash...
1562 * key = hash_fn( r->uri)
1563 * filename = "/key % prime1 /key %prime2/key %prime3"
1565 static const char *set_cache_dirlevels(cmd_parms *parms, void *in_struct_ptr,
1566 const char *arg) {
1567 crccache_client_conf *conf = ap_get_module_config(parms->server->module_config,
1568 &crccache_client_module);
1569 int val = atoi(arg);
1570 if (val < 1)
1571 return "CacheDirLevelsClient value must be an integer greater than 0";
1572 if (val * conf->dirlength > CACHEFILE_LEN)
1573 return "CacheDirLevelsClient*CacheDirLengthClient value must not be higher than 20";
1574 conf->dirlevels = val;
1575 return NULL;
1577 static const char *set_cache_dirlength(cmd_parms *parms, void *in_struct_ptr,
1578 const char *arg) {
1579 crccache_client_conf *conf = ap_get_module_config(parms->server->module_config,
1580 &crccache_client_module);
1581 int val = atoi(arg);
1582 if (val < 1)
1583 return "CacheDirLengthClient value must be an integer greater than 0";
1584 if (val * conf->dirlevels > CACHEFILE_LEN)
1585 return "CacheDirLevelsClient*CacheDirLengthClient value must not be higher than 20";
1587 conf->dirlength = val;
1588 return NULL;
1591 static const char *set_cache_minfs(cmd_parms *parms, void *in_struct_ptr,
1592 const char *arg) {
1593 crccache_client_conf *conf = ap_get_module_config(parms->server->module_config,
1594 &crccache_client_module);
1596 if (apr_strtoff(&conf->minfs, arg, NULL, 0) != APR_SUCCESS || conf->minfs
1597 < 0) {
1598 return "CacheMinFileSizeClient argument must be a non-negative integer representing the min size of a file to cache in bytes.";
1600 return NULL;
1603 static const char *set_cache_maxfs(cmd_parms *parms, void *in_struct_ptr,
1604 const char *arg) {
1605 crccache_client_conf *conf = ap_get_module_config(parms->server->module_config,
1606 &crccache_client_module);
1607 if (apr_strtoff(&conf->maxfs, arg, NULL, 0) != APR_SUCCESS || conf->maxfs
1608 < 0) {
1609 return "CacheMaxFileSizeClient argument must be a non-negative integer representing the max size of a file to cache in bytes.";
1611 return NULL;
1614 static const char *add_crc_client_enable(cmd_parms *parms, void *dummy,
1615 const char *type,
1616 const char *url)
1618 crccache_client_conf *conf;
1619 struct cache_enable *new;
1621 if (*type == '/') {
1622 return apr_psprintf(parms->pool,
1623 "provider (%s) starts with a '/'. Are url and provider switched?",
1624 type);
1627 conf =
1628 (crccache_client_conf *)ap_get_module_config(parms->server->module_config,
1629 &crccache_client_module);
1630 new = apr_array_push(conf->cacheenable);
1631 new->type = type;
1632 if (apr_uri_parse(parms->pool, url, &(new->url))) {
1633 return NULL;
1635 if (new->url.path) {
1636 new->pathlen = strlen(new->url.path);
1637 } else {
1638 new->pathlen = 1;
1639 new->url.path = "/";
1641 return NULL;
1644 static const command_rec disk_cache_cmds[] =
1646 AP_INIT_TAKE2("CRCClientEnable", add_crc_client_enable, NULL, RSRC_CONF, "A cache type and partial URL prefix below which caching is enabled"),
1647 AP_INIT_TAKE1("CacheRootClient", set_cache_root, NULL, RSRC_CONF,"The directory to store cache files"),
1648 AP_INIT_TAKE1("CacheDirLevelsClient", set_cache_dirlevels, NULL, RSRC_CONF, "The number of levels of subdirectories in the cache"),
1649 AP_INIT_TAKE1("CacheDirLengthClient", set_cache_dirlength, NULL, RSRC_CONF, "The number of characters in subdirectory names"),
1650 AP_INIT_TAKE1("CacheMinFileSizeClient", set_cache_minfs, NULL, RSRC_CONF, "The minimum file size to cache a document"),
1651 AP_INIT_TAKE1("CacheMaxFileSizeClient", set_cache_maxfs, NULL, RSRC_CONF, "The maximum file size to cache a document"),
1652 { NULL }
1655 int ap_run_insert_filter(request_rec *r);
1657 int cache_url_handler(request_rec *r, int lookup)
1659 apr_status_t rv;
1660 const char *auth;
1661 cache_request_rec *cache;
1662 crccache_client_conf *conf;
1663 apr_bucket_brigade *out;
1664 ap_filter_t *next;
1665 ap_filter_rec_t *cache_out_handle;
1667 /* Delay initialization until we know we are handling a GET */
1668 if (r->method_number != M_GET) {
1669 return DECLINED;
1672 conf = (crccache_client_conf *) ap_get_module_config(r->server->module_config,
1673 &crccache_client_module);
1675 /* make space for the per request config */
1676 cache = (cache_request_rec *) ap_get_module_config(r->request_config,
1677 &crccache_client_module);
1678 if (!cache) {
1679 cache = apr_pcalloc(r->pool, sizeof(cache_request_rec));
1680 ap_set_module_config(r->request_config, &crccache_client_module, cache);
1684 * Are we allowed to serve cached info at all?
1687 /* find certain cache controlling headers */
1688 auth = apr_table_get(r->headers_in, "Authorization");
1690 /* First things first - does the request allow us to return
1691 * cached information at all? If not, just decline the request.
1693 if (auth) {
1694 return DECLINED;
1698 * Try to serve this request from the cache.
1700 * If no existing cache file (DECLINED)
1701 * add cache_save filter
1702 * If cached file (OK)
1703 * clear filter stack
1704 * add cache_out filter
1705 * return OK
1707 rv = cache_select(r);
1708 if (rv != OK) {
1709 if (rv == DECLINED) {
1710 if (!lookup) {
1713 * Add cache_save filter to cache this request. Choose
1714 * the correct filter by checking if we are a subrequest
1715 * or not.
1717 if (r->main) {
1718 ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS,
1719 r->server,
1720 "Adding CACHE_SAVE_SUBREQ filter for %s",
1721 r->uri);
1722 ap_add_output_filter_handle(cache_save_subreq_filter_handle,
1723 NULL, r, r->connection);
1725 else {
1726 ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS,
1727 r->server, "Adding CACHE_SAVE filter for %s",
1728 r->uri);
1729 ap_add_output_filter_handle(cache_save_filter_handle,
1730 NULL, r, r->connection);
1733 ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,
1734 "Adding CACHE_REMOVE_URL filter for %s",
1735 r->uri);
1737 /* Add cache_remove_url filter to this request to remove a
1738 * stale cache entry if needed. Also put the current cache
1739 * request rec in the filter context, as the request that
1740 * is available later during running the filter maybe
1741 * different due to an internal redirect.
1743 cache->remove_url_filter =
1744 ap_add_output_filter_handle(cache_remove_url_filter_handle,
1745 cache, r, r->connection);
1747 else {
1748 if (cache->stale_headers) {
1749 ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS,
1750 r->server, "Restoring request headers for %s",
1751 r->uri);
1753 r->headers_in = cache->stale_headers;
1756 /* Delete our per-request configuration. */
1757 ap_set_module_config(r->request_config, &crccache_client_module, NULL);
1760 else {
1761 /* error */
1762 ap_log_error(APLOG_MARK, APLOG_ERR, rv, r->server,
1763 "cache: error returned while checking for cached "
1764 "file by cache");
1766 return DECLINED;
1769 /* if we are a lookup, we are exiting soon one way or another; Restore
1770 * the headers. */
1771 if (lookup) {
1772 if (cache->stale_headers) {
1773 ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,
1774 "Restoring request headers.");
1775 r->headers_in = cache->stale_headers;
1778 /* Delete our per-request configuration. */
1779 ap_set_module_config(r->request_config, &crccache_client_module, NULL);
1782 rv = ap_meets_conditions(r);
1783 if (rv != OK) {
1784 /* If we are a lookup, we have to return DECLINED as we have no
1785 * way of knowing if we will be able to serve the content.
1787 if (lookup) {
1788 return DECLINED;
1791 /* Return cached status. */
1792 return rv;
1795 /* If we're a lookup, we can exit now instead of serving the content. */
1796 if (lookup) {
1797 return OK;
1800 /* Serve up the content */
1802 /* We are in the quick handler hook, which means that no output
1803 * filters have been set. So lets run the insert_filter hook.
1805 ap_run_insert_filter(r);
1808 * Add cache_out filter to serve this request. Choose
1809 * the correct filter by checking if we are a subrequest
1810 * or not.
1812 if (r->main) {
1813 cache_out_handle = cache_out_subreq_filter_handle;
1815 else {
1816 cache_out_handle = cache_out_filter_handle;
1818 ap_add_output_filter_handle(cache_out_handle, NULL, r, r->connection);
1821 * Remove all filters that are before the cache_out filter. This ensures
1822 * that we kick off the filter stack with our cache_out filter being the
1823 * first in the chain. This make sense because we want to restore things
1824 * in the same manner as we saved them.
1825 * There may be filters before our cache_out filter, because
1827 * 1. We call ap_set_content_type during cache_select. This causes
1828 * Content-Type specific filters to be added.
1829 * 2. We call the insert_filter hook. This causes filters e.g. like
1830 * the ones set with SetOutputFilter to be added.
1832 next = r->output_filters;
1833 while (next && (next->frec != cache_out_handle)) {
1834 ap_remove_output_filter(next);
1835 next = next->next;
1838 /* kick off the filter stack */
1839 out = apr_brigade_create(r->pool, r->connection->bucket_alloc);
1840 rv = ap_pass_brigade(r->output_filters, out);
1841 if (rv != APR_SUCCESS) {
1842 if (rv != AP_FILTER_ERROR) {
1843 ap_log_error(APLOG_MARK, APLOG_ERR, rv, r->server,
1844 "cache: error returned while trying to return "
1845 "cached data");
1847 return rv;
1850 return OK;
1856 * CACHE_OUT filter
1857 * ----------------
1859 * Deliver cached content (headers and body) up the stack.
1861 int cache_out_filter(ap_filter_t *f, apr_bucket_brigade *bb)
1863 request_rec *r = f->r;
1864 cache_request_rec *cache;
1866 cache = (cache_request_rec *) ap_get_module_config(r->request_config,
1867 &crccache_client_module);
1869 if (!cache) {
1870 /* user likely configured CACHE_OUT manually; they should use mod_cache
1871 * configuration to do that */
1872 ap_log_error(APLOG_MARK, APLOG_ERR, 0, r->server,
1873 "CACHE_OUT enabled unexpectedly");
1874 ap_remove_output_filter(f);
1875 return ap_pass_brigade(f->next, bb);
1878 ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,
1879 "cache: running CACHE_OUT filter");
1881 /* restore status of cached response */
1882 /* XXX: This exposes a bug in mem_cache, since it does not
1883 * restore the status into it's handle. */
1884 r->status = cache->handle->cache_obj->info.status;
1886 /* recall_headers() was called in cache_select() */
1887 recall_body(cache->handle, r->pool, bb);
1889 /* This filter is done once it has served up its content */
1890 ap_remove_output_filter(f);
1892 ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,
1893 "cache: serving %s", r->uri);
1894 return ap_pass_brigade(f->next, bb);
1899 * CACHE_SAVE filter
1900 * ---------------
1902 * Decide whether or not this content should be cached.
1903 * If we decide no it should not:
1904 * remove the filter from the chain
1905 * If we decide yes it should:
1906 * Have we already started saving the response?
1907 * If we have started, pass the data to the storage manager via store_body
1908 * Otherwise:
1909 * Check to see if we *can* save this particular response.
1910 * If we can, call cache_create_entity() and save the headers and body
1911 * Finally, pass the data to the next filter (the network or whatever)
1914 int cache_save_filter(ap_filter_t *f, apr_bucket_brigade *in)
1916 int rv = !OK;
1917 request_rec *r = f->r;
1918 cache_request_rec *cache;
1919 crccache_client_conf *conf;
1920 //const char *cc_out, *cl;
1921 const char *cl;
1922 const char *exps, /* *lastmods,*/ *dates;//, *etag;
1923 apr_time_t exp, date,/* lastmod,*/ now;
1924 apr_off_t size;
1925 cache_info *info = NULL;
1926 char *reason;
1927 apr_pool_t *p;
1929 conf = (crccache_client_conf *) ap_get_module_config(r->server->module_config,
1930 &crccache_client_module);
1932 /* Setup cache_request_rec */
1933 cache = (cache_request_rec *) ap_get_module_config(r->request_config,
1934 &crccache_client_module);
1935 if (!cache) {
1936 /* user likely configured CACHE_SAVE manually; they should really use
1937 * mod_cache configuration to do that
1939 cache = apr_pcalloc(r->pool, sizeof(cache_request_rec));
1940 ap_set_module_config(r->request_config, &crccache_client_module, cache);
1943 reason = NULL;
1944 p = r->pool;
1946 * Pass Data to Cache
1947 * ------------------
1948 * This section passes the brigades into the cache modules, but only
1949 * if the setup section (see below) is complete.
1951 if (cache->block_response) {
1952 /* We've already sent down the response and EOS. So, ignore
1953 * whatever comes now.
1955 return APR_SUCCESS;
1958 /* have we already run the cachability check and set up the
1959 * cached file handle?
1961 if (cache->in_checked) {
1962 /* pass the brigades into the cache, then pass them
1963 * up the filter stack
1965 rv = store_body(cache->handle, r, in);
1966 if (rv != APR_SUCCESS) {
1967 ap_log_error(APLOG_MARK, APLOG_DEBUG, rv, r->server,
1968 "cache: Cache provider's store_body failed!");
1969 ap_remove_output_filter(f);
1971 return ap_pass_brigade(f->next, in);
1975 * Setup Data in Cache
1976 * -------------------
1977 * This section opens the cache entity and sets various caching
1978 * parameters, and decides whether this URL should be cached at
1979 * all. This section is* run before the above section.
1982 /* read expiry date; if a bad date, then leave it so the client can
1983 * read it
1985 exps = apr_table_get(r->err_headers_out, "Expires");
1986 if (exps == NULL) {
1987 exps = apr_table_get(r->headers_out, "Expires");
1989 if (exps != NULL) {
1990 if (APR_DATE_BAD == (exp = apr_date_parse_http(exps))) {
1991 exps = NULL;
1994 else {
1995 exp = APR_DATE_BAD;
1997 // we dont care about these
1998 #if 0
1999 /* read the last-modified date; if the date is bad, then delete it */
2000 lastmods = apr_table_get(r->err_headers_out, "Last-Modified");
2001 if (lastmods == NULL) {
2002 lastmods = apr_table_get(r->headers_out, "Last-Modified");
2004 if (lastmods != NULL) {
2005 lastmod = apr_date_parse_http(lastmods);
2006 if (lastmod == APR_DATE_BAD) {
2007 lastmods = NULL;
2010 else {
2011 lastmod = APR_DATE_BAD;
2014 /* read the etag and cache-control from the entity */
2015 etag = apr_table_get(r->err_headers_out, "Etag");
2016 if (etag == NULL) {
2017 etag = apr_table_get(r->headers_out, "Etag");
2019 cc_out = apr_table_get(r->err_headers_out, "Cache-Control");
2020 if (cc_out == NULL) {
2021 cc_out = apr_table_get(r->headers_out, "Cache-Control");
2023 #endif
2025 * what responses should we not cache?
2027 * At this point we decide based on the response headers whether it
2028 * is appropriate _NOT_ to cache the data from the server. There are
2029 * a whole lot of conditions that prevent us from caching this data.
2030 * They are tested here one by one to be clear and unambiguous.
2032 if (r->status != HTTP_OK && r->status != HTTP_NON_AUTHORITATIVE
2033 && r->status != HTTP_MULTIPLE_CHOICES
2034 && r->status != HTTP_MOVED_PERMANENTLY
2035 && r->status != HTTP_NOT_MODIFIED) {
2036 /* RFC2616 13.4 we are allowed to cache 200, 203, 206, 300, 301 or 410
2037 * We don't cache 206, because we don't (yet) cache partial responses.
2038 * We include 304 Not Modified here too as this is the origin server
2039 * telling us to serve the cached copy.
2041 #if 0
2042 if (exps != NULL || cc_out != NULL) {
2043 /* We are also allowed to cache any response given that it has a
2044 * valid Expires or Cache Control header. If we find a either of
2045 * those here, we pass request through the rest of the tests. From
2046 * the RFC:
2048 * A response received with any other status code (e.g. status
2049 * codes 302 and 307) MUST NOT be returned in a reply to a
2050 * subsequent request unless there are cache-control directives or
2051 * another header(s) that explicitly allow it. For example, these
2052 * include the following: an Expires header (section 14.21); a
2053 * "max-age", "s-maxage", "must-revalidate", "proxy-revalidate",
2054 * "public" or "private" cache-control directive (section 14.9).
2057 else {
2058 reason = apr_psprintf(p, "Response status %d", r->status);
2060 #endif
2063 if (reason) {
2064 /* noop */
2066 #if 0
2067 else if (exps != NULL && exp == APR_DATE_BAD) {
2068 /* if a broken Expires header is present, don't cache it */
2069 reason = apr_pstrcat(p, "Broken expires header: ", exps, NULL);
2071 else if (exp != APR_DATE_BAD && exp < r->request_time)
2073 /* if a Expires header is in the past, don't cache it */
2074 reason = "Expires header already expired, not cacheable";
2076 else if (!conf->ignorequerystring && r->parsed_uri.query && exps == NULL &&
2077 !ap_cache_liststr(NULL, cc_out, "max-age", NULL)) {
2078 /* if a query string is present but no explicit expiration time,
2079 * don't cache it (RFC 2616/13.9 & 13.2.1)
2081 reason = "Query string present but no explicit expiration time";
2083 #endif
2084 else if (r->status == HTTP_NOT_MODIFIED &&
2085 !cache->handle && !cache->stale_handle) {
2086 /* if the server said 304 Not Modified but we have no cache
2087 * file - pass this untouched to the user agent, it's not for us.
2089 reason = "HTTP Status 304 Not Modified";
2091 #if 0
2092 else if (r->status == HTTP_OK && lastmods == NULL && etag == NULL
2093 && (exps == NULL) && (conf->no_last_mod_ignore ==0)) {
2094 /* 200 OK response from HTTP/1.0 and up without Last-Modified,
2095 * Etag, or Expires headers.
2097 /* Note: mod-include clears last_modified/expires/etags - this
2098 * is why we have an optional function for a key-gen ;-)
2100 reason = "No Last-Modified, Etag, or Expires headers";
2102 #endif
2103 else if (r->header_only && !cache->stale_handle) {
2104 /* Forbid HEAD requests unless we have it cached already */
2105 reason = "HTTP HEAD request";
2107 #if 0
2108 else if (!conf->store_nostore &&
2109 ap_cache_liststr(NULL, cc_out, "no-store", NULL)) {
2110 /* RFC2616 14.9.2 Cache-Control: no-store response
2111 * indicating do not cache, or stop now if you are
2112 * trying to cache it.
2114 /* FIXME: The Cache-Control: no-store could have come in on a 304,
2115 * FIXME: while the original request wasn't conditional. IOW, we
2116 * FIXME: made the request conditional earlier to revalidate
2117 * FIXME: our cached response.
2119 reason = "Cache-Control: no-store present";
2121 else if (!conf->store_private &&
2122 ap_cache_liststr(NULL, cc_out, "private", NULL)) {
2123 /* RFC2616 14.9.1 Cache-Control: private response
2124 * this object is marked for this user's eyes only. Behave
2125 * as a tunnel.
2127 /* FIXME: See above (no-store) */
2128 reason = "Cache-Control: private present";
2130 else if (apr_table_get(r->headers_in, "Authorization") != NULL
2131 && !(ap_cache_liststr(NULL, cc_out, "s-maxage", NULL)
2132 || ap_cache_liststr(NULL, cc_out, "must-revalidate", NULL)
2133 || ap_cache_liststr(NULL, cc_out, "public", NULL))) {
2134 /* RFC2616 14.8 Authorisation:
2135 * if authorisation is included in the request, we don't cache,
2136 * but we can cache if the following exceptions are true:
2137 * 1) If Cache-Control: s-maxage is included
2138 * 2) If Cache-Control: must-revalidate is included
2139 * 3) If Cache-Control: public is included
2141 reason = "Authorization required";
2144 else if (ap_cache_liststr(NULL,
2145 apr_table_get(r->headers_out, "Vary"),
2146 "*", NULL)) {
2147 reason = "Vary header contains '*'";
2149 else if (apr_table_get(r->subprocess_env, "no-cache") != NULL) {
2150 reason = "environment variable 'no-cache' is set";
2152 else if (r->no_cache) {
2153 /* or we've been asked not to cache it above */
2154 reason = "r->no_cache present";
2156 #endif
2157 if (reason) {
2158 ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server,
2159 "cache: %s not cached. Reason: %s", r->unparsed_uri,
2160 reason);
2162 /* remove this filter from the chain */
2163 ap_remove_output_filter(f);
2165 /* ship the data up the stack */
2166 return ap_pass_brigade(f->next, in);
2169 /* Make it so that we don't execute this path again. */
2170 cache->in_checked = 1;
2172 /* Set the content length if known.
2174 cl = apr_table_get(r->err_headers_out, "Content-Length");
2175 if (cl == NULL) {
2176 cl = apr_table_get(r->headers_out, "Content-Length");
2178 if (cl) {
2179 char *errp;
2180 if (apr_strtoff(&size, cl, &errp, 10) || *errp || size < 0) {
2181 cl = NULL; /* parse error, see next 'if' block */
2185 if (!cl) {
2186 /* if we don't get the content-length, see if we have all the
2187 * buckets and use their length to calculate the size
2189 apr_bucket *e;
2190 int all_buckets_here=0;
2191 int unresolved_length = 0;
2192 size=0;
2193 for (e = APR_BRIGADE_FIRST(in);
2194 e != APR_BRIGADE_SENTINEL(in);
2195 e = APR_BUCKET_NEXT(e))
2197 if (APR_BUCKET_IS_EOS(e)) {
2198 all_buckets_here=1;
2199 break;
2201 if (APR_BUCKET_IS_FLUSH(e)) {
2202 unresolved_length = 1;
2203 continue;
2205 if (e->length == (apr_size_t)-1) {
2206 break;
2208 size += e->length;
2210 if (!all_buckets_here) {
2211 size = -1;
2215 /* It's safe to cache the response.
2217 * There are two possibilities at this point:
2218 * - cache->handle == NULL. In this case there is no previously
2219 * cached entity anywhere on the system. We must create a brand
2220 * new entity and store the response in it.
2221 * - cache->stale_handle != NULL. In this case there is a stale
2222 * entity in the system which needs to be replaced by new
2223 * content (unless the result was 304 Not Modified, which means
2224 * the cached entity is actually fresh, and we should update
2225 * the headers).
2228 /* Did we have a stale cache entry that really is stale?
2230 * Note that for HEAD requests, we won't get the body, so for a stale
2231 * HEAD request, we don't remove the entity - instead we let the
2232 * CACHE_REMOVE_URL filter remove the stale item from the cache.
2234 if (cache->stale_handle) {
2235 if (r->status == HTTP_NOT_MODIFIED) {
2236 /* Oh, hey. It isn't that stale! Yay! */
2237 cache->handle = cache->stale_handle;
2238 info = &cache->handle->cache_obj->info;
2239 rv = OK;
2241 else if (!r->header_only) {
2242 /* Oh, well. Toss it. */
2243 remove_entity(cache->stale_handle);
2244 /* Treat the request as if it wasn't conditional. */
2245 cache->stale_handle = NULL;
2247 * Restore the original request headers as they may be needed
2248 * by further output filters like the byterange filter to make
2249 * the correct decisions.
2251 r->headers_in = cache->stale_headers;
2255 /* no cache handle, create a new entity only for non-HEAD requests */
2256 if (!cache->handle && !r->header_only) {
2257 rv = cache_create_entity(r, size);
2258 info = apr_pcalloc(r->pool, sizeof(cache_info));
2259 /* We only set info->status upon the initial creation. */
2260 info->status = r->status;
2263 if (rv != OK) {
2264 /* Caching layer declined the opportunity to cache the response */
2265 ap_remove_output_filter(f);
2266 return ap_pass_brigade(f->next, in);
2269 ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server,
2270 "cache: Caching url: %s", r->unparsed_uri);
2272 /* We are actually caching this response. So it does not
2273 * make sense to remove this entity any more.
2275 ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server,
2276 "cache: Removing CACHE_REMOVE_URL filter.");
2277 ap_remove_output_filter(cache->remove_url_filter);
2280 * We now want to update the cache file header information with
2281 * the new date, last modified, expire and content length and write
2282 * it away to our cache file. First, we determine these values from
2283 * the response, using heuristics if appropriate.
2285 * In addition, we make HTTP/1.1 age calculations and write them away
2286 * too.
2289 /* Read the date. Generate one if one is not supplied */
2290 dates = apr_table_get(r->err_headers_out, "Date");
2291 if (dates == NULL) {
2292 dates = apr_table_get(r->headers_out, "Date");
2294 if (dates != NULL) {
2295 info->date = apr_date_parse_http(dates);
2297 else {
2298 info->date = APR_DATE_BAD;
2301 now = apr_time_now();
2302 if (info->date == APR_DATE_BAD) { /* No, or bad date */
2303 /* no date header (or bad header)! */
2304 info->date = now;
2306 date = info->date;
2308 /* set response_time for HTTP/1.1 age calculations */
2309 info->response_time = now;
2311 /* get the request time */
2312 info->request_time = r->request_time;
2313 #if 0
2314 /* check last-modified date */
2315 if (lastmod != APR_DATE_BAD && lastmod > date) {
2316 /* if it's in the future, then replace by date */
2317 lastmod = date;
2318 lastmods = dates;
2319 ap_log_error(APLOG_MARK, APLOG_DEBUG, 0,
2320 r->server,
2321 "cache: Last modified is in the future, "
2322 "replacing with now");
2325 /* if no expiry date then
2326 * if Cache-Control: max-age
2327 * expiry date = date + max-age
2328 * else if lastmod
2329 * expiry date = date + min((date - lastmod) * factor, maxexpire)
2330 * else
2331 * expire date = date + defaultexpire
2333 if (exp == APR_DATE_BAD) {
2334 char *max_age_val;
2336 if (ap_cache_liststr(r->pool, cc_out, "max-age", &max_age_val) &&
2337 max_age_val != NULL) {
2338 apr_int64_t x;
2340 errno = 0;
2341 x = apr_atoi64(max_age_val);
2342 if (errno) {
2343 x = conf->defex;
2345 else {
2346 x = x * MSEC_ONE_SEC;
2348 if (x < conf->minex) {
2349 x = conf->minex;
2351 if (x > conf->maxex) {
2352 x = conf->maxex;
2354 exp = date + x;
2356 else if ((lastmod != APR_DATE_BAD) && (lastmod < date)) {
2357 /* if lastmod == date then you get 0*conf->factor which results in
2358 * an expiration time of now. This causes some problems with
2359 * freshness calculations, so we choose the else path...
2361 apr_time_t x = (apr_time_t) ((date - lastmod) * conf->factor);
2363 if (x < conf->minex) {
2364 x = conf->minex;
2366 if (x > conf->maxex) {
2367 x = conf->maxex;
2369 exp = date + x;
2371 else {
2372 exp = date + conf->defex;
2375 #endif
2376 info->expire = exp;
2378 /* We found a stale entry which wasn't really stale. */
2379 if (cache->stale_handle) {
2380 /* Load in the saved status and clear the status line. */
2381 r->status = info->status;
2382 r->status_line = NULL;
2384 /* RFC 2616 10.3.5 states that entity headers are not supposed
2385 * to be in the 304 response. Therefore, we need to combine the
2386 * response headers with the cached headers *before* we update
2387 * the cached headers.
2389 * However, before doing that, we need to first merge in
2390 * err_headers_out and we also need to strip any hop-by-hop
2391 * headers that might have snuck in.
2393 r->headers_out = ap_cache_cacheable_headers_out(r);
2395 /* Merge in our cached headers. However, keep any updated values. */
2396 ap_cache_accept_headers(cache->handle, r, 1);
2399 /* Write away header information to cache. It is possible that we are
2400 * trying to update headers for an entity which has already been cached.
2402 * This may fail, due to an unwritable cache area. E.g. filesystem full,
2403 * permissions problems or a read-only (re)mount. This must be handled
2404 * later.
2406 rv = store_headers(cache->handle, r, info);
2408 /* Did we just update the cached headers on a revalidated response?
2410 * If so, we can now decide what to serve to the client. This is done in
2411 * the same way as with a regular response, but conditions are now checked
2412 * against the cached or merged response headers.
2414 if (cache->stale_handle) {
2415 apr_bucket_brigade *bb;
2416 apr_bucket *bkt;
2417 int status;
2419 bb = apr_brigade_create(r->pool, r->connection->bucket_alloc);
2421 /* Restore the original request headers and see if we need to
2422 * return anything else than the cached response (ie. the original
2423 * request was conditional).
2425 r->headers_in = cache->stale_headers;
2426 status = ap_meets_conditions(r);
2427 if (status != OK) {
2428 r->status = status;
2430 bkt = apr_bucket_flush_create(bb->bucket_alloc);
2431 APR_BRIGADE_INSERT_TAIL(bb, bkt);
2433 else {
2434 recall_body(cache->handle, r->pool, bb);
2437 cache->block_response = 1;
2439 /* Before returning we need to handle the possible case of an
2440 * unwritable cache. Rather than leaving the entity in the cache
2441 * and having it constantly re-validated, now that we have recalled
2442 * the body it is safe to try and remove the url from the cache.
2444 if (rv != APR_SUCCESS) {
2445 ap_log_error(APLOG_MARK, APLOG_DEBUG, rv, r->server,
2446 "cache: updating headers with store_headers failed. "
2447 "Removing cached url.");
2449 rv = remove_url(cache->stale_handle, r->pool);
2450 if (rv != OK) {
2451 /* Probably a mod_disk_cache cache area has been (re)mounted
2452 * read-only, or that there is a permissions problem.
2454 ap_log_error(APLOG_MARK, APLOG_DEBUG, rv, r->server,
2455 "cache: attempt to remove url from cache unsuccessful.");
2459 return ap_pass_brigade(f->next, bb);
2462 if(rv != APR_SUCCESS) {
2463 ap_log_error(APLOG_MARK, APLOG_DEBUG, rv, r->server,
2464 "cache: store_headers failed");
2465 ap_remove_output_filter(f);
2467 return ap_pass_brigade(f->next, in);
2470 rv = store_body(cache->handle, r, in);
2471 if (rv != APR_SUCCESS) {
2472 ap_log_error(APLOG_MARK, APLOG_DEBUG, rv, r->server,
2473 "cache: store_body failed");
2474 ap_remove_output_filter(f);
2477 return ap_pass_brigade(f->next, in);
2482 * CACHE_REMOVE_URL filter
2483 * ---------------
2485 * This filter gets added in the quick handler every time the CACHE_SAVE filter
2486 * gets inserted. Its purpose is to remove a confirmed stale cache entry from
2487 * the cache.
2489 * CACHE_REMOVE_URL has to be a protocol filter to ensure that is run even if
2490 * the response is a canned error message, which removes the content filters
2491 * and thus the CACHE_SAVE filter from the chain.
2493 * CACHE_REMOVE_URL expects cache request rec within its context because the
2494 * request this filter runs on can be different from the one whose cache entry
2495 * should be removed, due to internal redirects.
2497 * Note that CACHE_SAVE_URL (as a content-set filter, hence run before the
2498 * protocol filters) will remove this filter if it decides to cache the file.
2499 * Therefore, if this filter is left in, it must mean we need to toss any
2500 * existing files.
2502 int cache_remove_url_filter(ap_filter_t *f, apr_bucket_brigade *in)
2504 request_rec *r = f->r;
2505 cache_request_rec *cache;
2507 /* Setup cache_request_rec */
2508 cache = (cache_request_rec *) f->ctx;
2510 if (!cache) {
2511 /* user likely configured CACHE_REMOVE_URL manually; they should really
2512 * use mod_cache configuration to do that. So:
2513 * 1. Remove ourselves
2514 * 2. Do nothing and bail out
2516 ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server,
2517 "cache: CACHE_REMOVE_URL enabled unexpectedly");
2518 ap_remove_output_filter(f);
2519 return ap_pass_brigade(f->next, in);
2521 /* Now remove this cache entry from the cache */
2522 cache_remove_url(cache, r->pool);
2524 /* remove ourselves */
2525 ap_remove_output_filter(f);
2526 return ap_pass_brigade(f->next, in);
2529 static void disk_cache_register_hook(apr_pool_t *p) {
2530 ap_log_error(APLOG_MARK, APLOG_INFO, 0, NULL,
2531 "Registering crccache client module, (C) 2009, Toby Collett");
2533 /* cache initializer */
2534 /* cache handler */
2535 ap_hook_quick_handler(cache_url_handler, NULL, NULL, APR_HOOK_FIRST);
2536 /* cache filters
2537 * XXX The cache filters need to run right after the handlers and before
2538 * any other filters. Consider creating AP_FTYPE_CACHE for this purpose.
2540 * Depending on the type of request (subrequest / main request) they
2541 * need to be run before AP_FTYPE_CONTENT_SET / after AP_FTYPE_CONTENT_SET
2542 * filters. Thus create two filter handles for each type:
2543 * cache_save_filter_handle / cache_out_filter_handle to be used by
2544 * main requests and
2545 * cache_save_subreq_filter_handle / cache_out_subreq_filter_handle
2546 * to be run by subrequest
2549 * CACHE_SAVE must go into the filter chain after a possible DEFLATE
2550 * filter to ensure that the compressed content is stored.
2551 * Incrementing filter type by 1 ensures his happens.
2553 cache_save_filter_handle =
2554 ap_register_output_filter("CACHE_SAVE",
2555 cache_save_filter,
2556 NULL,
2557 AP_FTYPE_CONTENT_SET+1);
2559 * CACHE_SAVE_SUBREQ must go into the filter chain before SUBREQ_CORE to
2560 * handle subrequsts. Decrementing filter type by 1 ensures this
2561 * happens.
2563 cache_save_subreq_filter_handle =
2564 ap_register_output_filter("CACHE_SAVE_SUBREQ",
2565 cache_save_filter,
2566 NULL,
2567 AP_FTYPE_CONTENT_SET-1);
2569 * CACHE_OUT must go into the filter chain after a possible DEFLATE
2570 * filter to ensure that already compressed cache objects do not
2571 * get compressed again. Incrementing filter type by 1 ensures
2572 * his happens.
2574 cache_out_filter_handle =
2575 ap_register_output_filter("CACHE_OUT",
2576 cache_out_filter,
2577 NULL,
2578 AP_FTYPE_CONTENT_SET+1);
2580 * CACHE_OUT_SUBREQ must go into the filter chain before SUBREQ_CORE to
2581 * handle subrequsts. Decrementing filter type by 1 ensures this
2582 * happens.
2584 cache_out_subreq_filter_handle =
2585 ap_register_output_filter("CACHE_OUT_SUBREQ",
2586 cache_out_filter,
2587 NULL,
2588 AP_FTYPE_CONTENT_SET-1);
2589 /* CACHE_REMOVE_URL has to be a protocol filter to ensure that is
2590 * run even if the response is a canned error message, which
2591 * removes the content filters.
2593 cache_remove_url_filter_handle =
2594 ap_register_output_filter("CACHE_REMOVE_URL",
2595 cache_remove_url_filter,
2596 NULL,
2597 AP_FTYPE_PROTOCOL);
2600 * CACHE_OUT must go into the filter chain after a possible DEFLATE
2601 * filter to ensure that already compressed cache objects do not
2602 * get compressed again. Incrementing filter type by 1 ensures
2603 * his happens.
2605 crccache_decode_filter_handle = ap_register_output_filter(
2606 "CRCCACHE_DECODE", crccache_decode_filter, NULL,
2607 AP_FTYPE_CONTENT_SET + 1);
2609 ap_hook_post_config(cache_post_config, NULL, NULL, APR_HOOK_REALLY_FIRST);
2613 module AP_MODULE_DECLARE_DATA crccache_client_module = {
2614 STANDARD20_MODULE_STUFF, NULL, /* create per-directory config structure */
2615 NULL , /* merge per-directory config structures */
2616 create_config, /* create per-server config structure */
2617 NULL , /* merge per-server config structures */
2618 disk_cache_cmds, /* command apr_table_t */
2619 disk_cache_register_hook /* register hooks */