Add Capability header to server. Use ; as field-separator in headers
[httpd-crcsyncproxy.git] / crccache / mod_crccache_client.c
blob2f2cbafd7589bd835246aba6d7fccbd5c5bf2cfb
1 /* Licensed to the Apache Software Foundation (ASF) under one or more
2 * contributor license agreements. See the NOTICE file distributed with
3 * this work for additional information regarding copyright ownership.
4 * The ASF licenses this file to You under the Apache License, Version 2.0
5 * (the "License"); you may not use this file except in compliance with
6 * the License. You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 /* crcsync/crccache apache client module
19 * This module is designed to run as a cache server on the local end of a slow
20 * internet link. This module uses a crc running hash algorithm to reduce
21 * data transfer in cached but modified upstream files.
23 * CRC algorithm uses the crcsync library created by Rusty Russell
25 * Author: Toby Collett (2009)
26 * Contributor: Alex Wulms (2009)
32 #include <assert.h>
34 #include <apr_file_io.h>
35 #include <apr_strings.h>
36 #include <apr_base64.h>
37 #include <apr_lib.h>
38 #include <apr_date.h>
39 #include <apr_tables.h>
40 #include "ap_provider.h"
41 #include "util_filter.h"
42 #include "util_script.h"
43 #include "util_charset.h"
44 #include <http_log.h>
45 #include <http_protocol.h>
47 #include "crccache.h"
48 #include "ap_wrapper.h"
49 #include <crcsync/crcsync.h>
50 #include <crc/crc.h>
51 #include <zlib.h>
53 #include "mod_crccache_client.h"
55 static ap_filter_rec_t *crccache_decode_filter_handle;
56 static ap_filter_rec_t *cache_save_filter_handle;
57 static ap_filter_rec_t *cache_save_subreq_filter_handle;
59 module AP_MODULE_DECLARE_DATA crccache_client_module;
60 APR_OPTIONAL_FN_TYPE(ap_cache_generate_key) *cache_generate_key;
63 static int crccache_client_post_config(apr_pool_t *p, apr_pool_t *plog,
64 apr_pool_t *ptemp, server_rec *s)
66 /* This is the means by which unusual (non-unix) os's may find alternate
67 * means to run a given command (e.g. shebang/registry parsing on Win32)
69 cache_generate_key = APR_RETRIEVE_OPTIONAL_FN(ap_cache_generate_key);
70 if (!cache_generate_key) {
71 cache_generate_key = cache_generate_key_default;
73 return OK;
77 /**
78 * Clean-up memory used by helper libraries, that don't know about apr_palloc
79 * and that (probably) use classical malloc/free
81 apr_status_t deflate_ctx_cleanup(void *data)
83 crccache_client_ctx *ctx = (crccache_client_ctx *)data;
85 if (ctx != NULL)
87 if (ctx->decompression_state != DECOMPRESSION_ENDED)
89 inflateEnd(ctx->decompression_stream);
90 ctx->decompression_state = DECOMPRESSION_ENDED;
93 return APR_SUCCESS;
98 * Reads headers from a buffer and returns an array of headers.
99 * Returns NULL on file error
100 * This routine tries to deal with too long lines and continuation lines.
101 * @@@: XXX: FIXME: currently the headers are passed thru un-merged.
102 * Is that okay, or should they be collapsed where possible?
104 apr_status_t recall_headers(cache_handle_t *h, request_rec *r) {
105 const char *data;
106 apr_size_t len;
107 apr_bucket *e;
108 unsigned i;
109 int z_RC;
111 disk_cache_object_t *dobj = (disk_cache_object_t *) h->cache_obj->vobj;
113 /* This case should not happen... */
114 if (!dobj->hfd) {
115 /* XXX log message */
116 return APR_NOTFOUND;
119 h->req_hdrs = apr_table_make(r->pool, 20);
120 h->resp_hdrs = apr_table_make(r->pool, 20);
122 /* Call routine to read the header lines/status line */
123 read_table(h, r, h->resp_hdrs, dobj->hfd);
124 read_table(h, r, h->req_hdrs, dobj->hfd);
126 e = apr_bucket_file_create(dobj->fd, 0, (apr_size_t) dobj->file_size, r->pool,
127 r->connection->bucket_alloc);
129 /* read */
130 apr_bucket_read(e, &data, &len, APR_BLOCK_READ);
132 // this will be rounded down, but thats okay
133 size_t blocksize = len/FULL_BLOCK_COUNT;
134 size_t tail_block_size = blocksize + len % FULL_BLOCK_COUNT;
135 size_t block_count_including_final_block = FULL_BLOCK_COUNT;
136 // sanity check for very small files
137 if (blocksize> 4)
139 ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"crccache: %d blocks of %ld bytes, one block of %ld bytes",FULL_BLOCK_COUNT-1,blocksize,tail_block_size);
141 crccache_client_ctx * ctx;
142 ctx = apr_pcalloc(r->pool, sizeof(*ctx));
143 ctx->bb = apr_brigade_create(r->pool, r->connection->bucket_alloc);
144 ctx->block_size = blocksize;
145 ctx->tail_block_size = tail_block_size;
146 ctx->state = DECODING_NEW_SECTION;
147 ctx->cached_bucket = e;
149 // Setup inflate for decompressing non-matched literal data
150 ctx->decompression_stream = apr_palloc(r->pool, sizeof(*(ctx->decompression_stream)));
151 ctx->decompression_stream->zalloc = Z_NULL;
152 ctx->decompression_stream->zfree = Z_NULL;
153 ctx->decompression_stream->opaque = Z_NULL;
154 ctx->decompression_stream->avail_in = 0;
155 ctx->decompression_stream->next_in = Z_NULL;
156 z_RC = inflateInit(ctx->decompression_stream);
157 if (z_RC != Z_OK)
159 ap_log_error(APLOG_MARK, APLOG_WARNING, 0, r->server,
160 "Can not initialize decompression engine, return code: %d", z_RC);
161 return APR_SUCCESS;
163 ctx->decompression_state = DECOMPRESSION_INITIALIZED;
165 // Register a cleanup function to cleanup internal libz resources
166 apr_pool_cleanup_register(r->pool, ctx, deflate_ctx_cleanup,
167 apr_pool_cleanup_null);
169 // All OK to go for the crcsync decoding: add the headers
170 // and set-up the decoding filter
172 // add one for base 64 overflow and null terminator
173 char hash_set[HASH_HEADER_SIZE+1];
175 uint64_t crcs[block_count_including_final_block];
176 crc_of_blocks(data, len, blocksize, HASH_SIZE, true, crcs);
178 // swap to network byte order
179 for (i = 0; i < block_count_including_final_block;++i)
181 htobe64(crcs[i]);
184 apr_base64_encode (hash_set, (char *)crcs, block_count_including_final_block*sizeof(crcs[0]));
185 hash_set[HASH_HEADER_SIZE] = '\0';
186 //apr_bucket_delete(e);
188 // TODO; bit of a safety margin here, could calculate exact size
189 const int block_header_max_size = HASH_HEADER_SIZE+40;
190 char block_header_txt[block_header_max_size];
191 snprintf(block_header_txt, block_header_max_size,"v=1; fs=%zu; h=%s",len,hash_set);
192 apr_table_set(r->headers_in, BLOCK_HEADER, block_header_txt);
193 // TODO: do we want to cache the hashes here?
195 // initialise the context for our sha1 digest of the unencoded response
196 EVP_MD_CTX_init(&ctx->mdctx);
197 const EVP_MD *md = EVP_sha1();
198 EVP_DigestInit_ex(&ctx->mdctx, md, NULL);
200 // we want to add a filter here so that we can decode the response.
201 // we need access to the original cached data when we get the response as
202 // we need that to fill in the matched blocks.
203 // TODO: does the original cached data file remain open between this request
204 // and the subsequent response or do we run the risk that a concurrent
205 // request modifies it?
206 ap_add_output_filter_handle(crccache_decode_filter_handle,
207 ctx, r, r->connection);
209 // TODO: why is hfd file only closed in this case?
210 apr_file_close(dobj->hfd);
212 ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server,
213 "crccache_client: Recalled headers for URL %s", dobj->name);
214 return APR_SUCCESS;
218 * CACHE_DECODE filter
219 * ----------------
221 * Deliver cached content (headers and body) up the stack.
223 static int crccache_decode_filter(ap_filter_t *f, apr_bucket_brigade *bb) {
224 apr_bucket *e;
225 request_rec *r = f->r;
226 // TODO: set up context type struct
227 crccache_client_ctx *ctx = f->ctx;
229 // if this is the first pass in decoding we should check the headers etc
230 // and fix up those headers that we modified as part of the encoding
231 if (ctx->headers_checked == 0)
233 ctx->headers_checked = 1;
235 ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,
236 "CRCSYNC returned status code (%d)", r->status);
238 // TODO: make this work if we have multiple encodings
239 const char * content_encoding;
240 content_encoding = apr_table_get(r->headers_out, ENCODING_HEADER);
241 if (content_encoding == NULL || strcmp(CRCCACHE_ENCODING, content_encoding)
242 != 0) {
243 ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,
244 "CRCSYNC not decoding, content encoding bad (%s)", content_encoding?content_encoding:"NULL");
245 ap_remove_output_filter(f);
246 return ap_pass_brigade(f->next, bb);
249 // remove the encoding header
250 apr_table_unset(r->headers_out, ENCODING_HEADER);
252 // remove If-Block from the Vary header
253 char * vary = apr_pstrdup(r->pool, apr_table_get(r->headers_out, "Vary"));
254 if (vary)
256 ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server, "Incoming Vary header: %s", vary);
257 apr_table_unset(r->headers_out, "Vary");
258 char * tok;
259 char * last = NULL;
260 for (tok = apr_strtok(vary,", ",&last);tok != NULL;tok = apr_strtok(NULL,", ",&last))
262 if (strcmp(BLOCK_HEADER,tok)!=0)
264 apr_table_mergen(r->headers_out,"Vary",tok);
269 // fix up etag
270 char * etag = apr_pstrdup(r->pool, apr_table_get(r->headers_out, ETAG_HEADER));
271 if (etag)
273 // TODO: get original encoding from etag header so that it can be re-applied
274 ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server, "Incoming ETag header: %s", etag);
275 int etaglen = strlen(etag);
276 if (etaglen>strlen(CRCCACHE_ENCODING) + 1)
278 if (strcmp("-"CRCCACHE_ENCODING,&etag[etaglen-(strlen(CRCCACHE_ENCODING) + 1)])==0)
280 etag[etaglen-(strlen(CRCCACHE_ENCODING) + 1)] = '\0';
281 apr_table_setn(r->headers_out,"etag",etag);
289 /* Do nothing if asked to filter nothing. */
290 if (APR_BRIGADE_EMPTY(bb)) {
291 return ap_pass_brigade(f->next, bb);
294 /* We require that we have a context already, otherwise we dont have our cached file
295 * to fill in the gaps with.
297 if (!ctx) {
298 ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,
299 "No context available %s", r->uri);
300 ap_remove_output_filter(f);
301 return ap_pass_brigade(f->next, bb);
304 while (!APR_BRIGADE_EMPTY(bb))
306 const char *data;
307 apr_size_t len;
309 e = APR_BRIGADE_FIRST(bb);
311 if (APR_BUCKET_IS_EOS(e)) {
313 /* Remove EOS from the old list, and insert into the new. */
314 APR_BUCKET_REMOVE(e);
315 APR_BRIGADE_INSERT_TAIL(ctx->bb, e);
317 /* This filter is done once it has served up its content */
318 ap_remove_output_filter(f);
320 // check strong hash here
321 unsigned md_len;
322 unsigned char md_value[EVP_MAX_MD_SIZE];
323 EVP_DigestFinal_ex(&ctx->mdctx, md_value, &md_len);
324 EVP_MD_CTX_cleanup(&ctx->mdctx);
326 if (memcmp(md_value, ctx->md_value_rx, 20) != 0)
328 ap_log_error_wrapper(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"CRCSYNC-DECODE HASH CHECK FAILED");
329 apr_brigade_cleanup(bb);
330 return APR_EGENERAL;
332 else
334 ap_log_error_wrapper(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"CRCSYNC-DECODE HASH CHECK PASSED");
337 /* Okay, we've seen the EOS.
338 * Time to pass it along down the chain.
340 return ap_pass_brigade(f->next, ctx->bb);
343 if (APR_BUCKET_IS_FLUSH(e)) {
344 apr_status_t rv;
346 /* Remove flush bucket from old brigade anf insert into the new. */
347 APR_BUCKET_REMOVE(e);
348 APR_BRIGADE_INSERT_TAIL(ctx->bb, e);
349 rv = ap_pass_brigade(f->next, ctx->bb);
350 if (rv != APR_SUCCESS) {
351 return rv;
353 continue;
356 if (APR_BUCKET_IS_METADATA(e)) {
358 * Remove meta data bucket from old brigade and insert into the
359 * new.
361 APR_BUCKET_REMOVE(e);
362 APR_BRIGADE_INSERT_TAIL(ctx->bb, e);
363 continue;
366 /* read */
367 apr_bucket_read(e, &data, &len, APR_BLOCK_READ);
368 //ap_log_error_wrapper(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"CRCSYNC-DECODE read %zd bytes",len);
370 apr_size_t consumed_bytes = 0;
371 while (consumed_bytes < len)
373 //ap_log_error_wrapper(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"CRCSYNC-DECODE remaining %zd bytes",len - consumed_bytes);
374 // no guaruntee that our buckets line up with our encoding sections
375 // so we need a processing state machine stored in our context
376 switch (ctx->state)
378 case DECODING_NEW_SECTION:
380 // check if we have a compressed section or a block section
381 if (data[consumed_bytes] == ENCODING_COMPRESSED)
382 ctx->state = DECODING_COMPRESSED;
383 else if (data[consumed_bytes] == ENCODING_BLOCK)
384 ctx->state = DECODING_BLOCK_HEADER;
385 else if (data[consumed_bytes] == ENCODING_LITERAL)
387 ctx->state = DECODING_LITERAL_SIZE;
388 ctx->partial_literal = NULL;
389 ctx->rx_count = 0;
391 else if (data[consumed_bytes] == ENCODING_HASH)
393 ctx->state = DECODING_HASH;
394 ctx->rx_count = 0;
396 else
398 ap_log_error(APLOG_MARK, APLOG_ERR, APR_SUCCESS, r->server,
399 "CRCSYNC-DECODE, unknown section %d(%c)",data[consumed_bytes],data[consumed_bytes]);
400 apr_brigade_cleanup(bb);
401 return APR_EGENERAL;
403 //ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"CRCSYNC-DECODE found a new section %d",ctx->state);
404 consumed_bytes++;
405 break;
407 case DECODING_BLOCK_HEADER:
409 unsigned char block_number = data[consumed_bytes];
410 consumed_bytes++;
411 ctx->state = DECODING_NEW_SECTION;
413 // TODO: Output the indicated block here
414 size_t current_block_size = block_number < FULL_BLOCK_COUNT-1 ? ctx->block_size : ctx->tail_block_size;
415 ap_log_error_wrapper(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,
416 "CRCSYNC-DECODE block section, block %d, size %zu" ,block_number, current_block_size);
418 char * buf = apr_palloc(r->pool, current_block_size);
419 const char * source_data;
420 size_t source_len;
421 apr_bucket_read(ctx->cached_bucket, &source_data, &source_len, APR_BLOCK_READ);
422 assert(block_number < (FULL_BLOCK_COUNT /*+ (ctx->tail_block_size != 0)*/));
423 memcpy(buf,&source_data[block_number*ctx->block_size],current_block_size);
424 // update our sha1 hash
425 EVP_DigestUpdate(&ctx->mdctx, buf, current_block_size);
426 apr_bucket * b = apr_bucket_pool_create(buf, current_block_size, r->pool, f->c->bucket_alloc);
427 APR_BRIGADE_INSERT_TAIL(ctx->bb, b);
428 break;
430 case DECODING_LITERAL_SIZE:
432 unsigned avail_in = len - consumed_bytes;
433 // if we havent got the full int then store the data for later
434 if (avail_in < 4 || ctx->rx_count != 0)
436 if (ctx->partial_literal == NULL)
438 ctx->partial_literal = apr_palloc(r->pool, 4);
440 unsigned len_to_copy = MIN(4-ctx->rx_count, avail_in);
441 memcpy(&ctx->partial_literal[ctx->rx_count], &data[consumed_bytes],len_to_copy);
442 ctx->rx_count += len_to_copy;
443 consumed_bytes += len_to_copy;
445 if (ctx->rx_count == 4)
447 ctx->literal_size = ntohl(*(unsigned*)ctx->partial_literal);
448 ctx->rx_count = 0;
450 else
452 break;
455 else
457 ctx->literal_size = ntohl(*(unsigned*)&data[consumed_bytes]);
458 consumed_bytes += 4;
460 ctx->partial_literal = apr_palloc(r->pool, ctx->literal_size);
461 ctx->state = DECODING_LITERAL_BODY;
462 break;
464 case DECODING_LITERAL_BODY:
466 unsigned avail_in = len - consumed_bytes;
467 unsigned len_to_copy = MIN(ctx->literal_size-ctx->rx_count, avail_in);
468 memcpy(&ctx->partial_literal[ctx->rx_count], &data[consumed_bytes],len_to_copy);
469 ctx->rx_count += len_to_copy;
470 consumed_bytes += len_to_copy;
472 if (ctx->rx_count == ctx->literal_size)
474 EVP_DigestUpdate(&ctx->mdctx, ctx->partial_literal, ctx->literal_size);
475 apr_bucket * b = apr_bucket_pool_create((char*)ctx->partial_literal, ctx->literal_size, r->pool, f->c->bucket_alloc);
476 APR_BRIGADE_INSERT_TAIL(ctx->bb, b);
477 ctx->state = DECODING_NEW_SECTION;
480 break;
482 case DECODING_HASH:
484 unsigned avail_in = len - consumed_bytes;
485 // 20 bytes for an SHA1 hash
486 unsigned needed = MIN(20-ctx->rx_count, avail_in);
487 memcpy(&ctx->md_value_rx[ctx->rx_count], &data[consumed_bytes],needed);
488 ctx->rx_count+=needed;
489 consumed_bytes += needed;
490 if (ctx->rx_count == 20)
492 ctx->state = DECODING_NEW_SECTION;
494 break;
496 case DECODING_COMPRESSED:
498 unsigned char decompressed_data_buf[30000];
499 int z_RC;
500 z_stream *strm = ctx->decompression_stream;
501 strm->avail_in = len - consumed_bytes;
502 strm->next_in = (Bytef *)(data + consumed_bytes);
503 // ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server, "CRCSYNC-DECODE inflating %d bytes", strm.avail_in);
504 // ap_log_hex(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server, strm.next_in, strm.avail_in);
505 do {
506 strm->avail_out = sizeof(decompressed_data_buf);
507 strm->next_out = decompressed_data_buf;
508 uInt avail_in_pre_inflate = strm->avail_in;
509 z_RC = inflate(strm, Z_NO_FLUSH);
510 if (z_RC == Z_NEED_DICT || z_RC == Z_DATA_ERROR || z_RC == Z_MEM_ERROR)
512 ap_log_error(APLOG_MARK, APLOG_ERR, APR_EGENERAL, r->server, "CRCSYNC-DECODE inflate error: %d", z_RC);
513 apr_brigade_cleanup(bb);
514 return APR_EGENERAL;
516 int have = sizeof(decompressed_data_buf) - strm->avail_out;
517 ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,
518 "CRCSYNC-DECODE inflate rslt %d, consumed %d, produced %d",
519 z_RC, avail_in_pre_inflate - strm->avail_in, have);
520 if (have)
522 // write output data
523 char * buf = apr_palloc(r->pool, have);
524 memcpy(buf,decompressed_data_buf,have);
525 EVP_DigestUpdate(&ctx->mdctx, buf, have);
526 apr_bucket * b = apr_bucket_pool_create(buf, have, r->pool, f->c->bucket_alloc);
527 APR_BRIGADE_INSERT_TAIL(ctx->bb, b);
529 } while (strm->avail_out == 0);
530 consumed_bytes = len - strm->avail_in;
531 if (z_RC == Z_STREAM_END)
533 ctx->state = DECODING_NEW_SECTION;
534 inflateReset(strm);
536 break;
538 default:
540 ap_log_error(APLOG_MARK, APLOG_ERR, APR_SUCCESS, r->server,
541 "CRCSYNC-DECODE, unknown state %d, terminating transaction",ctx->state);
542 apr_brigade_cleanup(bb);
543 return APR_EGENERAL; // TODO: figure out how to pass the error on to the client
546 APR_BUCKET_REMOVE(e);
550 apr_brigade_cleanup(bb);
551 return APR_SUCCESS;
554 static void *crccache_client_create_config(apr_pool_t *p, server_rec *s) {
555 crccache_client_conf *conf = apr_pcalloc(p, sizeof(crccache_client_conf));
556 /* array of URL prefixes for which caching is enabled */
557 conf->cacheenable = apr_array_make(p, 10, sizeof(struct cache_enable));
558 /* array of URL prefixes for which caching is disabled */
559 conf->cachedisable = apr_array_make(p, 10, sizeof(struct cache_disable));
561 /* XXX: Set default values */
562 conf->dirlevels = DEFAULT_DIRLEVELS;
563 conf->dirlength = DEFAULT_DIRLENGTH;
564 conf->maxfs = DEFAULT_MAX_FILE_SIZE;
565 conf->minfs = DEFAULT_MIN_FILE_SIZE;
567 conf->cache_root = NULL;
568 conf->cache_root_len = 0;
570 return conf;
574 * mod_disk_cache configuration directives handlers.
576 static const char *set_cache_root(cmd_parms *parms, void *in_struct_ptr,
577 const char *arg) {
578 crccache_client_conf *conf = ap_get_module_config(parms->server->module_config,
579 &crccache_client_module);
580 conf->cache_root = arg;
581 conf->cache_root_len = strlen(arg);
582 /* TODO: canonicalize cache_root and strip off any trailing slashes */
584 return NULL;
588 * Consider eliminating the next two directives in favor of
589 * Ian's prime number hash...
590 * key = hash_fn( r->uri)
591 * filename = "/key % prime1 /key %prime2/key %prime3"
593 static const char *set_cache_dirlevels(cmd_parms *parms, void *in_struct_ptr,
594 const char *arg) {
595 crccache_client_conf *conf = ap_get_module_config(parms->server->module_config,
596 &crccache_client_module);
597 int val = atoi(arg);
598 if (val < 1)
599 return "CacheDirLevelsClient value must be an integer greater than 0";
600 if (val * conf->dirlength > CACHEFILE_LEN)
601 return "CacheDirLevelsClient*CacheDirLengthClient value must not be higher than 20";
602 conf->dirlevels = val;
603 return NULL;
605 static const char *set_cache_dirlength(cmd_parms *parms, void *in_struct_ptr,
606 const char *arg) {
607 crccache_client_conf *conf = ap_get_module_config(parms->server->module_config,
608 &crccache_client_module);
609 int val = atoi(arg);
610 if (val < 1)
611 return "CacheDirLengthClient value must be an integer greater than 0";
612 if (val * conf->dirlevels > CACHEFILE_LEN)
613 return "CacheDirLevelsClient*CacheDirLengthClient value must not be higher than 20";
615 conf->dirlength = val;
616 return NULL;
619 static const char *set_cache_minfs(cmd_parms *parms, void *in_struct_ptr,
620 const char *arg) {
621 crccache_client_conf *conf = ap_get_module_config(parms->server->module_config,
622 &crccache_client_module);
624 if (apr_strtoff(&conf->minfs, arg, NULL, 0) != APR_SUCCESS || conf->minfs
625 < 0) {
626 return "CacheMinFileSizeClient argument must be a non-negative integer representing the min size of a file to cache in bytes.";
628 return NULL;
631 static const char *set_cache_maxfs(cmd_parms *parms, void *in_struct_ptr,
632 const char *arg) {
633 crccache_client_conf *conf = ap_get_module_config(parms->server->module_config,
634 &crccache_client_module);
635 if (apr_strtoff(&conf->maxfs, arg, NULL, 0) != APR_SUCCESS || conf->maxfs
636 < 0) {
637 return "CacheMaxFileSizeClient argument must be a non-negative integer representing the max size of a file to cache in bytes.";
639 return NULL;
642 static const char *add_crc_client_enable(cmd_parms *parms, void *dummy,
643 const char *url)
645 crccache_client_conf *conf;
646 struct cache_enable *new;
648 conf =
649 (crccache_client_conf *)ap_get_module_config(parms->server->module_config,
650 &crccache_client_module);
651 new = apr_array_push(conf->cacheenable);
652 if (apr_uri_parse(parms->pool, url, &(new->url))) {
653 return NULL;
655 if (new->url.path) {
656 new->pathlen = strlen(new->url.path);
657 } else {
658 new->pathlen = 1;
659 new->url.path = "/";
661 return NULL;
664 static const command_rec crccache_client_cmds[] =
666 AP_INIT_TAKE1("CRCClientEnable", add_crc_client_enable, NULL, RSRC_CONF, "A cache type and partial URL prefix below which caching is enabled"),
667 AP_INIT_TAKE1("CacheRootClient", set_cache_root, NULL, RSRC_CONF,"The directory to store cache files"),
668 AP_INIT_TAKE1("CacheDirLevelsClient", set_cache_dirlevels, NULL, RSRC_CONF, "The number of levels of subdirectories in the cache"),
669 AP_INIT_TAKE1("CacheDirLengthClient", set_cache_dirlength, NULL, RSRC_CONF, "The number of characters in subdirectory names"),
670 AP_INIT_TAKE1("CacheMinFileSizeClient", set_cache_minfs, NULL, RSRC_CONF, "The minimum file size to cache a document"),
671 AP_INIT_TAKE1("CacheMaxFileSizeClient", set_cache_maxfs, NULL, RSRC_CONF, "The maximum file size to cache a document"),
672 { NULL }
675 int ap_run_insert_filter(request_rec *r);
677 int crccache_client_url_handler(request_rec *r, int lookup)
679 const char *auth;
680 cache_request_rec *cache;
681 crccache_client_conf *conf;
683 /* Delay initialization until we know we are handling a GET */
684 if (r->method_number != M_GET) {
685 return DECLINED;
688 conf = (crccache_client_conf *) ap_get_module_config(r->server->module_config,
689 &crccache_client_module);
691 if (conf->cacheenable->nelts == 0)
692 return DECLINED;
694 /* make space for the per request config */
695 cache = (cache_request_rec *) ap_get_module_config(r->request_config,
696 &crccache_client_module);
697 if (!cache) {
698 cache = apr_pcalloc(r->pool, sizeof(cache_request_rec));
699 ap_set_module_config(r->request_config, &crccache_client_module, cache);
703 * Are we allowed to serve cached info at all?
706 /* find certain cache controlling headers */
707 auth = apr_table_get(r->headers_in, "Authorization");
709 /* First things first - does the request allow us to return
710 * cached information at all? If not, just decline the request.
712 if (auth) {
713 return DECLINED;
717 * Add cache_save filter to cache this request. Choose
718 * the correct filter by checking if we are a subrequest
719 * or not.
721 if (r->main) {
722 ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS,
723 r->server,
724 "Adding CACHE_SAVE_SUBREQ filter for %s",
725 r->uri);
726 ap_add_output_filter_handle(cache_save_subreq_filter_handle,
727 NULL, r, r->connection);
729 else {
730 ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS,
731 r->server, "Adding CACHE_SAVE filter for %s",
732 r->uri);
733 ap_add_output_filter_handle(cache_save_filter_handle,
734 NULL, r, r->connection);
737 cache_handle_t *h;
738 char *key;
740 if (cache_generate_key(r, r->pool, &key) != APR_SUCCESS) {
741 ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS,
742 r->server, "Failed to generate key");
743 return DECLINED;
745 h = apr_palloc(r->pool, sizeof(cache_handle_t));
746 if (open_entity(h, r, key) != OK)
748 ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS,
749 r->server, "Failed to open entity not good");
750 return DECLINED;
752 if (recall_headers(h, r) != APR_SUCCESS) {
753 /* TODO: Handle this error */
754 ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS,
755 r->server, "Failed to recall headers");
756 return DECLINED;
758 cache->handle = h;
759 return DECLINED;
765 * CACHE_SAVE filter
766 * ---------------
768 * Decide whether or not this content should be cached.
769 * If we decide no it should not:
770 * remove the filter from the chain
771 * If we decide yes it should:
772 * Have we already started saving the response?
773 * If we have started, pass the data to the storage manager via store_body
774 * Otherwise:
775 * Check to see if we *can* save this particular response.
776 * If we can, call cache_create_entity() and save the headers and body
777 * Finally, pass the data to the next filter (the network or whatever)
780 int cache_save_filter(ap_filter_t *f, apr_bucket_brigade *in)
782 int rv = !OK;
783 request_rec *r = f->r;
784 cache_request_rec *cache;
785 crccache_client_conf *conf;
786 //const char *cc_out, *cl;
787 const char *cl;
788 const char *exps, /* *lastmods,*/ *dates;//, *etag;
789 apr_time_t exp, date,/* lastmod,*/ now;
790 apr_off_t size;
791 cache_info *info = NULL;
792 char *reason;
793 apr_pool_t *p;
795 conf = (crccache_client_conf *) ap_get_module_config(r->server->module_config,
796 &crccache_client_module);
798 /* Setup cache_request_rec */
799 cache = (cache_request_rec *) ap_get_module_config(r->request_config,
800 &crccache_client_module);
801 if (!cache) {
802 /* user likely configured CACHE_SAVE manually; they should really use
803 * mod_cache configuration to do that
805 cache = apr_pcalloc(r->pool, sizeof(cache_request_rec));
806 ap_set_module_config(r->request_config, &crccache_client_module, cache);
809 reason = NULL;
810 p = r->pool;
812 * Pass Data to Cache
813 * ------------------
814 * This section passes the brigades into the cache modules, but only
815 * if the setup section (see below) is complete.
817 if (cache->block_response) {
818 /* We've already sent down the response and EOS. So, ignore
819 * whatever comes now.
821 return APR_SUCCESS;
824 /* have we already run the cachability check and set up the
825 * cached file handle?
827 if (cache->in_checked) {
828 /* pass the brigades into the cache, then pass them
829 * up the filter stack
831 rv = store_body(cache->handle, r, in);
832 if (rv != APR_SUCCESS) {
833 ap_log_error(APLOG_MARK, APLOG_DEBUG, rv, r->server,
834 "cache: Cache provider's store_body failed!");
835 ap_remove_output_filter(f);
837 return ap_pass_brigade(f->next, in);
841 * Setup Data in Cache
842 * -------------------
843 * This section opens the cache entity and sets various caching
844 * parameters, and decides whether this URL should be cached at
845 * all. This section is* run before the above section.
848 /* read expiry date; if a bad date, then leave it so the client can
849 * read it
851 exps = apr_table_get(r->err_headers_out, "Expires");
852 if (exps == NULL) {
853 exps = apr_table_get(r->headers_out, "Expires");
855 if (exps != NULL) {
856 if (APR_DATE_BAD == (exp = apr_date_parse_http(exps))) {
857 exps = NULL;
860 else {
861 exp = APR_DATE_BAD;
865 * what responses should we not cache?
867 * At this point we decide based on the response headers whether it
868 * is appropriate _NOT_ to cache the data from the server. There are
869 * a whole lot of conditions that prevent us from caching this data.
870 * They are tested here one by one to be clear and unambiguous.
872 if (r->status != HTTP_OK && r->status != HTTP_NON_AUTHORITATIVE
873 && r->status != HTTP_MULTIPLE_CHOICES
874 && r->status != HTTP_MOVED_PERMANENTLY
875 && r->status != HTTP_NOT_MODIFIED) {
876 /* RFC2616 13.4 we are allowed to cache 200, 203, 206, 300, 301 or 410
877 * We don't cache 206, because we don't (yet) cache partial responses.
878 * We include 304 Not Modified here too as this is the origin server
879 * telling us to serve the cached copy.
883 if (reason) {
884 /* noop */
887 else if (r->status == HTTP_NOT_MODIFIED &&
888 !cache->handle && !cache->stale_handle) {
889 /* if the server said 304 Not Modified but we have no cache
890 * file - pass this untouched to the user agent, it's not for us.
892 reason = "HTTP Status 304 Not Modified";
895 else if (r->header_only && !cache->stale_handle) {
896 /* Forbid HEAD requests unless we have it cached already */
897 reason = "HTTP HEAD request";
899 if (reason) {
900 ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server,
901 "cache: %s not cached. Reason: %s", r->unparsed_uri,
902 reason);
904 /* remove this filter from the chain */
905 ap_remove_output_filter(f);
907 /* ship the data up the stack */
908 return ap_pass_brigade(f->next, in);
911 /* Make it so that we don't execute this path again. */
912 cache->in_checked = 1;
914 /* Set the content length if known.
916 cl = apr_table_get(r->err_headers_out, "Content-Length");
917 if (cl == NULL) {
918 cl = apr_table_get(r->headers_out, "Content-Length");
920 if (cl) {
921 char *errp;
922 if (apr_strtoff(&size, cl, &errp, 10) || *errp || size < 0) {
923 cl = NULL; /* parse error, see next 'if' block */
927 if (!cl) {
928 /* if we don't get the content-length, see if we have all the
929 * buckets and use their length to calculate the size
931 apr_bucket *e;
932 int all_buckets_here=0;
933 int unresolved_length = 0;
934 size=0;
935 for (e = APR_BRIGADE_FIRST(in);
936 e != APR_BRIGADE_SENTINEL(in);
937 e = APR_BUCKET_NEXT(e))
939 if (APR_BUCKET_IS_EOS(e)) {
940 all_buckets_here=1;
941 break;
943 if (APR_BUCKET_IS_FLUSH(e)) {
944 unresolved_length = 1;
945 continue;
947 if (e->length == (apr_size_t)-1) {
948 break;
950 size += e->length;
952 if (!all_buckets_here) {
953 size = -1;
957 /* It's safe to cache the response.
959 * There are two possiblities at this point:
960 * - cache->handle == NULL. In this case there is no previously
961 * cached entity anywhere on the system. We must create a brand
962 * new entity and store the response in it.
963 * - cache->stale_handle != NULL. In this case there is a stale
964 * entity in the system which needs to be replaced by new
965 * content (unless the result was 304 Not Modified, which means
966 * the cached entity is actually fresh, and we should update
967 * the headers).
970 /* Did we have a stale cache entry that really is stale?
972 * Note that for HEAD requests, we won't get the body, so for a stale
973 * HEAD request, we don't remove the entity - instead we let the
974 * CACHE_REMOVE_URL filter remove the stale item from the cache.
976 if (cache->stale_handle) {
977 if (r->status == HTTP_NOT_MODIFIED) {
978 /* Oh, hey. It isn't that stale! Yay! */
979 cache->handle = cache->stale_handle;
980 info = &cache->handle->cache_obj->info;
981 rv = OK;
983 else if (!r->header_only) {
984 /* Oh, well. Toss it. */
985 remove_entity(cache->stale_handle);
986 /* Treat the request as if it wasn't conditional. */
987 cache->stale_handle = NULL;
989 * Restore the original request headers as they may be needed
990 * by further output filters like the byterange filter to make
991 * the correct decisions.
993 r->headers_in = cache->stale_headers;
997 /* no cache handle, create a new entity only for non-HEAD requests */
998 if (!cache->handle && !r->header_only) {
999 char *key;
1000 cache_handle_t *h = apr_pcalloc(r->pool, sizeof(cache_handle_t));
1001 rv = cache_generate_key(r, r->pool, &key);
1002 if (rv != APR_SUCCESS) {
1003 return rv;
1005 rv = create_entity(h, r, key, size);
1006 if (rv != APR_SUCCESS) {
1007 return rv;
1009 cache->handle = h;
1010 info = apr_pcalloc(r->pool, sizeof(cache_info));
1011 /* We only set info->status upon the initial creation. */
1012 info->status = r->status;
1015 if (rv != OK) {
1016 /* Caching layer declined the opportunity to cache the response */
1017 ap_remove_output_filter(f);
1018 return ap_pass_brigade(f->next, in);
1021 ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server,
1022 "cache: Caching url: %s", r->unparsed_uri);
1024 /* We are actually caching this response. So it does not
1025 * make sense to remove this entity any more.
1027 ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server,
1028 "cache: Removing CACHE_REMOVE_URL filter.");
1029 //ap_remove_output_filter(cache->remove_url_filter);
1032 * We now want to update the cache file header information with
1033 * the new date, last modified, expire and content length and write
1034 * it away to our cache file. First, we determine these values from
1035 * the response, using heuristics if appropriate.
1037 * In addition, we make HTTP/1.1 age calculations and write them away
1038 * too.
1041 /* Read the date. Generate one if one is not supplied */
1042 dates = apr_table_get(r->err_headers_out, "Date");
1043 if (dates == NULL) {
1044 dates = apr_table_get(r->headers_out, "Date");
1046 if (dates != NULL) {
1047 info->date = apr_date_parse_http(dates);
1049 else {
1050 info->date = APR_DATE_BAD;
1053 now = apr_time_now();
1054 if (info->date == APR_DATE_BAD) { /* No, or bad date */
1055 /* no date header (or bad header)! */
1056 info->date = now;
1058 date = info->date;
1060 /* set response_time for HTTP/1.1 age calculations */
1061 info->response_time = now;
1063 /* get the request time */
1064 info->request_time = r->request_time;
1066 info->expire = exp;
1068 /* We found a stale entry which wasn't really stale. */
1069 if (cache->stale_handle) {
1070 /* Load in the saved status and clear the status line. */
1071 r->status = info->status;
1072 r->status_line = NULL;
1074 /* RFC 2616 10.3.5 states that entity headers are not supposed
1075 * to be in the 304 response. Therefore, we need to combine the
1076 * response headers with the cached headers *before* we update
1077 * the cached headers.
1079 * However, before doing that, we need to first merge in
1080 * err_headers_out and we also need to strip any hop-by-hop
1081 * headers that might have snuck in.
1083 r->headers_out = ap_cache_cacheable_headers_out(r);
1085 /* Merge in our cached headers. However, keep any updated values. */
1086 ap_cache_accept_headers(cache->handle, r, 1);
1089 /* Write away header information to cache. It is possible that we are
1090 * trying to update headers for an entity which has already been cached.
1092 * This may fail, due to an unwritable cache area. E.g. filesystem full,
1093 * permissions problems or a read-only (re)mount. This must be handled
1094 * later.
1096 rv = store_headers(cache->handle, r, info);
1098 if(rv != APR_SUCCESS) {
1099 ap_log_error(APLOG_MARK, APLOG_DEBUG, rv, r->server,
1100 "cache: store_headers failed");
1101 ap_remove_output_filter(f);
1103 return ap_pass_brigade(f->next, in);
1106 rv = store_body(cache->handle, r, in);
1107 if (rv != APR_SUCCESS) {
1108 ap_log_error(APLOG_MARK, APLOG_DEBUG, rv, r->server,
1109 "cache: store_body failed");
1110 ap_remove_output_filter(f);
1113 return ap_pass_brigade(f->next, in);
1116 static void crccache_client_register_hook(apr_pool_t *p) {
1117 ap_log_error(APLOG_MARK, APLOG_INFO, 0, NULL,
1118 "Registering crccache client module, (C) 2009, Toby Collett");
1120 /* cache initializer */
1121 ap_hook_post_config(crccache_client_post_config, NULL, NULL, APR_HOOK_REALLY_FIRST);
1122 /* cache handler */
1123 ap_hook_quick_handler(crccache_client_url_handler, NULL, NULL, APR_HOOK_FIRST);
1124 /* cache filters
1125 * XXX The cache filters need to run right after the handlers and before
1126 * any other filters. Consider creating AP_FTYPE_CACHE for this purpose.
1128 * Depending on the type of request (subrequest / main request) they
1129 * need to be run before AP_FTYPE_CONTENT_SET / after AP_FTYPE_CONTENT_SET
1130 * filters. Thus create two filter handles for each type:
1131 * cache_save_filter_handle / cache_out_filter_handle to be used by
1132 * main requests and
1133 * cache_save_subreq_filter_handle / cache_out_subreq_filter_handle
1134 * to be run by subrequest
1137 * CACHE_SAVE must go into the filter chain after a possible DEFLATE
1138 * filter to ensure that the compressed content is stored.
1139 * Incrementing filter type by 1 ensures his happens.
1140 * TODO: Revise this logic. In order for the crccache to work properly,
1141 * the plain text content must be cached and not the deflated content
1142 * Even more so, when receiving compressed content from the upstream
1143 * server, the cache_save_filter handler should uncompress it before
1144 * storing in the cache (but provide the compressed data to the client)
1146 cache_save_filter_handle =
1147 ap_register_output_filter("CACHE_SAVE",
1148 cache_save_filter,
1149 NULL,
1150 AP_FTYPE_CONTENT_SET+1);
1152 * CACHE_SAVE_SUBREQ must go into the filter chain before SUBREQ_CORE to
1153 * handle subrequsts. Decrementing filter type by 1 ensures this
1154 * happens.
1156 cache_save_subreq_filter_handle =
1157 ap_register_output_filter("CACHE_SAVE_SUBREQ",
1158 cache_save_filter,
1159 NULL,
1160 AP_FTYPE_CONTENT_SET-1);
1162 * CRCCACHE_DECODE must go into the filter chain after a possible DEFLATE
1163 * filter to ensure that already compressed cache objects do not
1164 * get compressed again. Incrementing filter type by 1 ensures
1165 * his happens.
1167 crccache_decode_filter_handle = ap_register_output_filter(
1168 "CRCCACHE_DECODE", crccache_decode_filter, NULL,
1169 AP_FTYPE_CONTENT_SET + 1);
1174 module AP_MODULE_DECLARE_DATA crccache_client_module = {
1175 STANDARD20_MODULE_STUFF, NULL, /* create per-directory config structure */
1176 NULL , /* merge per-directory config structures */
1177 crccache_client_create_config, /* create per-server config structure */
1178 NULL , /* merge per-server config structures */
1179 crccache_client_cmds, /* command apr_table_t */
1180 crccache_client_register_hook /* register hooks */