update hash header and add vary header
[httpd-crcsyncproxy.git] / crccache / mod_crccache_server.c
blob524a4f3cd57e6260ff8049d6ee2c3732db323510
1 /* Licensed to the Apache Software Foundation (ASF) under one or more
2 * contributor license agreements. See the NOTICE file distributed with
3 * this work for additional information regarding copyright ownership.
4 * The ASF licenses this file to You under the Apache License, Version 2.0
5 * (the "License"); you may not use this file except in compliance with
6 * the License. You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 /* crcsync/crccache apache server module
19 * This module is designed to run as a proxy server on the remote end of a slow
20 * internet link. This module uses a crc32 running hash algorithm to reduce
21 * data transfer in cached but modified downstream files.
23 * CRC algorithm uses the crcsync library created by Rusty Russell
25 * Authors: Toby Collett (2009), Alex Wulms (2009)
29 #include <stdbool.h>
30 #include "apr_file_io.h"
31 #include "apr_strings.h"
32 #include <apr_base64.h>
33 #include "mod_cache.h"
34 #include "mod_disk_cache.h"
35 #include "ap_provider.h"
36 #include "util_filter.h"
37 #include "util_script.h"
38 #include "util_charset.h"
40 #include "crccache.h"
41 #include "ap_wrapper.h"
42 #include "mod_crccache_server.h"
44 #include <crcsync/crcsync.h>
45 #include "zlib.h"
47 module AP_MODULE_DECLARE_DATA crccache_server_module;
/*
 * Life-cycle states of the zlib stream that compresses literal
 * (non-matching) data in the encoded response.
 */
typedef enum {
    /* No literal data is pending in the deflate stream */
    COMPRESSION_BUFFER_EMPTY = 0,
    /* Literal data has been handed to deflate, no output chunk emitted yet */
    COMPRESSION_FIRST_DATA_RECEIVED = 1,
    /* The first compressed chunk (carrying the section header) was emitted */
    COMPRESSION_FIRST_BLOCK_WRITTEN = 2,
    /* deflateEnd() has been called on the stream */
    COMPRESSION_ENDED = 3
} compression_state_t;
57 //#define MIN(X,Y) (X<Y?X:Y)
59 static void *create_config(apr_pool_t *p, server_rec *s) {
60 crccache_server_conf *conf = apr_pcalloc(p, sizeof(crccache_server_conf));
61 conf->disk_cache_conf = apr_pcalloc(p, sizeof(disk_cache_conf));
63 /* XXX: Set default values */
64 conf->enabled = 0;
65 conf->disk_cache_conf->dirlevels = DEFAULT_DIRLEVELS;
66 conf->disk_cache_conf->dirlength = DEFAULT_DIRLENGTH;
67 conf->disk_cache_conf->maxfs = DEFAULT_MAX_FILE_SIZE;
68 conf->disk_cache_conf->minfs = DEFAULT_MIN_FILE_SIZE;
70 conf->disk_cache_conf->cache_root = NULL;
71 conf->disk_cache_conf->cache_root_len = 0;
73 return conf;
76 typedef struct crccache_ctx_t {
77 unsigned char *buffer;
78 size_t buffer_digest_getpos;
79 size_t buffer_read_getpos;
80 size_t buffer_putpos;
81 size_t buffer_size;
82 long crc_read_block_result;
83 size_t crc_read_block_ndigested;
84 apr_bucket_brigade *bb;
85 size_t block_size;
86 size_t tail_block_size;
87 uint64_t hashes[FULL_BLOCK_COUNT+1];
88 struct crc_context *crcctx;
89 size_t orig_length;
90 size_t tx_length;
91 size_t tx_uncompressed_length;
92 compression_state_t compression_state;
93 z_stream *compression_stream;
94 int debug_skip_writing; // ____
95 } crccache_ctx;
99 * mod_disk_cache configuration directives handlers.
101 static const char *set_cache_root(cmd_parms *parms, void *in_struct_ptr,
102 const char *arg) {
103 crccache_server_conf *conf = ap_get_module_config(parms->server->module_config,
104 &crccache_server_module);
105 conf->disk_cache_conf->cache_root = arg;
106 conf->disk_cache_conf->cache_root_len = strlen(arg);
107 /* TODO: canonicalize cache_root and strip off any trailing slashes */
109 return NULL;
113 * Only enable CRCCache Server when requested through the config file
114 * so that the user can switch CRCCache server on in a specific virtual server
116 static const char *set_crccache_server(cmd_parms *parms, void *dummy, int flag)
118 crccache_server_conf *conf = ap_get_module_config(parms->server->module_config,
119 &crccache_server_module);
120 conf->enabled = flag;
121 return NULL;
126 * Consider eliminating the next two directives in favor of
127 * Ian's prime number hash...
128 * key = hash_fn( r->uri)
129 * filename = "/key % prime1 /key %prime2/key %prime3"
131 static const char *set_cache_dirlevels(cmd_parms *parms, void *in_struct_ptr,
132 const char *arg) {
133 crccache_server_conf *conf = ap_get_module_config(parms->server->module_config,
134 &crccache_server_module);
135 int val = atoi(arg);
136 if (val < 1)
137 return "CacheDirLevelsServer value must be an integer greater than 0";
138 if (val * conf->disk_cache_conf->dirlength > CACHEFILE_LEN)
139 return "CacheDirLevelsServer*CacheDirLengthServer value must not be higher than 20";
140 conf->disk_cache_conf->dirlevels = val;
141 return NULL;
143 static const char *set_cache_dirlength(cmd_parms *parms, void *in_struct_ptr,
144 const char *arg) {
145 crccache_server_conf *conf = ap_get_module_config(parms->server->module_config,
146 &crccache_server_module);
147 int val = atoi(arg);
148 if (val < 1)
149 return "CacheDirLengthServer value must be an integer greater than 0";
150 if (val * conf->disk_cache_conf->dirlevels > CACHEFILE_LEN)
151 return "CacheDirLevelsServer*CacheDirLengthServer value must not be higher than 20";
153 conf->disk_cache_conf->dirlength = val;
154 return NULL;
157 static const char *set_cache_minfs(cmd_parms *parms, void *in_struct_ptr,
158 const char *arg) {
159 crccache_server_conf *conf = ap_get_module_config(parms->server->module_config,
160 &crccache_server_module);
162 if (apr_strtoff(&conf->disk_cache_conf->minfs, arg, NULL, 0) != APR_SUCCESS || conf->disk_cache_conf->minfs
163 < 0) {
164 return "CacheMinFileSizeServer argument must be a non-negative integer representing the min size of a file to cache in bytes.";
166 return NULL;
169 static const char *set_cache_maxfs(cmd_parms *parms, void *in_struct_ptr,
170 const char *arg) {
171 crccache_server_conf *conf = ap_get_module_config(parms->server->module_config,
172 &crccache_server_module);
173 if (apr_strtoff(&conf->disk_cache_conf->maxfs, arg, NULL, 0) != APR_SUCCESS || conf->disk_cache_conf->maxfs
174 < 0) {
175 return "CacheMaxFileSizeServer argument must be a non-negative integer representing the max size of a file to cache in bytes.";
177 return NULL;
180 static const command_rec disk_cache_cmds[] = { AP_INIT_TAKE1("CacheRootServer", set_cache_root, NULL, RSRC_CONF,
181 "The directory to store cache files"), AP_INIT_TAKE1("CacheDirLevelsServer", set_cache_dirlevels, NULL, RSRC_CONF,
182 "The number of levels of subdirectories in the cache"), AP_INIT_TAKE1("CacheDirLengthServer", set_cache_dirlength, NULL, RSRC_CONF,
183 "The number of characters in subdirectory names"), AP_INIT_TAKE1("CacheMinFileSizeServer", set_cache_minfs, NULL, RSRC_CONF,
184 "The minimum file size to cache a document"), AP_INIT_TAKE1("CacheMaxFileSizeServer", set_cache_maxfs, NULL, RSRC_CONF,
185 "The maximum file size to cache a document"), AP_INIT_FLAG("CRCcacheServer", set_crccache_server, NULL, RSRC_CONF,
186 "Enable the CRCCache server in this virtual server"),{ NULL } };
188 static ap_filter_rec_t *crccache_out_filter_handle;
190 int decode_if_block_header(const char * header, int * version, size_t * file_size, char ** hashes)
192 *version = 1;
193 *file_size = 0;
194 *hashes = NULL; // this will be allocated below, make sure we free it
195 int start = 0;
196 int ii;
197 for (ii = 0; ii < strlen(header);++ii)
199 if (header[ii] == ',' || ii == strlen(header)-1)
201 sscanf(&header[start]," v=%d",version);
202 sscanf(&header[start]," h=%as",hashes);
203 sscanf(&header[start]," fs=%zu",file_size);
204 start = ii + 1;
208 if (*hashes == NULL)
210 ap_log_error(APLOG_MARK, APLOG_ERR, 0, NULL, "CRCCACHE-ENCODE no hashes reported in header");
211 return -1;
213 if (*version != 1)
215 ap_log_error(APLOG_MARK, APLOG_ERR, 0, NULL, "CRCCACHE-ENCODE Unsupported header version, %d",*version);
216 free(*hashes);
217 *hashes = NULL;
218 return -1;
220 if (*file_size == 0)
222 ap_log_error(APLOG_MARK, APLOG_ERR, 0, NULL, "CRCCACHE-ENCODE no file size reported in header");
223 free(*hashes);
224 *hashes = NULL;
225 return -1;
227 return 0;
230 static int crccache_server_header_parser_handler(request_rec *r) {
231 crccache_server_conf *conf = ap_get_module_config(r->server->module_config,
232 &crccache_server_module);
233 ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server, "CRCCACHE-ENCODE handler");
234 if (conf->enabled)
236 ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server, "CRCCACHE-ENCODE Checking for headers");
237 const char * header;
238 header = apr_table_get(r->headers_in, BLOCK_HEADER);
239 if (header)
241 int version;
242 size_t file_size;
243 char * hashes;
244 if (decode_if_block_header(header,&version,&file_size,&hashes) < 0)
246 // failed to decode if block header so just process request normally
247 return OK;
249 free (hashes);
250 hashes = NULL;
251 ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server, "CRCCACHE-ENCODE Block Hashes header found so enabling protocol: %s",hashes);
252 // Insert mod_deflate's INFLATE filter in the chain to unzip content
253 // so that there is clear text available for the delta algorithm
254 ap_filter_t *inflate_filter = ap_add_output_filter("INFLATE", NULL, r, r->connection);
255 if (inflate_filter == NULL)
257 ap_log_error(APLOG_MARK, APLOG_WARNING, APR_SUCCESS, r->server, "CRCCACHE-ENCODE Could not enable INFLATE filter. Will be unable to handle deflated encoded content");
259 else
261 ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server, "CRCCACHE-ENCODE Successfully enabled INFLATE filter to handle deflated content");
263 // And the crccache filter itself ofcourse
264 ap_add_output_filter_handle(crccache_out_filter_handle,
265 NULL, r, r->connection);
268 /* // All is okay, so set response header to IM Used
269 ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server, "CRCCACHE-ENCODE Setting 226 header");
270 r->status=226;
271 r->status_line="226 IM Used";
272 return 226;*/
274 return OK;
277 /*static int crccache_server_header_filter_handler(ap_filter_t *f, apr_bucket_brigade *b) {
278 //request_rec *r)
279 request_rec *r = f->r;
281 ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"CRCCACHE-ENCODE Setting return status code");
283 // All is okay, so set response header to IM Used
284 r->status=226;
285 r->status_line="HTTP/1.1 226 IM Used";
286 return 226;
289 /* PR 39727: we're screwing up our clients if we leave a strong ETag
290 * header while transforming content. Henrik Nordstrom suggests
291 * appending ";gzip".
293 * Pending a more thorough review of our Etag handling, let's just
294 * implement his suggestion. It fixes the bug, or at least turns it
295 * from a showstopper to an inefficiency. And it breaks nothing that
296 * wasn't already broken.
297 * TODO: the crccache_client should undo this once the reconstructed page has been saved in the cache
299 static void crccache_check_etag(request_rec *r, const char *transform) {
300 const char *etag = apr_table_get(r->headers_out, "ETag");
301 if (etag && (((etag[0] != 'W') && (etag[0] != 'w')) || (etag[1] != '/'))) {
302 apr_table_set(r->headers_out, "ETag", apr_pstrcat(r->pool, etag, "-",
303 transform, NULL));
307 static apr_status_t write_compress_buffer(ap_filter_t *f, int flush)
309 unsigned char compress_buf[30000];
310 request_rec *r = f->r;
311 crccache_ctx *ctx = f->ctx;
312 z_stream *strm = ctx->compression_stream;
314 if (ctx->debug_skip_writing)
315 return APR_SUCCESS;
319 strm->avail_out = sizeof(compress_buf);
320 strm->next_out = compress_buf;
321 uInt avail_in_pre_deflate = strm->avail_in;
322 int zRC = deflate(strm, flush);
323 if (zRC == Z_STREAM_ERROR)
325 ap_log_error(APLOG_MARK, APLOG_ERR, APR_EGENERAL, r->server,"CRCCACHE-ENCODE deflate error: %d", zRC);
326 return APR_EGENERAL;
328 int have = sizeof(compress_buf) - strm->avail_out;
329 ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,
330 "CRCCACHE-ENCODE deflate rslt %d, flush %d, consumed %d, produced %d",
331 zRC, flush, avail_in_pre_deflate - strm->avail_in, have);
332 if (have != 0)
334 // output buffer contains some data to be written
335 // ap_log_hex(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server, compress_buf, have);
336 unsigned bucket_size = have;
337 if (ctx->compression_state != COMPRESSION_FIRST_BLOCK_WRITTEN)
339 bucket_size += ENCODING_COMPRESSED_HEADER_SIZE;
341 ctx->tx_length += bucket_size;
342 char * buf = apr_palloc(r->pool, bucket_size);
344 if (ctx->compression_state != COMPRESSION_FIRST_BLOCK_WRITTEN)
346 buf[0] = ENCODING_COMPRESSED;
347 memcpy(buf + ENCODING_COMPRESSED_HEADER_SIZE, compress_buf, have);
348 ctx->compression_state = COMPRESSION_FIRST_BLOCK_WRITTEN;
350 else
352 memcpy(buf, compress_buf, have);
354 apr_bucket * b = apr_bucket_pool_create(buf, bucket_size, r->pool, f->c->bucket_alloc);
355 APR_BRIGADE_INSERT_TAIL(ctx->bb, b);
358 while (strm->avail_out == 0);
359 if (strm->avail_in != 0)
361 ap_log_error(APLOG_MARK, APLOG_ERR, APR_EGENERAL, r->server,"CRCCACHE-ENCODE deflate still has %d input bytes available", strm->avail_in);
362 return APR_EGENERAL;
365 return APR_SUCCESS;
369 static apr_status_t flush_compress_buffer(ap_filter_t *f)
371 crccache_ctx *ctx = f->ctx;
372 apr_status_t rslt = APR_SUCCESS; // assume all will be fine
374 if (ctx->debug_skip_writing)
375 return APR_SUCCESS;
377 if (ctx->compression_state != COMPRESSION_BUFFER_EMPTY)
379 rslt = write_compress_buffer(f, Z_FINISH); // take the real status
380 deflateReset(ctx->compression_stream);
381 ctx->compression_state = COMPRESSION_BUFFER_EMPTY;
382 // ____ ctx->debug_skip_writing = 1; // skip writing after handling first compressed block
384 return rslt;
388 * Write literal data
390 static apr_status_t write_literal(ap_filter_t *f, unsigned char *buffer, long count)
392 crccache_ctx *ctx = f->ctx;
394 if (ctx->debug_skip_writing)
395 return APR_SUCCESS;
397 apr_status_t rslt;
398 if (ctx->compression_state == COMPRESSION_BUFFER_EMPTY)
400 ctx->compression_state = COMPRESSION_FIRST_DATA_RECEIVED;
402 ctx->compression_stream->avail_in = count;
403 ctx->compression_stream->next_in = buffer;
404 // ap_log_hex(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, f->r->server, buffer, count);
405 rslt = write_compress_buffer(f, Z_NO_FLUSH);
406 ctx->tx_uncompressed_length += count;
407 return rslt;
411 * Write a block reference
413 static apr_status_t write_block_reference(ap_filter_t *f, long result)
415 request_rec *r = f->r;
416 crccache_ctx *ctx = f->ctx;
417 apr_status_t rslt;
419 rslt = flush_compress_buffer(f);
420 if (rslt != APR_SUCCESS)
422 return rslt;
425 if (ctx->debug_skip_writing)
426 return APR_SUCCESS;
428 unsigned bucket_size = ENCODING_BLOCK_HEADER_SIZE;
429 ctx->tx_length += bucket_size;
430 ctx->tx_uncompressed_length += bucket_size;
431 char * buf = apr_palloc(r->pool, bucket_size);
433 buf[0] = ENCODING_BLOCK;
434 buf[1] = (unsigned char) ((-result)-1); // invert and get back to zero based
435 ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"CRCCACHE-ENCODE block %d",buf[1]);
436 apr_bucket * b = apr_bucket_pool_create(buf, bucket_size, r->pool, f->c->bucket_alloc);
437 APR_BRIGADE_INSERT_TAIL(ctx->bb, b);
438 return APR_SUCCESS;
442 * Process one block of data: try to match it against the CRC, append
443 * the result to the ouput ring and remember the result (e.g. was
444 * it a block-match or was a literal processed)
446 static apr_status_t process_block(ap_filter_t *f)
448 request_rec *r = f->r;
449 crccache_ctx *ctx = f->ctx;
450 apr_status_t rslt = APR_SUCCESS;
452 // ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"CRCCACHE-ENCODE invoking crc_read_block");
453 if (ctx->crcctx == NULL)
455 // This should never happen
456 ap_log_error(APLOG_MARK, APLOG_ERR, APR_SUCCESS, r->server,"CRCCACHE-ENCODE crcctx = null");
457 return APR_EGENERAL;
460 long rd_block_rslt;
461 size_t ndigested = crc_read_block(
462 ctx->crcctx,
463 &rd_block_rslt,
464 ctx->buffer+ctx->buffer_digest_getpos,
465 ctx->buffer_putpos-ctx->buffer_digest_getpos
467 ap_log_error_wrapper(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,
468 "CRCCACHE-ENCODE crc_read_block ndigested: %zu, result %ld", ndigested, rd_block_rslt);
471 // rd_block_rslt = 0: do nothing (it is a 'literal' block of exactly 'blocksize' bytes at the end of the buffer, it will have to be moved
472 // to the beginning of the moving window so that it can be written upon the next call to crc_read_block or crc_read_flush)
473 // rd_block_rslt > 0: send literal
474 // rd_block_rslt < 0: send block
475 if (rd_block_rslt > 0)
477 rslt = write_literal(f, ctx->buffer+ctx->buffer_read_getpos, rd_block_rslt);
478 ctx->buffer_read_getpos += rd_block_rslt;
480 else if (rd_block_rslt < 0)
482 rslt = write_block_reference(f, rd_block_rslt);
483 unsigned char blocknum = (unsigned char) ((-rd_block_rslt)-1);
484 ctx->buffer_read_getpos += (blocknum == FULL_BLOCK_COUNT) ? ctx->tail_block_size : ctx->block_size;
487 // Update the context with the results
488 ctx->crc_read_block_result = rd_block_rslt;
489 ctx->crc_read_block_ndigested = ndigested;
490 ctx->buffer_digest_getpos += ndigested;
491 return rslt;
495 * Flush one block of data: get it from the crccontext, append
496 * the result to the ouput ring and remember the result (e.g. was
497 * it a block-match or was a literal processed)
499 static apr_status_t flush_block(ap_filter_t *f)
501 request_rec *r = f->r;
502 crccache_ctx *ctx = f->ctx;
503 apr_status_t rslt = APR_SUCCESS;
505 // ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"CRCCACHE-ENCODE invoking crc_read_flush");
506 if (ctx->crcctx == NULL)
508 // This should never happen
509 ap_log_error(APLOG_MARK, APLOG_ERR, APR_SUCCESS, r->server,"CRCCACHE-ENCODE crcctx = null");
510 return APR_EGENERAL;
512 long rd_flush_rslt = crc_read_flush(ctx->crcctx);
513 ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"CRCCACHE-ENCODE crc_read_flush result %ld", rd_flush_rslt);
515 // rd_flush_rslt = 0: do nothing
516 // rd_flush_rslt > 0: send literal that was already digested but not yet returned by read-block
517 // rd_flush_rslt < 0: send block that was already digested but not yet returned by read-block
518 if (rd_flush_rslt > 0)
520 rslt = write_literal(f, ctx->buffer+ctx->buffer_read_getpos, rd_flush_rslt);
521 ctx->buffer_read_getpos += rd_flush_rslt;
523 else if (rd_flush_rslt < 0)
525 rslt = write_block_reference(f, rd_flush_rslt);
526 unsigned char blocknum = (unsigned char) ((-rd_flush_rslt)-1);
527 ctx->buffer_read_getpos += (blocknum == FULL_BLOCK_COUNT) ? ctx->tail_block_size : ctx->block_size;
530 // Update the context with the results
531 ctx->crc_read_block_result = rd_flush_rslt;
532 ctx->crc_read_block_ndigested = 0;
533 return rslt;
537 * Clean-up memory used by helper libraries, that don't know about apr_palloc
538 * and that (probably) use classical malloc/free
540 static apr_status_t deflate_ctx_cleanup(void *data)
542 crccache_ctx *ctx = (crccache_ctx *)data;
544 if (ctx != NULL)
546 if (ctx->compression_state != COMPRESSION_ENDED)
548 deflateEnd(ctx->compression_stream);
549 ctx->compression_state = COMPRESSION_ENDED;
551 if (ctx->crcctx != NULL)
553 crc_context_free(ctx->crcctx);
554 ctx->crcctx = NULL;
557 return APR_SUCCESS;
560 * End of stream has been reached:
561 * Process any data still in the buffer and flush all internal
562 * structures of crcsync and of zlib
563 * Furthermore, add a strong hash
565 static apr_status_t process_eos(ap_filter_t *f)
567 crccache_ctx *ctx = f->ctx;
568 apr_status_t rslt;
570 ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, f->r->server,"CRCCACHE-ENCODE EOS reached for APR bucket");
573 while (ctx->buffer_digest_getpos < ctx->buffer_putpos)
575 // There is still data in the buffer. Process it.
576 rslt = process_block(f);
577 if (rslt != APR_SUCCESS)
579 return rslt;
585 // Flush remaining block in the crcctx
586 rslt = flush_block(f);
587 if (rslt != APR_SUCCESS)
589 return rslt;
592 while (ctx->crc_read_block_result != 0);
594 // Flush anything that is remaining in the compress buffer
595 rslt = flush_compress_buffer(f);
596 if (rslt != APR_SUCCESS)
598 return rslt;
601 // TODO: add strong hash here
603 ap_log_error_wrapper(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, f->r->server,
604 "CRCCACHE-ENCODE complete size %f%% (encoded-uncompressed=%zu encoded=%zu original=%zu",100.0*((float)ctx->tx_length/(float)ctx->orig_length),ctx->tx_uncompressed_length, ctx->tx_length, ctx->orig_length);
606 return APR_SUCCESS;
610 * Process a data bucket; append data into a moving window buffer
611 * and encode it with crcsync algorithm when window contains enough
612 * data for crcsync to find potential matches
614 static apr_status_t process_data_bucket(ap_filter_t *f, apr_bucket *e)
616 request_rec *r = f->r;
617 crccache_ctx *ctx = f->ctx;
619 const char *data;
620 apr_size_t len;
621 apr_status_t rslt;
623 /* read */
624 apr_bucket_read(e, &data, &len, APR_BLOCK_READ);
625 ctx->orig_length += len;
626 // ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"CRCCACHE-ENCODE normal data in APR bucket, read %ld", len);
628 // append data to the buffer and encode buffer content using the crc_read_block magic
629 size_t bucket_used_count = 0;
630 size_t bucket_data_left;
631 while(bucket_used_count < len)
633 /* Append as much data as possible into the buffer */
634 bucket_data_left = len - bucket_used_count;
635 size_t copy_size = MIN(ctx->buffer_size-ctx->buffer_putpos, bucket_data_left);
636 memcpy(ctx->buffer+ctx->buffer_putpos, data+bucket_used_count, copy_size);
637 bucket_used_count += copy_size;
638 bucket_data_left -= copy_size;
639 ctx->buffer_putpos += copy_size;
640 /* flush the buffer if it is appropriate */
641 if (ctx->buffer_putpos == ctx->buffer_size)
643 // Buffer is filled to the end. Flush as much as possible
644 ap_log_error_wrapper(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,
645 "CRCCACHE-ENCODE Buffer is filled to end, read_getpos: %zu, digest_getpos: %zu, putpos: %zu, putpos-digest_getpos: %zu (blocksize: %zu)",
646 ctx->buffer_read_getpos, ctx->buffer_digest_getpos, ctx->buffer_putpos, ctx->buffer_putpos-ctx->buffer_digest_getpos, ctx->block_size);
647 while (ctx->buffer_putpos - ctx->buffer_digest_getpos > ctx->block_size)
649 // We can still scan at least 1 block + 1 byte forward: try to flush next part
650 rslt = process_block(f);
651 if (rslt != APR_SUCCESS)
653 return rslt;
655 ap_log_error_wrapper(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,
656 "CRCCACHE-ENCODE Processed a block, read_getpos: %zu, digest_getpos: %zu, putpos: %zu, putpos-digest_getpos: %zu (blocksize: %zu)",
657 ctx->buffer_read_getpos, ctx->buffer_digest_getpos, ctx->buffer_putpos, ctx->buffer_putpos-ctx->buffer_digest_getpos, ctx->block_size);
660 if (ctx->buffer_putpos != ctx->buffer_read_getpos)
662 // Copy the remaining part of the buffer to the start of the buffer,
663 // so that it can be filled again as new data arrive
664 ap_log_error_wrapper(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,
665 "CRCCACHE-ENCODE Moving %zu bytes to begin of buffer",
666 ctx->buffer_putpos - ctx->buffer_read_getpos);
667 memcpy(ctx->buffer, ctx->buffer + ctx->buffer_read_getpos, ctx->buffer_putpos - ctx->buffer_read_getpos);
669 // Reset getpos to the beginning of the buffer and putpos accordingly
670 ctx->buffer_putpos -= ctx->buffer_read_getpos;
671 ctx->buffer_digest_getpos -= ctx->buffer_read_getpos;
672 ctx->buffer_read_getpos = 0;
674 while (ctx->crc_read_block_result < 0 && ctx->buffer_putpos - ctx->buffer_digest_getpos > ctx->block_size)
676 // Previous block matched exactly. Let's hope the next block as well
677 ap_log_error_wrapper(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,
678 "CRCCACHE-ENCODE Previous block matched, read_getpos: %zu, digest_getpos: %zu, putpos: %zu, putpos-digest_getpos: %zu (blocksize: %zu)",
679 ctx->buffer_read_getpos, ctx->buffer_digest_getpos, ctx->buffer_putpos, ctx->buffer_putpos-ctx->buffer_digest_getpos, ctx->block_size);
680 rslt = process_block(f);
681 if (rslt != APR_SUCCESS)
683 return rslt;
687 return APR_SUCCESS; // Yahoo, all went well
691 * CACHE_OUT filter
692 * ----------------
694 * Deliver cached content (headers and body) up the stack.
696 static apr_status_t crccache_out_filter(ap_filter_t *f, apr_bucket_brigade *bb) {
697 apr_bucket *e;
698 request_rec *r = f->r;
699 crccache_ctx *ctx = f->ctx;
700 int zRC;
701 int return_code = APR_SUCCESS;
703 /* Do nothing if asked to filter nothing. */
704 if (APR_BRIGADE_EMPTY(bb)) {
705 ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"CRCCACHE-ENCODE bucket brigade is empty -> nothing todo");
706 return ap_pass_brigade(f->next, bb);
709 /* If we don't have a context, we need to ensure that it is okay to send
710 * the deflated content. If we have a context, that means we've done
711 * this before and we liked it.
712 * This could be not so nice if we always fail. But, if we succeed,
713 * we're in better shape.
715 if (ctx == NULL)
717 const char *encoding;
719 /* only work on main request/no subrequests */
720 if (r->main != NULL) {
721 ap_remove_output_filter(f);
722 return ap_pass_brigade(f->next, bb);
725 /* We can't operate on Content-Ranges */
726 if (apr_table_get(r->headers_out, "Content-Range") != NULL) {
727 ap_remove_output_filter(f);
728 return ap_pass_brigade(f->next, bb);
731 /* Let's see what our current Content-Encoding is.
732 * If it's already encoded by crccache: don't compress again.
733 * (We could, but let's not.)
735 encoding = apr_table_get(r->headers_out, ENCODING_HEADER);
736 if (encoding && strcasecmp(CRCCACHE_ENCODING,encoding) == 0)
738 /* Even if we don't accept this request based on it not having
739 * the Accept-Encoding, we need to note that we were looking
740 * for this header and downstream proxies should be aware of that.
742 apr_table_mergen(r->headers_out, "Vary", "A-IM");
743 ap_remove_output_filter(f);
744 return ap_pass_brigade(f->next, bb);
747 /* For a 304 or 204 response there is no entity included in
748 * the response and hence nothing to deflate. */
749 if (r->status == HTTP_NOT_MODIFIED || r->status ==HTTP_NO_CONTENT)
751 ap_remove_output_filter(f);
752 return ap_pass_brigade(f->next, bb);
755 /* All Ok. We're cool with filtering this. */
756 ctx = f->ctx = apr_pcalloc(r->pool, sizeof(*ctx));
757 ctx->debug_skip_writing = 0;
758 ctx->orig_length = 0;
759 ctx->tx_length = 0;
760 ctx->tx_uncompressed_length = 0;
761 ctx->bb = apr_brigade_create(r->pool, f->c->bucket_alloc);
763 /* If Content-Encoding present and differs from "identity", we can't handle it */
764 if (encoding && strcasecmp(encoding, "identity")) {
765 ap_log_error(APLOG_MARK, APLOG_INFO, APR_SUCCESS, r->server,
766 "Not encoding with crccache. It is already encoded with: %s", encoding);
767 ap_remove_output_filter(f);
768 return ap_pass_brigade(f->next, bb);
771 /* Parse the input headers */
772 const char * header;
773 header = apr_table_get(r->headers_in, BLOCK_HEADER);
774 int version;
775 size_t file_size;
776 char * hashes;
777 if (decode_if_block_header(header,&version,&file_size,&hashes) < 0)
779 ap_log_error(APLOG_MARK, APLOG_ERR, 0, r->server,"crccache: failed to decode if-block header");
780 ap_remove_output_filter(f);
781 return ap_pass_brigade(f->next, bb);
784 ctx->block_size = file_size/FULL_BLOCK_COUNT;
785 ctx->tail_block_size = file_size % FULL_BLOCK_COUNT;
786 size_t block_count_including_final_block = FULL_BLOCK_COUNT + (ctx->tail_block_size != 0);
788 // Data come in at chunks that are potentially smaller then block_size
789 // Accumulate those chunks into a buffer.
790 // The buffer must be at least 2*block_size so that crc_read_block(...) can find a matching block, regardless
791 // of the data alignment compared to the original page.
792 // The buffer is basically a moving window in the new page. So sometimes the last part of the buffer must be
793 // copied to the beginning again. The larger the buffer, the less often such a copy operation is required
794 // Though, the larger the buffer, the bigger the memory demand.
795 // A size of 4*block_size (20% of original file size) seems to be a good balance
797 // TODO: tune the buffer-size depending on the mime-type. Already compressed data (zip, gif, jpg, mpg, etc) will
798 // probably only have matching blocks if the file is totally unmodified. As soon as one byte differs in the original
799 // uncompressed data, the entire compressed data stream will be different anyway, so in such case it does not make
800 // much sense to even keep invoking the crc_read_block(...) function as soon as a difference has been found.
801 // Hence, no need to make a (potentially huge) buffer for these type of compressed (potentially huge, think about movies)
802 // data types.
803 ctx->buffer_size = ctx->block_size*4 + 1;
804 ctx->buffer_digest_getpos = 0;
805 ctx->buffer_read_getpos = 0;
806 ctx->buffer_putpos = 0;
807 ctx->crc_read_block_result = 0;
808 ctx->buffer = apr_palloc(r->pool, ctx->buffer_size);
810 // Decode the hashes
811 apr_base64_decode((char *)ctx->hashes, hashes);
812 free(hashes);
813 hashes = NULL;
814 // swap to network byte order
815 int i;
816 for (i = 0; i < block_count_including_final_block;++i)
818 htobe64(ctx->hashes[i]);
821 /* Setup deflate for compressing non-matched literal data */
822 ctx->compression_state = COMPRESSION_BUFFER_EMPTY;
823 // TODO: should I pass some apr_palloc based function to prevent memory leaks
824 //in case of unexpected errors?
826 ap_log_error_wrapper(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"CRCCACHE-ENCODE size of compression stream: %zd",sizeof(*(ctx->compression_stream)));
827 ctx->compression_stream = apr_palloc(r->pool, sizeof(*(ctx->compression_stream)));
828 ctx->compression_stream->zalloc = Z_NULL;
829 ctx->compression_stream->zfree = Z_NULL;
830 ctx->compression_stream->opaque = Z_NULL;
831 zRC = deflateInit(ctx->compression_stream, Z_DEFAULT_COMPRESSION); // TODO: make compression level configurable
832 if (zRC != Z_OK)
834 // Can't initialize the compression engine for compressing literal data
835 deflateEnd(ctx->compression_stream); // free memory used by deflate
836 free(ctx->compression_stream);
837 ctx->compression_stream = NULL;
838 ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r,
839 "unable to init Zlib: "
840 "deflateInit returned %d: URL %s",
841 zRC, r->uri);
842 ap_remove_output_filter(f);
843 return ap_pass_brigade(f->next, bb);
846 // now initialise the crcsync context that will do the real work
847 ctx->crcctx = crc_context_new(ctx->block_size, HASH_SIZE,ctx->hashes, block_count_including_final_block, ctx->tail_block_size);
849 // Register a cleanup function to cleanup internal libz and crcsync resources
850 apr_pool_cleanup_register(r->pool, ctx, deflate_ctx_cleanup,
851 apr_pool_cleanup_null);
853 // All checks and initializations are OK
854 // Modify headers that are impacted by this transformation
855 // TODO: the crccache-client could recalculate these headers once it has
856 // reconstructed the page, before handling the reconstructed page
857 // back to the client
858 apr_table_setn(r->headers_out, ENCODING_HEADER, CRCCACHE_ENCODING);
859 apr_table_setn(r->headers_out, VARY_HEADER, VARY_VALUE);
860 apr_table_unset(r->headers_out, "Content-Length");
861 apr_table_unset(r->headers_out, "Content-MD5");
862 crccache_check_etag(r, CRCCACHE_ENCODING);
864 // All is okay, so set response header to IM Used
865 ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server, "CRCCACHE-ENCODE Setting 226 header");
866 r->status=226;
867 r->status_line="226 IM Used";
868 //return_code = 226;
872 while (!APR_BRIGADE_EMPTY(bb))
874 const char *data;
875 apr_size_t len;
876 apr_status_t rslt;
878 e = APR_BRIGADE_FIRST(bb);
880 if (APR_BUCKET_IS_EOS(e))
882 // Process end of stream: flush data buffers, compression buffers, etc.
883 // and calculate a strong hash.
884 rslt = process_eos(f);
886 /* Remove EOS from the old list, and insert into the new. */
887 APR_BUCKET_REMOVE(e);
888 APR_BRIGADE_INSERT_TAIL(ctx->bb, e);
890 /* This filter is done once it has served up its content */
891 ap_remove_output_filter(f);
893 if (rslt != APR_SUCCESS)
895 return rslt; // A problem occurred. Abort the processing
898 /* Okay, we've seen the EOS.
899 * Time to pass it along down the chain.
901 return ap_pass_brigade(f->next, ctx->bb);
904 if (APR_BUCKET_IS_FLUSH(e))
906 // ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"CRCCACHE-ENCODE flush APR bucket");
907 apr_status_t rv;
909 /* Remove flush bucket from old brigade and insert into the new. */
910 APR_BUCKET_REMOVE(e);
911 // TODO: optimize; do not insert two consecutive flushes when no intermediate
912 // output block was written
913 APR_BRIGADE_INSERT_TAIL(ctx->bb, e);
914 rv = ap_pass_brigade(f->next, ctx->bb);
915 if (rv != APR_SUCCESS) {
916 return rv;
918 continue;
921 if (APR_BUCKET_IS_METADATA(e)) {
922 // ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"CRCCACHE-ENCODE metadata APR bucket");
924 * Remove meta data bucket from old brigade and insert into the
925 * new.
927 apr_bucket_read(e, &data, &len, APR_BLOCK_READ);
928 if (len > 2)
929 ap_log_error_wrapper(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,
930 "CRCCACHE-ENCODE Metadata, read %zu, %d %d %d",len,data[0],data[1],data[2]);
931 else
932 ap_log_error_wrapper(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,
933 "CRCCACHE-ENCODE Metadata, read %zu",len);
934 APR_BUCKET_REMOVE(e);
935 APR_BRIGADE_INSERT_TAIL(ctx->bb, e);
936 continue;
939 // Bucket is non of the above types. Assume it is a data bucket
940 // which means it can be encoded with the crcsync algorithm
941 rslt = process_data_bucket(f, e);
943 APR_BUCKET_REMOVE(e);
944 if (rslt != APR_SUCCESS)
946 break; // A problem occurred. Abort the processing
950 apr_brigade_cleanup(bb);
951 return return_code;
954 static void disk_cache_register_hook(apr_pool_t *p) {
955 ap_log_error(APLOG_MARK, APLOG_INFO, 0, NULL,
956 "Registering crccache server module, (C) 2009, Toby Collett and Alex Wulms");
958 ap_hook_header_parser(crccache_server_header_parser_handler, NULL, NULL,
959 APR_HOOK_MIDDLE);
961 ap_register_output_filter("CRCCACHE_HEADER", crccache_server_header_filter_handler,
962 NULL, AP_FTYPE_PROTOCOL);
964 crccache_out_filter_handle = ap_register_output_filter("CRCCACHE_OUT",
965 crccache_out_filter, NULL, AP_FTYPE_CONTENT_SET);
968 module AP_MODULE_DECLARE_DATA crccache_server_module = {
969 STANDARD20_MODULE_STUFF, NULL, /* create per-directory config structure */
970 NULL , /* merge per-directory config structures */
971 create_config, /* create per-server config structure */
972 NULL , /* merge per-server config structures */
973 disk_cache_cmds, /* command apr_table_t */
974 disk_cache_register_hook /* register hooks */