Add Capability header to server. Use ; as field-separator in headers
[httpd-crcsyncproxy.git] / crccache / mod_crccache_server.c
blobb5865d3f2a26eee6d79f506e7f6323608928d8ab
1 /* Licensed to the Apache Software Foundation (ASF) under one or more
2 * contributor license agreements. See the NOTICE file distributed with
3 * this work for additional information regarding copyright ownership.
4 * The ASF licenses this file to You under the Apache License, Version 2.0
5 * (the "License"); you may not use this file except in compliance with
6 * the License. You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 /* crcsync/crccache apache server module
19 * This module is designed to run as a proxy server on the remote end of a slow
20 * internet link. This module uses a crc32 running hash algorithm to reduce
21 * data transfer in cached but modified downstream files.
23 * CRC algorithm uses the crcsync library created by Rusty Russell
25 * Authors: Toby Collett (2009), Alex Wulms (2009)
29 #include <stdbool.h>
31 #include <apr_file_io.h>
32 #include <apr_strings.h>
33 #include <apr_base64.h>
35 #include "ap_provider.h"
37 #include "util_filter.h"
38 #include "util_script.h"
39 #include "util_charset.h"
41 #include <http_log.h>
42 #include "ap_wrapper.h"
44 #include "crccache.h"
45 #include "mod_crccache_server.h"
47 #include <crcsync/crcsync.h>
48 #include <zlib.h>
49 #include <openssl/evp.h>
51 module AP_MODULE_DECLARE_DATA crccache_server_module;
/*
 * States of the deflate stream that compresses literal (non-matching) data.
 */
typedef enum {
	/* No literal data is pending; set again after every flush of the stream */
	COMPRESSION_BUFFER_EMPTY,
	/* Literal data has been handed to the stream since the last flush */
	COMPRESSION_FIRST_DATA_RECEIVED,
	/* The first compressed chunk (prefixed with its section header) was emitted */
	COMPRESSION_FIRST_BLOCK_WRITTEN,
	/* deflateEnd() has been invoked by the cleanup handler */
	COMPRESSION_ENDED
} compression_state_t;
61 static void *crccache_server_create_config(apr_pool_t *p, server_rec *s) {
62 crccache_server_conf *conf = apr_pcalloc(p, sizeof(crccache_server_conf));
63 conf->enabled = 0;
64 conf->decoder_modules = NULL;
65 conf->decoder_modules_cnt = 0;
66 return conf;
/*
 * Progress of the crccache output filter for a single request.
 */
typedef enum {
	/* Context was just created; no encoding has been decided yet */
	GS_INIT,
	/* State the output filter requires before it starts encoding
	 * (presumably set by the save-headers filter -- not visible here) */
	GS_HEADERS_SAVED,
	/* The output filter has committed to crc-encoding the response */
	GS_ENCODING
} global_state_t;
71 typedef struct crccache_ctx_t {
72 global_state_t global_state;
73 char *old_content_encoding;
74 char *old_etag;
75 unsigned char *buffer;
76 size_t buffer_digest_getpos;
77 size_t buffer_read_getpos;
78 size_t buffer_putpos;
79 size_t buffer_size;
80 long crc_read_block_result;
81 size_t crc_read_block_ndigested;
82 apr_bucket_brigade *bb;
83 unsigned block_count;
84 size_t block_size;
85 size_t tail_block_size;
86 uint64_t *hashes;
87 struct crc_context *crcctx;
88 size_t orig_length;
89 size_t tx_length;
90 size_t tx_uncompressed_length;
91 compression_state_t compression_state;
92 z_stream *compression_stream;
93 EVP_MD_CTX mdctx;
94 int debug_skip_writing; // ____
95 } crccache_ctx;
99 * Only enable CRCCache Server when requested through the config file
100 * so that the user can switch CRCCache server on in a specific virtual server
102 static const char *set_crccache_server(cmd_parms *parms, void *dummy, int flag)
104 crccache_server_conf *conf = ap_get_module_config(parms->server->module_config,
105 &crccache_server_module);
106 conf->enabled = flag;
107 return NULL;
110 static const char *set_crccache_decoder_module(cmd_parms *parms, void *in_struct_ptr, const char *arg)
112 crccache_server_conf *conf = ap_get_module_config(parms->server->module_config,
113 &crccache_server_module);
114 struct decoder_modules_t *decoder_module = malloc(sizeof(*decoder_module));
115 if (decoder_module == NULL)
117 return "Out of memory exception while allocating decoder_module structure";
119 char *tok;
120 char *last = NULL;
122 char *data = strdup(arg);
123 if (data == NULL)
125 return "Out of memory exception while parsing DecoderModule parameter";
128 tok = apr_strtok(data, ": ", &last);
129 if (tok == NULL)
131 return "DecoderModule value must be of format: filtername:encoding[,encoding]*";
134 decoder_module->name = strdup(tok);
135 if (decoder_module->name == NULL)
137 return "Out of memory exception while storing name in decoder_module structure";
140 tok = apr_strtok(NULL, ": ", &last);
141 if (tok == NULL)
143 return "DecoderModule value must be of format: filtername:encoding[,encoding]*";
146 for (tok = apr_strtok(tok, ", ", &last); tok != NULL; tok = apr_strtok(NULL, ", ", &last))
148 struct encodings_t *encoding = malloc(sizeof(*encoding));
149 if (encoding == NULL)
151 return "Out of memory exception while allocating encoding structure";
154 encoding->encoding = strdup(tok);
155 if (encoding->encoding == NULL)
157 return "Out of memory exception while storing encoding value in encoding structure";
160 // Insert new encoding to the head of the encodings list
161 encoding->next = decoder_module->encodings;
162 decoder_module->encodings = encoding;
165 // Insert (new) decoder module to the head of the decoder_modules list
166 decoder_module->next = conf->decoder_modules;
167 conf->decoder_modules = decoder_module;
168 conf->decoder_modules_cnt++;
170 return NULL;
173 static const command_rec crccache_server_cmds[] =
175 AP_INIT_FLAG("CRCcacheServer", set_crccache_server, NULL, RSRC_CONF, "Enable the CRCCache server in this virtual server"),
176 AP_INIT_TAKE1("DecoderModule", set_crccache_decoder_module, NULL, RSRC_CONF, "DecoderModules to decode content-types (e.g. INFLATE:gzip,x-gzip)"),
177 { NULL }
180 static ap_filter_rec_t *crccache_out_filter_handle;
181 static ap_filter_rec_t *crccache_out_save_headers_filter_handle;
184 int decode_if_block_header(const char * header, int * version, size_t * file_size, char ** hashes)
186 *version = 1;
187 *file_size = 0;
188 *hashes = NULL; // this will be allocated below, make sure we free it
189 int start = 0;
190 int ii;
191 size_t headerlen = strlen(header);
192 for (ii = 0; ii < headerlen;++ii)
194 if (header[ii] == ';' || ii == headerlen-1)
196 sscanf(&header[start]," v=%d",version);
197 sscanf(&header[start]," h=%as",hashes);
198 sscanf(&header[start]," fs=%zu",file_size);
199 start = ii + 1;
203 if (*hashes == NULL)
205 ap_log_error(APLOG_MARK, APLOG_ERR, 0, NULL, "CRCCACHE-ENCODE no hashes reported in header");
206 return -1;
208 if (*version != 1)
210 ap_log_error(APLOG_MARK, APLOG_ERR, 0, NULL, "CRCCACHE-ENCODE Unsupported header version, %d",*version);
211 free(*hashes);
212 *hashes = NULL;
213 return -1;
215 if (*file_size == 0)
217 ap_log_error(APLOG_MARK, APLOG_ERR, 0, NULL, "CRCCACHE-ENCODE no file size reported in header");
218 free(*hashes);
219 *hashes = NULL;
220 return -1;
222 return 0;
225 static int crccache_server_header_parser_handler(request_rec *r) {
226 crccache_server_conf *conf = ap_get_module_config(r->server->module_config,
227 &crccache_server_module);
228 if (conf->enabled)
230 ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server, "CRCCACHE-ENCODE Checking for headers");
231 const char * header;
232 header = apr_table_get(r->headers_in, BLOCK_HEADER);
233 if (header)
235 crccache_ctx *ctx = apr_pcalloc(r->pool, sizeof(*ctx));
236 ctx->global_state = GS_INIT;
237 ctx->old_content_encoding = NULL;
238 ctx->old_etag = NULL;
240 int version;
241 size_t file_size;
242 char * hashes;
243 if (decode_if_block_header(header,&version,&file_size,&hashes) < 0)
245 // failed to decode if block header so only put the Capability header in the response
246 ap_add_output_filter_handle(crccache_out_filter_handle,
247 ctx, r, r->connection);
248 return OK;
250 ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server, "CRCCACHE-ENCODE Block Hashes header found (hashes: %s)",hashes);
251 free (hashes);
252 hashes = NULL;
254 // Add the filter to save the headers, so that they can be restored after an optional INFLATE or other decoder module
255 ap_add_output_filter_handle(crccache_out_save_headers_filter_handle,
256 ctx, r, r->connection);
258 char *accept_encoding = apr_pstrdup(r->pool, apr_table_get(r->headers_in, ACCEPT_ENCODING_HEADER));
259 ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server, "CRCCACHE-ENCODE Incoming Accept-Encoding header: %s", accept_encoding == NULL ? "NULL" : accept_encoding);
260 if (accept_encoding != NULL)
262 struct decoder_modules_t *required_dms[conf->decoder_modules_cnt];
263 unsigned required_dms_size = 0;
264 char *tok;
265 char *last = NULL;
266 struct decoder_modules_t *dm;
267 struct encodings_t *enc;
268 unsigned cnt;
269 // Build the list of filter modules to handle the requested encodings and
270 // remove all non-supported encodings from the header
271 apr_table_unset(r->headers_in, ACCEPT_ENCODING_HEADER);
272 for (tok = apr_strtok(accept_encoding, ", ", &last); tok != NULL; tok = apr_strtok(NULL, ", ", &last)) {
273 for (dm = conf->decoder_modules; dm != NULL; dm = dm->next) {
274 for (enc = dm->encodings; enc != NULL; enc = enc->next) {
275 if (strcmp(tok, enc->encoding) == 0)
277 // This module supports the requested encoding
278 // Add it to the list if it is not already present
279 for (cnt = 0; cnt != required_dms_size; cnt++)
281 if (required_dms[cnt] == dm)
282 break; // module is already inserted in list
284 if (cnt == required_dms_size)
286 required_dms[required_dms_size++] = dm;
288 apr_table_mergen(r->headers_in, ACCEPT_ENCODING_HEADER, tok);
293 // Enable the requested filter modules
294 for (cnt = 0; cnt != required_dms_size; cnt++) {
295 dm = required_dms[cnt];
296 ap_filter_t *filter = ap_add_output_filter(dm->name, NULL, r, r->connection);
297 if (filter == NULL) {
298 ap_log_error(APLOG_MARK, APLOG_WARNING, APR_SUCCESS, r->server, "CRCCACHE-ENCODE Could not enable %s filter", dm->name);
299 // Remove the encodings handled by this filter from the list of accepted encodings
300 accept_encoding = apr_pstrdup(r->pool, apr_table_get(r->headers_in, ACCEPT_ENCODING_HEADER));
301 apr_table_unset(r->headers_in, ACCEPT_ENCODING_HEADER);
302 for (tok = apr_strtok(accept_encoding, ", ", &last); tok != NULL; tok = apr_strtok(NULL, ", ", &last)) {
303 for (enc = dm->encodings; enc != NULL; enc = enc->next) {
304 if (strcmp(tok, enc->encoding)==0) {
305 ap_log_error(APLOG_MARK, APLOG_WARNING, APR_SUCCESS, r->server, "CRCCACHE-ENCODE Removing encoding %s", tok);
306 break;
309 if (enc == NULL) {
310 // Did not find the tok encoding in the list. It can be merged back into the header
311 apr_table_mergen(r->headers_in, ACCEPT_ENCODING_HEADER, tok);
315 else
317 ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server, "CRCCACHE-ENCODE Successfully enabled %s filter", dm->name);
320 const char *updated_accept_encoding = apr_table_get(r->headers_in, ACCEPT_ENCODING_HEADER);
321 ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server, "CRCCACHE-ENCODE Modified Accept-Encoding header: %s", updated_accept_encoding == NULL ? "NULL" : updated_accept_encoding);
323 // Add the crccache filter itself, after the decoder modules
324 ap_add_output_filter_handle(crccache_out_filter_handle,
325 ctx, r, r->connection);
327 else
329 ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server, "CRCCACHE-ENCODE Did not detect blockheader (%s)", BLOCK_HEADER);
332 /* // All is okay, so set response header to IM Used
333 ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server, "CRCCACHE-ENCODE Setting 226 header");
334 r->status=226;
335 r->status_line="226 IM Used";
336 return 226;*/
338 return OK;
341 /*static int crccache_server_header_filter_handler(ap_filter_t *f, apr_bucket_brigade *b) {
342 //request_rec *r)
343 request_rec *r = f->r;
345 ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"CRCCACHE-ENCODE Setting return status code");
347 // All is okay, so set response header to IM Used
348 r->status=226;
349 r->status_line="HTTP/1.1 226 IM Used";
350 return 226;
353 static void crccache_check_etag(request_rec *r, crccache_ctx *ctx, const char *transform) {
354 const char *etag = ctx->old_etag;
355 if (etag) {
356 apr_table_set(r->headers_out, ETAG_HEADER,
357 apr_pstrcat(
358 r->pool,
359 etag, "-",
360 transform, "-",
361 ctx->old_content_encoding == NULL ? "identity" : ctx->old_content_encoding,
362 NULL
365 ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server, "CRCCACHE-ENCODE Changed ETag header to %s", apr_table_get(r->headers_out, ETAG_HEADER));
369 static apr_status_t write_compress_buffer(ap_filter_t *f, int flush)
371 unsigned char compress_buf[30000];
372 request_rec *r = f->r;
373 crccache_ctx *ctx = f->ctx;
374 z_stream *strm = ctx->compression_stream;
376 if (ctx->debug_skip_writing)
377 return APR_SUCCESS;
381 strm->avail_out = sizeof(compress_buf);
382 strm->next_out = compress_buf;
383 uInt avail_in_pre_deflate = strm->avail_in;
384 int zRC = deflate(strm, flush);
385 if (zRC == Z_STREAM_ERROR)
387 ap_log_error(APLOG_MARK, APLOG_ERR, APR_EGENERAL, r->server,"CRCCACHE-ENCODE deflate error: %d", zRC);
388 return APR_EGENERAL;
390 int have = sizeof(compress_buf) - strm->avail_out;
391 ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,
392 "CRCCACHE-ENCODE deflate rslt %d, flush %d, consumed %d, produced %d",
393 zRC, flush, avail_in_pre_deflate - strm->avail_in, have);
394 if (have != 0)
396 // output buffer contains some data to be written
397 // ap_log_hex(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server, compress_buf, have);
398 unsigned bucket_size = have;
399 if (ctx->compression_state != COMPRESSION_FIRST_BLOCK_WRITTEN)
401 bucket_size += ENCODING_COMPRESSED_HEADER_SIZE;
403 ctx->tx_length += bucket_size;
404 char * buf = apr_palloc(r->pool, bucket_size);
406 if (ctx->compression_state != COMPRESSION_FIRST_BLOCK_WRITTEN)
408 buf[0] = ENCODING_COMPRESSED;
409 memcpy(buf + ENCODING_COMPRESSED_HEADER_SIZE, compress_buf, have);
410 ctx->compression_state = COMPRESSION_FIRST_BLOCK_WRITTEN;
412 else
414 memcpy(buf, compress_buf, have);
416 apr_bucket * b = apr_bucket_pool_create(buf, bucket_size, r->pool, f->c->bucket_alloc);
417 APR_BRIGADE_INSERT_TAIL(ctx->bb, b);
420 while (strm->avail_out == 0);
421 if (strm->avail_in != 0)
423 ap_log_error(APLOG_MARK, APLOG_ERR, APR_EGENERAL, r->server,"CRCCACHE-ENCODE deflate still has %d input bytes available", strm->avail_in);
424 return APR_EGENERAL;
427 return APR_SUCCESS;
431 static apr_status_t flush_compress_buffer(ap_filter_t *f)
433 crccache_ctx *ctx = f->ctx;
434 apr_status_t rslt = APR_SUCCESS; // assume all will be fine
436 if (ctx->debug_skip_writing)
437 return APR_SUCCESS;
439 if (ctx->compression_state != COMPRESSION_BUFFER_EMPTY)
441 rslt = write_compress_buffer(f, Z_FINISH); // take the real status
442 deflateReset(ctx->compression_stream);
443 ctx->compression_state = COMPRESSION_BUFFER_EMPTY;
444 // ____ ctx->debug_skip_writing = 1; // skip writing after handling first compressed block
446 return rslt;
450 * Write literal data
452 static apr_status_t write_literal(ap_filter_t *f, unsigned char *buffer, long count)
454 crccache_ctx *ctx = f->ctx;
456 if (ctx->debug_skip_writing)
457 return APR_SUCCESS;
459 apr_status_t rslt;
460 if (ctx->compression_state == COMPRESSION_BUFFER_EMPTY)
462 ctx->compression_state = COMPRESSION_FIRST_DATA_RECEIVED;
464 ctx->compression_stream->avail_in = count;
465 ctx->compression_stream->next_in = buffer;
466 // ap_log_hex(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, f->r->server, buffer, count);
467 rslt = write_compress_buffer(f, Z_NO_FLUSH);
468 ctx->tx_uncompressed_length += count;
469 return rslt;
473 * Write hash
475 static apr_status_t write_hash(ap_filter_t *f, unsigned char *buffer, long count)
477 request_rec *r = f->r;
478 crccache_ctx *ctx = f->ctx;
479 apr_status_t rslt;
481 rslt = flush_compress_buffer(f);
482 if (rslt != APR_SUCCESS)
484 return rslt;
487 if (ctx->debug_skip_writing)
488 return APR_SUCCESS;
490 unsigned bucket_size = count + 1;
491 ctx->tx_length += bucket_size;
492 ctx->tx_uncompressed_length += bucket_size;
493 char * buf = apr_palloc(r->pool, bucket_size);
495 buf[0] = ENCODING_HASH;
496 memcpy(&buf[1],buffer,count);
497 ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"CRCCACHE-ENCODE HASH");
498 apr_bucket * b = apr_bucket_pool_create(buf, bucket_size, r->pool, f->c->bucket_alloc);
499 APR_BRIGADE_INSERT_TAIL(ctx->bb, b);
500 return APR_SUCCESS;
505 * Write a block reference
507 static apr_status_t write_block_reference(ap_filter_t *f, long result)
509 request_rec *r = f->r;
510 crccache_ctx *ctx = f->ctx;
511 apr_status_t rslt;
513 rslt = flush_compress_buffer(f);
514 if (rslt != APR_SUCCESS)
516 return rslt;
519 if (ctx->debug_skip_writing)
520 return APR_SUCCESS;
522 unsigned bucket_size = ENCODING_BLOCK_HEADER_SIZE;
523 ctx->tx_length += bucket_size;
524 ctx->tx_uncompressed_length += bucket_size;
525 char * buf = apr_palloc(r->pool, bucket_size);
527 buf[0] = ENCODING_BLOCK;
528 buf[1] = (unsigned char) ((-result)-1); // invert and get back to zero based
529 ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"CRCCACHE-ENCODE block %d",buf[1]);
530 apr_bucket * b = apr_bucket_pool_create(buf, bucket_size, r->pool, f->c->bucket_alloc);
531 APR_BRIGADE_INSERT_TAIL(ctx->bb, b);
532 return APR_SUCCESS;
536 * Process one block of data: try to match it against the CRC, append
537 * the result to the ouput ring and remember the result (e.g. was
538 * it a block-match or was a literal processed)
540 static apr_status_t process_block(ap_filter_t *f)
542 request_rec *r = f->r;
543 crccache_ctx *ctx = f->ctx;
544 apr_status_t rslt = APR_SUCCESS;
546 // ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"CRCCACHE-ENCODE invoking crc_read_block");
547 if (ctx->crcctx == NULL)
549 // This should never happen
550 ap_log_error(APLOG_MARK, APLOG_ERR, APR_SUCCESS, r->server,"CRCCACHE-ENCODE crcctx = null");
551 return APR_EGENERAL;
554 long rd_block_rslt;
555 size_t ndigested = crc_read_block(
556 ctx->crcctx,
557 &rd_block_rslt,
558 ctx->buffer+ctx->buffer_digest_getpos,
559 ctx->buffer_putpos-ctx->buffer_digest_getpos
561 ap_log_error_wrapper(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,
562 "CRCCACHE-ENCODE crc_read_block ndigested: %zu, result %ld", ndigested, rd_block_rslt);
565 // rd_block_rslt = 0: do nothing (it is a 'literal' block of exactly 'tail_blocksize' bytes at the end of the buffer,
566 // it will have to be moved to the beginning of the moving window so that it can be written upon the next call to
567 // crc_read_block or crc_read_flush)
568 // rd_block_rslt > 0: send literal
569 // rd_block_rslt < 0: send block
570 if (rd_block_rslt > 0)
572 rslt = write_literal(f, ctx->buffer+ctx->buffer_read_getpos, rd_block_rslt);
573 ctx->buffer_read_getpos += rd_block_rslt;
575 else if (rd_block_rslt < 0)
577 rslt = write_block_reference(f, rd_block_rslt);
578 unsigned char blocknum = (unsigned char) ((-rd_block_rslt)-1);
579 ctx->buffer_read_getpos += (blocknum == ctx->block_count-1) ? ctx->tail_block_size : ctx->block_size;
582 // Update the context with the results
583 ctx->crc_read_block_result = rd_block_rslt;
584 ctx->crc_read_block_ndigested = ndigested;
585 ctx->buffer_digest_getpos += ndigested;
586 return rslt;
590 * Flush one block of data: get it from the crccontext, append
591 * the result to the ouput ring and remember the result (e.g. was
592 * it a block-match or was a literal processed)
594 static apr_status_t flush_block(ap_filter_t *f)
596 request_rec *r = f->r;
597 crccache_ctx *ctx = f->ctx;
598 apr_status_t rslt = APR_SUCCESS;
600 // ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"CRCCACHE-ENCODE invoking crc_read_flush");
601 if (ctx->crcctx == NULL)
603 // This should never happen
604 ap_log_error(APLOG_MARK, APLOG_ERR, APR_SUCCESS, r->server,"CRCCACHE-ENCODE crcctx = null");
605 return APR_EGENERAL;
607 long rd_flush_rslt = crc_read_flush(ctx->crcctx);
608 ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"CRCCACHE-ENCODE crc_read_flush result %ld", rd_flush_rslt);
610 // rd_flush_rslt = 0: do nothing
611 // rd_flush_rslt > 0: send literal that was already digested but not yet returned by read-block
612 // rd_flush_rslt < 0: send block that was already digested but not yet returned by read-block
613 if (rd_flush_rslt > 0)
615 rslt = write_literal(f, ctx->buffer+ctx->buffer_read_getpos, rd_flush_rslt);
616 ctx->buffer_read_getpos += rd_flush_rslt;
618 else if (rd_flush_rslt < 0)
620 rslt = write_block_reference(f, rd_flush_rslt);
621 unsigned char blocknum = (unsigned char) ((-rd_flush_rslt)-1);
622 ctx->buffer_read_getpos += (blocknum == ctx->block_count-1) ? ctx->tail_block_size : ctx->block_size;
625 // Update the context with the results
626 ctx->crc_read_block_result = rd_flush_rslt;
627 ctx->crc_read_block_ndigested = 0;
628 return rslt;
632 * Clean-up memory used by helper libraries, that don't know about apr_palloc
633 * and that (probably) use classical malloc/free
635 static apr_status_t deflate_ctx_cleanup(void *data)
637 crccache_ctx *ctx = (crccache_ctx *)data;
639 if (ctx != NULL)
641 if (ctx->compression_state != COMPRESSION_ENDED)
643 deflateEnd(ctx->compression_stream);
644 ctx->compression_state = COMPRESSION_ENDED;
646 if (ctx->crcctx != NULL)
648 crc_context_free(ctx->crcctx);
649 ctx->crcctx = NULL;
652 return APR_SUCCESS;
655 * End of stream has been reached:
656 * Process any data still in the buffer and flush all internal
657 * structures of crcsync and of zlib
658 * Furthermore, add a strong hash
660 static apr_status_t process_eos(ap_filter_t *f)
662 crccache_ctx *ctx = f->ctx;
663 apr_status_t rslt;
665 ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, f->r->server,"CRCCACHE-ENCODE EOS reached for APR bucket");
668 while (ctx->buffer_digest_getpos < ctx->buffer_putpos)
670 // There is still data in the buffer. Process it.
671 rslt = process_block(f);
672 if (rslt != APR_SUCCESS)
674 return rslt;
680 // Flush remaining block in the crcctx
681 rslt = flush_block(f);
682 if (rslt != APR_SUCCESS)
684 return rslt;
687 while (ctx->crc_read_block_result != 0);
689 // Flush anything that is remaining in the compress buffer
690 rslt = flush_compress_buffer(f);
691 if (rslt != APR_SUCCESS)
693 return rslt;
696 unsigned md_len;
697 unsigned char md_value[EVP_MAX_MD_SIZE];
698 EVP_DigestFinal_ex(&ctx->mdctx, md_value, &md_len);
699 EVP_MD_CTX_cleanup(&ctx->mdctx);
700 write_hash(f, md_value, md_len);
702 ap_log_error_wrapper(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, f->r->server,
703 "CRCCACHE-ENCODE complete size %f%% (encoded-uncompressed=%zu encoded=%zu original=%zu",100.0*((float)ctx->tx_length/(float)ctx->orig_length),ctx->tx_uncompressed_length, ctx->tx_length, ctx->orig_length);
705 return APR_SUCCESS;
709 * Process a data bucket; append data into a moving window buffer
710 * and encode it with crcsync algorithm when window contains enough
711 * data for crcsync to find potential matches
713 static apr_status_t process_data_bucket(ap_filter_t *f, apr_bucket *e)
715 request_rec *r = f->r;
716 crccache_ctx *ctx = f->ctx;
718 const char *data;
719 apr_size_t len;
720 apr_status_t rslt;
722 /* read */
723 apr_bucket_read(e, &data, &len, APR_BLOCK_READ);
724 ctx->orig_length += len;
725 // update our sha1 hash
726 EVP_DigestUpdate(&ctx->mdctx, data, len);
727 // ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"CRCCACHE-ENCODE normal data in APR bucket, read %ld", len);
729 // append data to the buffer and encode buffer content using the crc_read_block magic
730 size_t bucket_used_count = 0;
731 size_t bucket_data_left;
732 while(bucket_used_count < len)
734 /* Append as much data as possible into the buffer */
735 bucket_data_left = len - bucket_used_count;
736 size_t copy_size = MIN(ctx->buffer_size-ctx->buffer_putpos, bucket_data_left);
737 memcpy(ctx->buffer+ctx->buffer_putpos, data+bucket_used_count, copy_size);
738 bucket_used_count += copy_size;
739 bucket_data_left -= copy_size;
740 ctx->buffer_putpos += copy_size;
741 /* flush the buffer if it is appropriate */
742 if (ctx->buffer_putpos == ctx->buffer_size)
744 // Buffer is filled to the end. Flush as much as possible
745 ap_log_error_wrapper(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,
746 "CRCCACHE-ENCODE Buffer is filled to end, read_getpos: %zu, digest_getpos: %zu, putpos: %zu, putpos-digest_getpos: %zu (tail_block_size: %zu)",
747 ctx->buffer_read_getpos, ctx->buffer_digest_getpos, ctx->buffer_putpos, ctx->buffer_putpos-ctx->buffer_digest_getpos, ctx->tail_block_size);
748 while (ctx->buffer_putpos - ctx->buffer_digest_getpos > ctx->tail_block_size)
750 // We can still scan at least 1 tail block + 1 byte forward: try to flush next part
751 rslt = process_block(f);
752 if (rslt != APR_SUCCESS)
754 return rslt;
756 ap_log_error_wrapper(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,
757 "CRCCACHE-ENCODE Processed a block, read_getpos: %zu, digest_getpos: %zu, putpos: %zu, putpos-digest_getpos: %zu (tail_block_size: %zu)",
758 ctx->buffer_read_getpos, ctx->buffer_digest_getpos, ctx->buffer_putpos, ctx->buffer_putpos-ctx->buffer_digest_getpos, ctx->tail_block_size);
761 if (ctx->buffer_putpos != ctx->buffer_read_getpos)
763 // Copy the remaining part of the buffer to the start of the buffer,
764 // so that it can be filled again as new data arrive
765 ap_log_error_wrapper(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,
766 "CRCCACHE-ENCODE Moving %zu bytes to begin of buffer",
767 ctx->buffer_putpos - ctx->buffer_read_getpos);
768 memcpy(ctx->buffer, ctx->buffer + ctx->buffer_read_getpos, ctx->buffer_putpos - ctx->buffer_read_getpos);
770 // Reset getpos to the beginning of the buffer and putpos accordingly
771 ctx->buffer_putpos -= ctx->buffer_read_getpos;
772 ctx->buffer_digest_getpos -= ctx->buffer_read_getpos;
773 ctx->buffer_read_getpos = 0;
775 while (ctx->crc_read_block_result < 0 && ctx->buffer_putpos - ctx->buffer_digest_getpos > ctx->tail_block_size)
777 // Previous block matched exactly. Let's hope the next block as well
778 ap_log_error_wrapper(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,
779 "CRCCACHE-ENCODE Previous block matched, read_getpos: %zu, digest_getpos: %zu, putpos: %zu, putpos-digest_getpos: %zu (tail_block_size: %zu)",
780 ctx->buffer_read_getpos, ctx->buffer_digest_getpos, ctx->buffer_putpos, ctx->buffer_putpos-ctx->buffer_digest_getpos, ctx->tail_block_size);
781 rslt = process_block(f);
782 if (rslt != APR_SUCCESS)
784 return rslt;
788 return APR_SUCCESS; // Yahoo, all went well
792 * CACHE_OUT filter
793 * ----------------
795 * Deliver cached content (headers and body) up the stack.
797 static apr_status_t crccache_out_filter(ap_filter_t *f, apr_bucket_brigade *bb) {
798 apr_bucket *e;
799 request_rec *r = f->r;
800 crccache_ctx *ctx = f->ctx;
801 int zRC;
802 int return_code = APR_SUCCESS;
804 /* Do nothing if asked to filter nothing. */
805 if (APR_BRIGADE_EMPTY(bb)) {
806 ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"CRCCACHE-ENCODE bucket brigade is empty -> nothing todo");
807 return ap_pass_brigade(f->next, bb);
810 /* If state is not yet GS_ENCODING content, we need to ensure that it is okay to send
811 * the encoded content. If the state is GS_ENCODING, that means we've done
812 * this before and we liked it.
813 * This could be not so nice if we always fail. But, if we succeed,
814 * we're in better shape.
816 if (ctx->global_state != GS_ENCODING)
818 const char *encoding;
820 /* only work on main request/no subrequests */
821 if (r->main != NULL) {
822 ap_remove_output_filter(f);
823 return ap_pass_brigade(f->next, bb);
826 /* We can't operate on Content-Ranges */
827 if (apr_table_get(r->headers_out, "Content-Range") != NULL) {
828 ap_remove_output_filter(f);
829 return ap_pass_brigade(f->next, bb);
832 // Advertise crcsync capability and preferred blocksize multiple
833 apr_table_mergen(r->headers_out, CAPABILITY_HEADER, "crcsync; m=1");
835 if (ctx->global_state == GS_INIT)
837 // Still in GS_INIT state implies there is no need to encode.
838 // It is sufficient that the capability header has been set
839 ap_remove_output_filter(f);
840 return ap_pass_brigade(f->next, bb);
843 if (ctx->global_state != GS_HEADERS_SAVED)
845 ap_log_error(APLOG_MARK, APLOG_ERR, APR_SUCCESS, r->server, "CRCCACHE-ENCODE unexpected ctx-state: %d, expected: %d", ctx->global_state, GS_HEADERS_SAVED);
846 return APR_EGENERAL;
849 /* Indicate to caches that they may only re-use this response for a request
850 * with the same BLOCK_HEADER value as the current request
851 * Indicate to clients that the server supports crcsync, even if checks
852 * further down prevent this specific response from being crc-encoded
854 apr_table_mergen(r->headers_out, VARY_HEADER, BLOCK_HEADER);
856 /* If Content-Encoding is present and differs from "identity", we can't handle it */
857 encoding = apr_table_get(r->headers_out, ENCODING_HEADER);
858 if (encoding && strcasecmp(encoding, "identity")) {
859 ap_log_error(APLOG_MARK, APLOG_INFO, APR_SUCCESS, r->server,
860 "Not encoding with crccache. It is already encoded with: %s", encoding);
861 ap_remove_output_filter(f);
862 return ap_pass_brigade(f->next, bb);
865 /* For a 304 or 204 response there is no entity included in
866 * the response and hence nothing to crc-encode. */
867 if (r->status == HTTP_NOT_MODIFIED || r->status ==HTTP_NO_CONTENT)
869 ap_remove_output_filter(f);
870 return ap_pass_brigade(f->next, bb);
873 /* All Ok. We're cool with filtering this. */
874 ctx->global_state = GS_ENCODING;
875 ctx->debug_skip_writing = 0;
876 ctx->orig_length = 0;
877 ctx->tx_length = 0;
878 ctx->tx_uncompressed_length = 0;
879 ctx->bb = apr_brigade_create(r->pool, f->c->bucket_alloc);
881 /* Parse the input headers */
882 const char * header;
883 header = apr_table_get(r->headers_in, BLOCK_HEADER);
884 int version;
885 size_t file_size;
886 char * hashes;
887 if (decode_if_block_header(header,&version,&file_size,&hashes) < 0)
889 ap_log_error(APLOG_MARK, APLOG_ERR, 0, r->server,"crccache: failed to decode if-block header");
890 ap_remove_output_filter(f);
891 return ap_pass_brigade(f->next, bb);
893 // Decode the hashes
894 ctx->block_count = apr_base64_decode_len(hashes)/(HASH_SIZE/8);
895 // this may over allocate by a couple of bytes but no big deal
896 ctx->hashes = apr_palloc(r->pool, apr_base64_decode_len(hashes));
897 apr_base64_decode((char *)ctx->hashes, hashes);
898 free(hashes);
899 hashes = NULL;
901 ctx->block_size = file_size/ctx->block_count;
902 ctx->tail_block_size = ctx->block_size + file_size % ctx->block_count;
903 size_t block_count_including_final_block = ctx->block_count;// + (ctx->tail_block_size != 0);
904 ap_log_error(APLOG_MARK, APLOG_INFO, APR_SUCCESS, r->server,
905 "If-block header decoded, version %d: %d hashes of %d and one of %d", version, ctx->block_count-1,(int)ctx->block_size,(int)ctx->tail_block_size);
907 // swap to network byte order
908 int i;
909 for (i = 0; i < block_count_including_final_block;++i)
911 htobe64(ctx->hashes[i]);
914 // Data come in at chunks that are potentially smaller then block_size or tail_block_size
915 // Accumulate those chunks into a buffer.
916 // The buffer must be at least block_size+tail_block_size so that crc_read_block(...) can find a matching block, regardless
917 // of the data alignment compared to the original page.
918 // The buffer is basically a moving window in the new page. So sometimes the last part of the buffer must be
919 // copied to the beginning again. The larger the buffer, the less often such a copy operation is required
920 // Though, the larger the buffer, the bigger the memory demand.
921 // A size of 3*block_size+tail_block_size+1 (20% of original file size) seems to be a good balance
923 // TODO: tune the buffer-size depending on the mime-type. Already compressed data (zip, gif, jpg, mpg, etc) will
924 // probably only have matching blocks if the file is totally unmodified. As soon as one byte differs in the original
925 // uncompressed data, the entire compressed data stream will be different anyway, so in such case it does not make
926 // much sense to even keep invoking the crc_read_block(...) function as soon as a difference has been found.
927 // Hence, no need to make a (potentially huge) buffer for these type of compressed (potentially huge, think about movies)
928 // data types.
929 ctx->buffer_size = ctx->block_size*3 + ctx->tail_block_size + 1;
930 ctx->buffer_digest_getpos = 0;
931 ctx->buffer_read_getpos = 0;
932 ctx->buffer_putpos = 0;
933 ctx->crc_read_block_result = 0;
934 ctx->buffer = apr_palloc(r->pool, ctx->buffer_size);
936 /* Setup deflate for compressing non-matched literal data */
937 ctx->compression_state = COMPRESSION_BUFFER_EMPTY;
938 // TODO: should I pass some apr_palloc based function to prevent memory leaks
939 //in case of unexpected errors?
941 ap_log_error_wrapper(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"CRCCACHE-ENCODE size of compression stream: %zd",sizeof(*(ctx->compression_stream)));
942 ctx->compression_stream = apr_palloc(r->pool, sizeof(*(ctx->compression_stream)));
943 ctx->compression_stream->zalloc = Z_NULL;
944 ctx->compression_stream->zfree = Z_NULL;
945 ctx->compression_stream->opaque = Z_NULL;
946 zRC = deflateInit(ctx->compression_stream, Z_DEFAULT_COMPRESSION); // TODO: make compression level configurable
947 if (zRC != Z_OK)
949 // Can't initialize the compression engine for compressing literal data
950 deflateEnd(ctx->compression_stream); // free memory used by deflate
951 free(ctx->compression_stream);
952 ctx->compression_stream = NULL;
953 ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r,
954 "unable to init Zlib: "
955 "deflateInit returned %d: URL %s",
956 zRC, r->uri);
957 ap_remove_output_filter(f);
958 return ap_pass_brigade(f->next, bb);
961 // initialise the context for our sha1 digest of the unencoded response
962 EVP_MD_CTX_init(&ctx->mdctx);
963 const EVP_MD *md = EVP_sha1();
964 EVP_DigestInit_ex(&ctx->mdctx, md, NULL);
966 // now initialise the crcsync context that will do the real work
967 ctx->crcctx = crc_context_new(ctx->block_size, HASH_SIZE,ctx->hashes, block_count_including_final_block, ctx->tail_block_size);
969 // Register a cleanup function to cleanup internal libz and crcsync resources
970 apr_pool_cleanup_register(r->pool, ctx, deflate_ctx_cleanup,
971 apr_pool_cleanup_null);
973 // All checks and initializations are OK
974 // Modify headers that are impacted by this transformation
975 apr_table_setn(r->headers_out, ENCODING_HEADER, CRCCACHE_ENCODING);
976 apr_table_unset(r->headers_out, "Content-Length");
977 apr_table_unset(r->headers_out, "Content-MD5");
978 crccache_check_etag(r, ctx, CRCCACHE_ENCODING);
980 ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server, "CRCCACHE Server end of context setup");
983 if (ctx->global_state != GS_ENCODING)
985 ap_log_error(APLOG_MARK, APLOG_ERR, APR_SUCCESS, r->server, "CRCCACHE-ENCODE unexpected ctx-state: %d, expected: %d", ctx->global_state, GS_ENCODING);
986 return APR_EGENERAL;
989 while (!APR_BRIGADE_EMPTY(bb))
991 const char *data;
992 apr_size_t len;
993 apr_status_t rslt;
995 e = APR_BRIGADE_FIRST(bb);
997 if (APR_BUCKET_IS_EOS(e))
999 // Process end of stream: flush data buffers, compression buffers, etc.
1000 // and calculate a strong hash.
1001 rslt = process_eos(f);
1003 /* Remove EOS from the old list, and insert into the new. */
1004 APR_BUCKET_REMOVE(e);
1005 APR_BRIGADE_INSERT_TAIL(ctx->bb, e);
1007 /* This filter is done once it has served up its content */
1008 ap_remove_output_filter(f);
1010 if (rslt != APR_SUCCESS)
1012 return rslt; // A problem occurred. Abort the processing
1015 /* Okay, we've seen the EOS.
1016 * Time to pass it along down the chain.
1018 return ap_pass_brigade(f->next, ctx->bb);
1021 if (APR_BUCKET_IS_FLUSH(e))
1023 // ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"CRCCACHE-ENCODE flush APR bucket");
1024 apr_status_t rv;
1026 /* Remove flush bucket from old brigade and insert into the new. */
1027 APR_BUCKET_REMOVE(e);
1028 // TODO: optimize; do not insert two consecutive flushes when no intermediate
1029 // output block was written
1030 APR_BRIGADE_INSERT_TAIL(ctx->bb, e);
1031 rv = ap_pass_brigade(f->next, ctx->bb);
1032 if (rv != APR_SUCCESS) {
1033 return rv;
1035 continue;
1038 if (APR_BUCKET_IS_METADATA(e)) {
1040 * Remove meta data bucket from old brigade and insert into the
1041 * new.
1043 apr_bucket_read(e, &data, &len, APR_BLOCK_READ);
1044 if (len > 2)
1045 ap_log_error_wrapper(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,
1046 "CRCCACHE-ENCODE Metadata, read %zu, %d %d %d",len,data[0],data[1],data[2]);
1047 else
1048 ap_log_error_wrapper(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,
1049 "CRCCACHE-ENCODE Metadata, read %zu",len);
1050 APR_BUCKET_REMOVE(e);
1051 APR_BRIGADE_INSERT_TAIL(ctx->bb, e);
1052 continue;
1055 // Bucket is non of the above types. Assume it is a data bucket
1056 // which means it can be encoded with the crcsync algorithm
1057 rslt = process_data_bucket(f, e);
1059 APR_BUCKET_REMOVE(e);
1060 if (rslt != APR_SUCCESS)
1062 break; // A problem occurred. Abort the processing
1066 apr_brigade_cleanup(bb);
1067 return return_code;
1072 * CRCCACHE_OUT_SAVE_HEADERS filter
1073 * ----------------
1075 * Save headers into the context
1077 static apr_status_t crccache_out_save_headers_filter(ap_filter_t *f, apr_bucket_brigade *bb) {
1078 request_rec *r = f->r;
1079 crccache_ctx *ctx = f->ctx;
1081 /* Do nothing if asked to filter nothing. */
1082 if (APR_BRIGADE_EMPTY(bb)) {
1083 ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"CRCCACHE-ENCODE (save headers) bucket brigade is empty -> nothing todo");
1084 return ap_pass_brigade(f->next, bb);
1087 if (ctx->global_state != GS_INIT)
1089 ap_log_error(APLOG_MARK, APLOG_ERR, APR_SUCCESS, r->server, "CRCCACHE-ENCODE (save headers) unexpected ctx-state: %d, expected: %d", ctx->global_state, GS_INIT);
1090 return APR_EGENERAL;
1093 /* only work on main request/no subrequests */
1094 if (r->main != NULL) {
1095 ap_remove_output_filter(f);
1096 return ap_pass_brigade(f->next, bb);
1099 /* We can't operate on Content-Ranges */
1100 if (apr_table_get(r->headers_out, "Content-Range") != NULL) {
1101 ap_remove_output_filter(f);
1102 return ap_pass_brigade(f->next, bb);
1105 /* Save content-encoding and etag header for later usage by the crcsync
1106 * encoder
1108 const char *encoding = apr_table_get(r->headers_out, ENCODING_HEADER);
1109 if (encoding != NULL)
1111 ctx->old_content_encoding = apr_pstrdup(r->pool, encoding);
1112 ap_log_error(APLOG_MARK, APLOG_INFO, APR_SUCCESS, r->server,
1113 "Saved old content-encoding: %s", encoding);
1115 const char *etag = apr_table_get(r->headers_out, ETAG_HEADER);
1116 if (etag != NULL)
1118 ctx->old_etag = apr_pstrdup(r->pool, etag);
1119 ap_log_error(APLOG_MARK, APLOG_INFO, APR_SUCCESS, r->server,
1120 "Saved old etag: %s", etag);
1122 ctx->global_state = GS_HEADERS_SAVED;
1124 /* Done saving headers. Nothing left to do */
1125 ap_remove_output_filter(f);
1126 return ap_pass_brigade(f->next, bb);
1130 static void crccache_server_register_hook(apr_pool_t *p) {
1131 ap_log_error(APLOG_MARK, APLOG_INFO, 0, NULL,
1132 "Registering crccache server module, (C) 2009, Toby Collett and Alex Wulms");
1134 ap_hook_header_parser(crccache_server_header_parser_handler, NULL, NULL,
1135 APR_HOOK_MIDDLE);
1137 ap_register_output_filter("CRCCACHE_HEADER", crccache_server_header_filter_handler,
1138 NULL, AP_FTYPE_PROTOCOL);
1140 crccache_out_save_headers_filter_handle = ap_register_output_filter("CRCCACHE_OUT_SAVE_HEADERS",
1141 crccache_out_save_headers_filter, NULL, AP_FTYPE_RESOURCE-1); // make sure to handle it *before* INFLATE filter (or other decode modules)
1143 crccache_out_filter_handle = ap_register_output_filter("CRCCACHE_OUT",
1144 crccache_out_filter, NULL, AP_FTYPE_CONTENT_SET);
1147 module AP_MODULE_DECLARE_DATA crccache_server_module = {
1148 STANDARD20_MODULE_STUFF, NULL, /* create per-directory config structure */
1149 NULL , /* merge per-directory config structures */
1150 crccache_server_create_config, /* create per-server config structure */
1151 NULL , /* merge per-server config structures */
1152 crccache_server_cmds, /* command apr_table_t */
1153 crccache_server_register_hook /* register hooks */