1 /* Licensed to the Apache Software Foundation (ASF) under one or more
2 * contributor license agreements. See the NOTICE file distributed with
3 * this work for additional information regarding copyright ownership.
4 * The ASF licenses this file to You under the Apache License, Version 2.0
5 * (the "License"); you may not use this file except in compliance with
6 * the License. You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 /* crcsync/crccache apache server module
19 * This module is designed to run as a proxy server on the remote end of a slow
20 * internet link. This module uses a crc32 running hash algorithm to reduce
21 * data transfer in cached but modified downstream files.
23 * CRC algorithm uses the crcsync library created by Rusty Russell
25 * Authors: Toby Collett (2009), Alex Wulms (2009)
31 #include <apr_file_io.h>
32 #include <apr_strings.h>
33 #include <apr_base64.h>
35 #include "ap_provider.h"
37 #include "util_filter.h"
38 #include "util_script.h"
39 #include "util_charset.h"
42 #include "ap_wrapper.h"
45 #include "mod_crccache_server.h"
47 #include <crcsync/crcsync.h>
49 #include <openssl/evp.h>
// Module record of the crccache server module. Its address is passed to
// ap_get_module_config() elsewhere in this file to look up the per-server configuration.
51 module AP_MODULE_DECLARE_DATA crccache_server_module
;
53 // Possible states for the output compression
// NOTE(review): the opening of this enum and at least one enumerator are not visible in this
// excerpt; COMPRESSION_ENDED is used by deflate_ctx_cleanup() and is presumably declared here.
// No literal data is pending: set at context setup and again after the compress buffer is flushed.
55 COMPRESSION_BUFFER_EMPTY
,
// write_literal() has handed data to deflate while the state was COMPRESSION_BUFFER_EMPTY.
56 COMPRESSION_FIRST_DATA_RECEIVED
,
// The first compressed bucket (the one prefixed with ENCODING_COMPRESSED) has been queued.
57 COMPRESSION_FIRST_BLOCK_WRITTEN
,
59 } compression_state_t
;
61 static void *crccache_server_create_config(apr_pool_t
*p
, server_rec
*s
) {
62 crccache_server_conf
*conf
= apr_pcalloc(p
, sizeof(crccache_server_conf
));
64 conf
->decoder_modules
= NULL
;
65 conf
->decoder_modules_cnt
= 0;
/* Stage a request has reached, as tracked in the global_state field of the request context */
typedef enum {
	GS_INIT,          /* context just created; the output filter treats this as "no need to encode" */
	GS_HEADERS_SAVED, /* state the output filter expects to find before it sets up the encoding */
	GS_ENCODING       /* output filter accepted the response and is encoding it */
} global_state_t;
// Per-request state shared by the header parser hook and the output filters.
// NOTE(review): several members used elsewhere in this file (e.g. old_etag, buffer_putpos,
// buffer_size, block_size, block_count, hashes, orig_length, tx_length, mdctx) and the
// closing of the struct are not visible in this excerpt.
71 typedef struct crccache_ctx_t
{
// Stage reached by this request (see global_state_t).
72 global_state_t global_state
;
// Content-Encoding used when rewriting the ETag; NULL is treated as "identity".
73 char *old_content_encoding
;
// Moving-window buffer that incoming response data is accumulated in.
75 unsigned char *buffer
;
// Offset in buffer of the first byte not yet digested by crc_read_block().
76 size_t buffer_digest_getpos
;
// Offset in buffer of the first byte not yet emitted as a literal or a block reference.
77 size_t buffer_read_getpos
;
// Result of the most recent crc_read_block()/crc_read_flush() call
// (>0: literal length, <0: encoded block number, 0: nothing to emit).
80 long crc_read_block_result
;
// Number of bytes digested by the most recent crc_read_block() call (0 after a flush).
81 size_t crc_read_block_ndigested
;
// Brigade that the encoded output buckets are appended to.
82 apr_bucket_brigade
*bb
;
// Size of the last block: block_size plus the remainder of file_size / block_count.
85 size_t tail_block_size
;
// crcsync matching context created with crc_context_new().
87 struct crc_context
*crcctx
;
// Number of encoded bytes produced, counting literals before compression.
90 size_t tx_uncompressed_length
;
// State of the literal-data compressor (see compression_state_t).
91 compression_state_t compression_state
;
// zlib deflate stream used to compress literal data.
92 z_stream
*compression_stream
;
// Debug switch checked at the top of the write functions; set to 0 when encoding starts.
// NOTE(review): presumably makes those functions return without writing - confirm.
94 int debug_skip_writing
; // ____
99 * Only enable CRCCache Server when requested through the config file
100 * so that the user can switch CRCCache server on in a specific virtual server
102 static const char *set_crccache_server(cmd_parms
*parms
, void *dummy
, int flag
)
104 crccache_server_conf
*conf
= ap_get_module_config(parms
->server
->module_config
,
105 &crccache_server_module
);
106 conf
->enabled
= flag
;
110 static const char *set_crccache_decoder_module(cmd_parms
*parms
, void *in_struct_ptr
, const char *arg
)
112 crccache_server_conf
*conf
= ap_get_module_config(parms
->server
->module_config
,
113 &crccache_server_module
);
114 struct decoder_modules_t
*decoder_module
= malloc(sizeof(*decoder_module
));
115 if (decoder_module
== NULL
)
117 return "Out of memory exception while allocating decoder_module structure";
122 char *data
= strdup(arg
);
125 return "Out of memory exception while parsing DecoderModule parameter";
128 tok
= apr_strtok(data
, ": ", &last
);
131 return "DecoderModule value must be of format: filtername:encoding[,encoding]*";
134 decoder_module
->name
= strdup(tok
);
135 if (decoder_module
->name
== NULL
)
137 return "Out of memory exception while storing name in decoder_module structure";
140 tok
= apr_strtok(NULL
, ": ", &last
);
143 return "DecoderModule value must be of format: filtername:encoding[,encoding]*";
146 for (tok
= apr_strtok(tok
, ", ", &last
); tok
!= NULL
; tok
= apr_strtok(NULL
, ", ", &last
))
148 struct encodings_t
*encoding
= malloc(sizeof(*encoding
));
149 if (encoding
== NULL
)
151 return "Out of memory exception while allocating encoding structure";
154 encoding
->encoding
= strdup(tok
);
155 if (encoding
->encoding
== NULL
)
157 return "Out of memory exception while storing encoding value in encoding structure";
160 // Insert new encoding to the head of the encodings list
161 encoding
->next
= decoder_module
->encodings
;
162 decoder_module
->encodings
= encoding
;
165 // Insert (new) decoder module to the head of the decoder_modules list
166 decoder_module
->next
= conf
->decoder_modules
;
167 conf
->decoder_modules
= decoder_module
;
168 conf
->decoder_modules_cnt
++;
// Configuration directives understood by this module.
// NOTE(review): the braces and the terminating entry of the table are not visible in this excerpt.
173 static const command_rec crccache_server_cmds
[] =
// CRCcacheServer: flag directive, handled by set_crccache_server().
175 AP_INIT_FLAG("CRCcacheServer", set_crccache_server
, NULL
, RSRC_CONF
, "Enable the CRCCache server in this virtual server"),
// DecoderModule: single-argument directive, handled by set_crccache_decoder_module().
176 AP_INIT_TAKE1("DecoderModule", set_crccache_decoder_module
, NULL
, RSRC_CONF
, "DecoderModules to decode content-types (e.g. INFLATE:gzip,x-gzip)"),
// Handle of the crccache output filter; passed to ap_add_output_filter_handle()
// by the header parser hook.
180 static ap_filter_rec_t
*crccache_out_filter_handle
;
// Handle of the filter that saves the response headers before optional decoder modules run;
// also installed by the header parser hook.
181 static ap_filter_rec_t
*crccache_out_save_headers_filter_handle
;
184 int decode_if_block_header(const char * header
, int * version
, size_t * file_size
, char ** hashes
)
188 *hashes
= NULL
; // this will be allocated below, make sure we free it
191 size_t headerlen
= strlen(header
);
192 for (ii
= 0; ii
< headerlen
;++ii
)
194 if (header
[ii
] == ';' || ii
== headerlen
-1)
196 sscanf(&header
[start
]," v=%d",version
);
197 sscanf(&header
[start
]," h=%as",hashes
);
198 sscanf(&header
[start
]," fs=%zu",file_size
);
205 ap_log_error(APLOG_MARK
, APLOG_ERR
, 0, NULL
, "CRCCACHE-ENCODE no hashes reported in header");
210 ap_log_error(APLOG_MARK
, APLOG_ERR
, 0, NULL
, "CRCCACHE-ENCODE Unsupported header version, %d",*version
);
217 ap_log_error(APLOG_MARK
, APLOG_ERR
, 0, NULL
, "CRCCACHE-ENCODE no file size reported in header");
// Header parser hook.
// Looks for the BLOCK_HEADER request header. When it is present, a request context is
// allocated and the header is decoded:
//  - if decoding fails, only the crccache output filter is installed (so that the
//    capability header is still added to the response);
//  - otherwise the save-headers filter is installed, the Accept-Encoding request header is
//    reduced to the encodings handled by a configured decoder module, the matching decoder
//    filters are installed, and finally the crccache output filter itself is installed.
// NOTE(review): a number of short lines of this function (local declarations, braces,
// else branches, return statements) are not visible in this excerpt.
225 static int crccache_server_header_parser_handler(request_rec
*r
) {
226 crccache_server_conf
*conf
= ap_get_module_config(r
->server
->module_config
,
227 &crccache_server_module
);
230 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, 0, r
->server
, "CRCCACHE-ENCODE Checking for headers");
232 header
= apr_table_get(r
->headers_in
, BLOCK_HEADER
);
// Request context, allocated from the request pool and handed to every filter added below.
235 crccache_ctx
*ctx
= apr_pcalloc(r
->pool
, sizeof(*ctx
));
236 ctx
->global_state
= GS_INIT
;
237 ctx
->old_content_encoding
= NULL
;
238 ctx
->old_etag
= NULL
;
243 if (decode_if_block_header(header
,&version
,&file_size
,&hashes
) < 0)
245 // failed to decode if block header so only put the Capability header in the response
246 ap_add_output_filter_handle(crccache_out_filter_handle
,
247 ctx
, r
, r
->connection
);
250 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, 0, r
->server
, "CRCCACHE-ENCODE Block Hashes header found (hashes: %s)",hashes
);
254 // Add the filter to save the headers, so that they can be restored after an optional INFLATE or other decoder module
255 ap_add_output_filter_handle(crccache_out_save_headers_filter_handle
,
256 ctx
, r
, r
->connection
);
// Private, writable copy of the Accept-Encoding value: apr_strtok() modifies the string it scans.
258 char *accept_encoding
= apr_pstrdup(r
->pool
, apr_table_get(r
->headers_in
, ACCEPT_ENCODING_HEADER
));
259 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, 0, r
->server
, "CRCCACHE-ENCODE Incoming Accept-Encoding header: %s", accept_encoding
== NULL
? "NULL" : accept_encoding
);
260 if (accept_encoding
!= NULL
)
// Decoder modules needed for this request; each module is stored at most once.
// NOTE(review): this is a variable-length array sized by decoder_modules_cnt; a count of 0
// (no DecoderModule configured) would give a zero-length VLA, which C does not permit -
// confirm whether a guard exists.
262 struct decoder_modules_t
*required_dms
[conf
->decoder_modules_cnt
];
263 unsigned required_dms_size
= 0;
266 struct decoder_modules_t
*dm
;
267 struct encodings_t
*enc
;
269 // Build the list of filter modules to handle the requested encodings and
270 // remove all non-supported encodings from the header
271 apr_table_unset(r
->headers_in
, ACCEPT_ENCODING_HEADER
);
272 for (tok
= apr_strtok(accept_encoding
, ", ", &last
); tok
!= NULL
; tok
= apr_strtok(NULL
, ", ", &last
)) {
273 for (dm
= conf
->decoder_modules
; dm
!= NULL
; dm
= dm
->next
) {
274 for (enc
= dm
->encodings
; enc
!= NULL
; enc
= enc
->next
) {
// Exact, case-sensitive match of the whole token; a token that carries parameters
// (for example ";q=0.5") will therefore not match a configured encoding.
275 if (strcmp(tok
, enc
->encoding
) == 0)
277 // This module supports the requested encoding
278 // Add it to the list if it is not already present
279 for (cnt
= 0; cnt
!= required_dms_size
; cnt
++)
281 if (required_dms
[cnt
] == dm
)
282 break; // module is already inserted in list
284 if (cnt
== required_dms_size
)
286 required_dms
[required_dms_size
++] = dm
;
288 apr_table_mergen(r
->headers_in
, ACCEPT_ENCODING_HEADER
, tok
);
293 // Enable the requested filter modules
294 for (cnt
= 0; cnt
!= required_dms_size
; cnt
++) {
295 dm
= required_dms
[cnt
];
296 ap_filter_t
*filter
= ap_add_output_filter(dm
->name
, NULL
, r
, r
->connection
);
297 if (filter
== NULL
) {
298 ap_log_error(APLOG_MARK
, APLOG_WARNING
, APR_SUCCESS
, r
->server
, "CRCCACHE-ENCODE Could not enable %s filter", dm
->name
);
299 // Remove the encodings handled by this filter from the list of accepted encodings
// The header is rebuilt from a fresh copy: tokens belonging to the failed module are
// logged as removed, tokens not found in its list are merged back in.
300 accept_encoding
= apr_pstrdup(r
->pool
, apr_table_get(r
->headers_in
, ACCEPT_ENCODING_HEADER
));
301 apr_table_unset(r
->headers_in
, ACCEPT_ENCODING_HEADER
);
302 for (tok
= apr_strtok(accept_encoding
, ", ", &last
); tok
!= NULL
; tok
= apr_strtok(NULL
, ", ", &last
)) {
303 for (enc
= dm
->encodings
; enc
!= NULL
; enc
= enc
->next
) {
304 if (strcmp(tok
, enc
->encoding
)==0) {
305 ap_log_error(APLOG_MARK
, APLOG_WARNING
, APR_SUCCESS
, r
->server
, "CRCCACHE-ENCODE Removing encoding %s", tok
);
310 // Did not find the tok encoding in the list. It can be merged back into the header
311 apr_table_mergen(r
->headers_in
, ACCEPT_ENCODING_HEADER
, tok
);
317 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
, "CRCCACHE-ENCODE Successfully enabled %s filter", dm
->name
);
320 const char *updated_accept_encoding
= apr_table_get(r
->headers_in
, ACCEPT_ENCODING_HEADER
);
321 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, 0, r
->server
, "CRCCACHE-ENCODE Modified Accept-Encoding header: %s", updated_accept_encoding
== NULL
? "NULL" : updated_accept_encoding
);
323 // Add the crccache filter itself, after the decoder modules
324 ap_add_output_filter_handle(crccache_out_filter_handle
,
325 ctx
, r
, r
->connection
);
329 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, 0, r
->server
, "CRCCACHE-ENCODE Did not detect blockheader (%s)", BLOCK_HEADER
);
332 /* // All is okay, so set response header to IM Used
333 ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server, "CRCCACHE-ENCODE Setting 226 header");
335 r->status_line="226 IM Used";
341 /*static int crccache_server_header_filter_handler(ap_filter_t *f, apr_bucket_brigade *b) {
343 request_rec *r = f->r;
345 ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"CRCCACHE-ENCODE Setting return status code");
347 // All is okay, so set response header to IM Used
349 r->status_line="HTTP/1.1 226 IM Used";
// Replace the ETag response header with a value derived from the ETag saved in
// ctx->old_etag, then log the new header value.
// NOTE(review): most of the expression that builds the new value is not visible in this
// excerpt. The visible part uses the saved Content-Encoding, falling back to "identity"
// when none was saved; confirm how 'transform' is combined into the value.
353 static void crccache_check_etag(request_rec
*r
, crccache_ctx
*ctx
, const char *transform
) {
354 const char *etag
= ctx
->old_etag
;
356 apr_table_set(r
->headers_out
, ETAG_HEADER
,
361 ctx
->old_content_encoding
== NULL
? "identity" : ctx
->old_content_encoding
,
365 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
, "CRCCACHE-ENCODE Changed ETag header to %s", apr_table_get(r
->headers_out
, ETAG_HEADER
));
369 static apr_status_t
write_compress_buffer(ap_filter_t
*f
, int flush
)
371 unsigned char compress_buf
[30000];
372 request_rec
*r
= f
->r
;
373 crccache_ctx
*ctx
= f
->ctx
;
374 z_stream
*strm
= ctx
->compression_stream
;
376 if (ctx
->debug_skip_writing
)
381 strm
->avail_out
= sizeof(compress_buf
);
382 strm
->next_out
= compress_buf
;
383 uInt avail_in_pre_deflate
= strm
->avail_in
;
384 int zRC
= deflate(strm
, flush
);
385 if (zRC
== Z_STREAM_ERROR
)
387 ap_log_error(APLOG_MARK
, APLOG_ERR
, APR_EGENERAL
, r
->server
,"CRCCACHE-ENCODE deflate error: %d", zRC
);
390 int have
= sizeof(compress_buf
) - strm
->avail_out
;
391 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
,
392 "CRCCACHE-ENCODE deflate rslt %d, flush %d, consumed %d, produced %d",
393 zRC
, flush
, avail_in_pre_deflate
- strm
->avail_in
, have
);
396 // output buffer contains some data to be written
397 // ap_log_hex(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server, compress_buf, have);
398 unsigned bucket_size
= have
;
399 if (ctx
->compression_state
!= COMPRESSION_FIRST_BLOCK_WRITTEN
)
401 bucket_size
+= ENCODING_COMPRESSED_HEADER_SIZE
;
403 ctx
->tx_length
+= bucket_size
;
404 char * buf
= apr_palloc(r
->pool
, bucket_size
);
406 if (ctx
->compression_state
!= COMPRESSION_FIRST_BLOCK_WRITTEN
)
408 buf
[0] = ENCODING_COMPRESSED
;
409 memcpy(buf
+ ENCODING_COMPRESSED_HEADER_SIZE
, compress_buf
, have
);
410 ctx
->compression_state
= COMPRESSION_FIRST_BLOCK_WRITTEN
;
414 memcpy(buf
, compress_buf
, have
);
416 apr_bucket
* b
= apr_bucket_pool_create(buf
, bucket_size
, r
->pool
, f
->c
->bucket_alloc
);
417 APR_BRIGADE_INSERT_TAIL(ctx
->bb
, b
);
420 while (strm
->avail_out
== 0);
421 if (strm
->avail_in
!= 0)
423 ap_log_error(APLOG_MARK
, APLOG_ERR
, APR_EGENERAL
, r
->server
,"CRCCACHE-ENCODE deflate still has %d input bytes available", strm
->avail_in
);
431 static apr_status_t
flush_compress_buffer(ap_filter_t
*f
)
433 crccache_ctx
*ctx
= f
->ctx
;
434 apr_status_t rslt
= APR_SUCCESS
; // assume all will be fine
436 if (ctx
->debug_skip_writing
)
439 if (ctx
->compression_state
!= COMPRESSION_BUFFER_EMPTY
)
441 rslt
= write_compress_buffer(f
, Z_FINISH
); // take the real status
442 deflateReset(ctx
->compression_stream
);
443 ctx
->compression_state
= COMPRESSION_BUFFER_EMPTY
;
444 // ____ ctx->debug_skip_writing = 1; // skip writing after handling first compressed block
452 static apr_status_t
write_literal(ap_filter_t
*f
, unsigned char *buffer
, long count
)
454 crccache_ctx
*ctx
= f
->ctx
;
456 if (ctx
->debug_skip_writing
)
460 if (ctx
->compression_state
== COMPRESSION_BUFFER_EMPTY
)
462 ctx
->compression_state
= COMPRESSION_FIRST_DATA_RECEIVED
;
464 ctx
->compression_stream
->avail_in
= count
;
465 ctx
->compression_stream
->next_in
= buffer
;
466 // ap_log_hex(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, f->r->server, buffer, count);
467 rslt
= write_compress_buffer(f
, Z_NO_FLUSH
);
468 ctx
->tx_uncompressed_length
+= count
;
475 static apr_status_t
write_hash(ap_filter_t
*f
, unsigned char *buffer
, long count
)
477 request_rec
*r
= f
->r
;
478 crccache_ctx
*ctx
= f
->ctx
;
481 rslt
= flush_compress_buffer(f
);
482 if (rslt
!= APR_SUCCESS
)
487 if (ctx
->debug_skip_writing
)
490 unsigned bucket_size
= count
+ 1;
491 ctx
->tx_length
+= bucket_size
;
492 ctx
->tx_uncompressed_length
+= bucket_size
;
493 char * buf
= apr_palloc(r
->pool
, bucket_size
);
495 buf
[0] = ENCODING_HASH
;
496 memcpy(&buf
[1],buffer
,count
);
497 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
,"CRCCACHE-ENCODE HASH");
498 apr_bucket
* b
= apr_bucket_pool_create(buf
, bucket_size
, r
->pool
, f
->c
->bucket_alloc
);
499 APR_BRIGADE_INSERT_TAIL(ctx
->bb
, b
);
505 * Write a block reference
507 static apr_status_t
write_block_reference(ap_filter_t
*f
, long result
)
509 request_rec
*r
= f
->r
;
510 crccache_ctx
*ctx
= f
->ctx
;
513 rslt
= flush_compress_buffer(f
);
514 if (rslt
!= APR_SUCCESS
)
519 if (ctx
->debug_skip_writing
)
522 unsigned bucket_size
= ENCODING_BLOCK_HEADER_SIZE
;
523 ctx
->tx_length
+= bucket_size
;
524 ctx
->tx_uncompressed_length
+= bucket_size
;
525 char * buf
= apr_palloc(r
->pool
, bucket_size
);
527 buf
[0] = ENCODING_BLOCK
;
528 buf
[1] = (unsigned char) ((-result
)-1); // invert and get back to zero based
529 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
,"CRCCACHE-ENCODE block %d",buf
[1]);
530 apr_bucket
* b
= apr_bucket_pool_create(buf
, bucket_size
, r
->pool
, f
->c
->bucket_alloc
);
531 APR_BRIGADE_INSERT_TAIL(ctx
->bb
, b
);
536 * Process one block of data: try to match it against the CRC, append
537 * the result to the ouput ring and remember the result (e.g. was
538 * it a block-match or was a literal processed)
540 static apr_status_t
process_block(ap_filter_t
*f
)
542 request_rec
*r
= f
->r
;
543 crccache_ctx
*ctx
= f
->ctx
;
544 apr_status_t rslt
= APR_SUCCESS
;
546 // ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"CRCCACHE-ENCODE invoking crc_read_block");
547 if (ctx
->crcctx
== NULL
)
549 // This should never happen
550 ap_log_error(APLOG_MARK
, APLOG_ERR
, APR_SUCCESS
, r
->server
,"CRCCACHE-ENCODE crcctx = null");
555 size_t ndigested
= crc_read_block(
558 ctx
->buffer
+ctx
->buffer_digest_getpos
,
559 ctx
->buffer_putpos
-ctx
->buffer_digest_getpos
561 ap_log_error_wrapper(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
,
562 "CRCCACHE-ENCODE crc_read_block ndigested: %zu, result %ld", ndigested
, rd_block_rslt
);
565 // rd_block_rslt = 0: do nothing (it is a 'literal' block of exactly 'tail_blocksize' bytes at the end of the buffer,
566 // it will have to be moved to the beginning of the moving window so that it can be written upon the next call to
567 // crc_read_block or crc_read_flush)
568 // rd_block_rslt > 0: send literal
569 // rd_block_rslt < 0: send block
570 if (rd_block_rslt
> 0)
572 rslt
= write_literal(f
, ctx
->buffer
+ctx
->buffer_read_getpos
, rd_block_rslt
);
573 ctx
->buffer_read_getpos
+= rd_block_rslt
;
575 else if (rd_block_rslt
< 0)
577 rslt
= write_block_reference(f
, rd_block_rslt
);
578 unsigned char blocknum
= (unsigned char) ((-rd_block_rslt
)-1);
579 ctx
->buffer_read_getpos
+= (blocknum
== ctx
->block_count
-1) ? ctx
->tail_block_size
: ctx
->block_size
;
582 // Update the context with the results
583 ctx
->crc_read_block_result
= rd_block_rslt
;
584 ctx
->crc_read_block_ndigested
= ndigested
;
585 ctx
->buffer_digest_getpos
+= ndigested
;
590 * Flush one block of data: get it from the crccontext, append
591 * the result to the ouput ring and remember the result (e.g. was
592 * it a block-match or was a literal processed)
594 static apr_status_t
flush_block(ap_filter_t
*f
)
596 request_rec
*r
= f
->r
;
597 crccache_ctx
*ctx
= f
->ctx
;
598 apr_status_t rslt
= APR_SUCCESS
;
600 // ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"CRCCACHE-ENCODE invoking crc_read_flush");
601 if (ctx
->crcctx
== NULL
)
603 // This should never happen
604 ap_log_error(APLOG_MARK
, APLOG_ERR
, APR_SUCCESS
, r
->server
,"CRCCACHE-ENCODE crcctx = null");
607 long rd_flush_rslt
= crc_read_flush(ctx
->crcctx
);
608 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
,"CRCCACHE-ENCODE crc_read_flush result %ld", rd_flush_rslt
);
610 // rd_flush_rslt = 0: do nothing
611 // rd_flush_rslt > 0: send literal that was already digested but not yet returned by read-block
612 // rd_flush_rslt < 0: send block that was already digested but not yet returned by read-block
613 if (rd_flush_rslt
> 0)
615 rslt
= write_literal(f
, ctx
->buffer
+ctx
->buffer_read_getpos
, rd_flush_rslt
);
616 ctx
->buffer_read_getpos
+= rd_flush_rslt
;
618 else if (rd_flush_rslt
< 0)
620 rslt
= write_block_reference(f
, rd_flush_rslt
);
621 unsigned char blocknum
= (unsigned char) ((-rd_flush_rslt
)-1);
622 ctx
->buffer_read_getpos
+= (blocknum
== ctx
->block_count
-1) ? ctx
->tail_block_size
: ctx
->block_size
;
625 // Update the context with the results
626 ctx
->crc_read_block_result
= rd_flush_rslt
;
627 ctx
->crc_read_block_ndigested
= 0;
632 * Clean-up memory used by helper libraries, that don't know about apr_palloc
633 * and that (probably) use classical malloc/free
635 static apr_status_t
deflate_ctx_cleanup(void *data
)
637 crccache_ctx
*ctx
= (crccache_ctx
*)data
;
641 if (ctx
->compression_state
!= COMPRESSION_ENDED
)
643 deflateEnd(ctx
->compression_stream
);
644 ctx
->compression_state
= COMPRESSION_ENDED
;
646 if (ctx
->crcctx
!= NULL
)
648 crc_context_free(ctx
->crcctx
);
655 * End of stream has been reached:
656 * Process any data still in the buffer and flush all internal
657 * structures of crcsync and of zlib
658 * Furthermore, add a strong hash
660 static apr_status_t
process_eos(ap_filter_t
*f
)
662 crccache_ctx
*ctx
= f
->ctx
;
665 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, f
->r
->server
,"CRCCACHE-ENCODE EOS reached for APR bucket");
668 while (ctx
->buffer_digest_getpos
< ctx
->buffer_putpos
)
670 // There is still data in the buffer. Process it.
671 rslt
= process_block(f
);
672 if (rslt
!= APR_SUCCESS
)
680 // Flush remaining block in the crcctx
681 rslt
= flush_block(f
);
682 if (rslt
!= APR_SUCCESS
)
687 while (ctx
->crc_read_block_result
!= 0);
689 // Flush anything that is remaining in the compress buffer
690 rslt
= flush_compress_buffer(f
);
691 if (rslt
!= APR_SUCCESS
)
697 unsigned char md_value
[EVP_MAX_MD_SIZE
];
698 EVP_DigestFinal_ex(&ctx
->mdctx
, md_value
, &md_len
);
699 EVP_MD_CTX_cleanup(&ctx
->mdctx
);
700 write_hash(f
, md_value
, md_len
);
702 ap_log_error_wrapper(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, f
->r
->server
,
703 "CRCCACHE-ENCODE complete size %f%% (encoded-uncompressed=%zu encoded=%zu original=%zu",100.0*((float)ctx
->tx_length
/(float)ctx
->orig_length
),ctx
->tx_uncompressed_length
, ctx
->tx_length
, ctx
->orig_length
);
709 * Process a data bucket; append data into a moving window buffer
710 * and encode it with crcsync algorithm when window contains enough
711 * data for crcsync to find potential matches
713 static apr_status_t
process_data_bucket(ap_filter_t
*f
, apr_bucket
*e
)
715 request_rec
*r
= f
->r
;
716 crccache_ctx
*ctx
= f
->ctx
;
723 apr_bucket_read(e
, &data
, &len
, APR_BLOCK_READ
);
724 ctx
->orig_length
+= len
;
725 // update our sha1 hash
726 EVP_DigestUpdate(&ctx
->mdctx
, data
, len
);
727 // ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"CRCCACHE-ENCODE normal data in APR bucket, read %ld", len);
729 // append data to the buffer and encode buffer content using the crc_read_block magic
730 size_t bucket_used_count
= 0;
731 size_t bucket_data_left
;
732 while(bucket_used_count
< len
)
734 /* Append as much data as possible into the buffer */
735 bucket_data_left
= len
- bucket_used_count
;
736 size_t copy_size
= MIN(ctx
->buffer_size
-ctx
->buffer_putpos
, bucket_data_left
);
737 memcpy(ctx
->buffer
+ctx
->buffer_putpos
, data
+bucket_used_count
, copy_size
);
738 bucket_used_count
+= copy_size
;
739 bucket_data_left
-= copy_size
;
740 ctx
->buffer_putpos
+= copy_size
;
741 /* flush the buffer if it is appropriate */
742 if (ctx
->buffer_putpos
== ctx
->buffer_size
)
744 // Buffer is filled to the end. Flush as much as possible
745 ap_log_error_wrapper(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
,
746 "CRCCACHE-ENCODE Buffer is filled to end, read_getpos: %zu, digest_getpos: %zu, putpos: %zu, putpos-digest_getpos: %zu (tail_block_size: %zu)",
747 ctx
->buffer_read_getpos
, ctx
->buffer_digest_getpos
, ctx
->buffer_putpos
, ctx
->buffer_putpos
-ctx
->buffer_digest_getpos
, ctx
->tail_block_size
);
748 while (ctx
->buffer_putpos
- ctx
->buffer_digest_getpos
> ctx
->tail_block_size
)
750 // We can still scan at least 1 tail block + 1 byte forward: try to flush next part
751 rslt
= process_block(f
);
752 if (rslt
!= APR_SUCCESS
)
756 ap_log_error_wrapper(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
,
757 "CRCCACHE-ENCODE Processed a block, read_getpos: %zu, digest_getpos: %zu, putpos: %zu, putpos-digest_getpos: %zu (tail_block_size: %zu)",
758 ctx
->buffer_read_getpos
, ctx
->buffer_digest_getpos
, ctx
->buffer_putpos
, ctx
->buffer_putpos
-ctx
->buffer_digest_getpos
, ctx
->tail_block_size
);
761 if (ctx
->buffer_putpos
!= ctx
->buffer_read_getpos
)
763 // Copy the remaining part of the buffer to the start of the buffer,
764 // so that it can be filled again as new data arrive
765 ap_log_error_wrapper(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
,
766 "CRCCACHE-ENCODE Moving %zu bytes to begin of buffer",
767 ctx
->buffer_putpos
- ctx
->buffer_read_getpos
);
768 memcpy(ctx
->buffer
, ctx
->buffer
+ ctx
->buffer_read_getpos
, ctx
->buffer_putpos
- ctx
->buffer_read_getpos
);
770 // Reset getpos to the beginning of the buffer and putpos accordingly
771 ctx
->buffer_putpos
-= ctx
->buffer_read_getpos
;
772 ctx
->buffer_digest_getpos
-= ctx
->buffer_read_getpos
;
773 ctx
->buffer_read_getpos
= 0;
775 while (ctx
->crc_read_block_result
< 0 && ctx
->buffer_putpos
- ctx
->buffer_digest_getpos
> ctx
->tail_block_size
)
777 // Previous block matched exactly. Let's hope the next block as well
778 ap_log_error_wrapper(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
,
779 "CRCCACHE-ENCODE Previous block matched, read_getpos: %zu, digest_getpos: %zu, putpos: %zu, putpos-digest_getpos: %zu (tail_block_size: %zu)",
780 ctx
->buffer_read_getpos
, ctx
->buffer_digest_getpos
, ctx
->buffer_putpos
, ctx
->buffer_putpos
-ctx
->buffer_digest_getpos
, ctx
->tail_block_size
);
781 rslt
= process_block(f
);
782 if (rslt
!= APR_SUCCESS
)
788 return APR_SUCCESS
; // Yahoo, all went well
795 * Deliver cached content (headers and body) up the stack.
797 static apr_status_t
crccache_out_filter(ap_filter_t
*f
, apr_bucket_brigade
*bb
) {
799 request_rec
*r
= f
->r
;
800 crccache_ctx
*ctx
= f
->ctx
;
802 int return_code
= APR_SUCCESS
;
804 /* Do nothing if asked to filter nothing. */
805 if (APR_BRIGADE_EMPTY(bb
)) {
806 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
,"CRCCACHE-ENCODE bucket brigade is empty -> nothing todo");
807 return ap_pass_brigade(f
->next
, bb
);
810 /* If state is not yet GS_ENCODING content, we need to ensure that it is okay to send
811 * the encoded content. If the state is GS_ENCODING, that means we've done
812 * this before and we liked it.
813 * This could be not so nice if we always fail. But, if we succeed,
814 * we're in better shape.
816 if (ctx
->global_state
!= GS_ENCODING
)
818 const char *encoding
;
820 /* only work on main request/no subrequests */
821 if (r
->main
!= NULL
) {
822 ap_remove_output_filter(f
);
823 return ap_pass_brigade(f
->next
, bb
);
826 /* We can't operate on Content-Ranges */
827 if (apr_table_get(r
->headers_out
, "Content-Range") != NULL
) {
828 ap_remove_output_filter(f
);
829 return ap_pass_brigade(f
->next
, bb
);
832 // Advertise crcsync capability and preferred blocksize multiple
833 apr_table_mergen(r
->headers_out
, CAPABILITY_HEADER
, "crcsync; m=1");
835 if (ctx
->global_state
== GS_INIT
)
837 // Still in GS_INIT state implies there is no need to encode.
838 // It is sufficient that the capability header has been set
839 ap_remove_output_filter(f
);
840 return ap_pass_brigade(f
->next
, bb
);
843 if (ctx
->global_state
!= GS_HEADERS_SAVED
)
845 ap_log_error(APLOG_MARK
, APLOG_ERR
, APR_SUCCESS
, r
->server
, "CRCCACHE-ENCODE unexpected ctx-state: %d, expected: %d", ctx
->global_state
, GS_HEADERS_SAVED
);
849 /* Indicate to caches that they may only re-use this response for a request
850 * with the same BLOCK_HEADER value as the current request
851 * Indicate to clients that the server supports crcsync, even if checks
852 * further down prevent this specific response from being crc-encoded
854 apr_table_mergen(r
->headers_out
, VARY_HEADER
, BLOCK_HEADER
);
856 /* If Content-Encoding is present and differs from "identity", we can't handle it */
857 encoding
= apr_table_get(r
->headers_out
, ENCODING_HEADER
);
858 if (encoding
&& strcasecmp(encoding
, "identity")) {
859 ap_log_error(APLOG_MARK
, APLOG_INFO
, APR_SUCCESS
, r
->server
,
860 "Not encoding with crccache. It is already encoded with: %s", encoding
);
861 ap_remove_output_filter(f
);
862 return ap_pass_brigade(f
->next
, bb
);
865 /* For a 304 or 204 response there is no entity included in
866 * the response and hence nothing to crc-encode. */
867 if (r
->status
== HTTP_NOT_MODIFIED
|| r
->status
==HTTP_NO_CONTENT
)
869 ap_remove_output_filter(f
);
870 return ap_pass_brigade(f
->next
, bb
);
873 /* All Ok. We're cool with filtering this. */
874 ctx
->global_state
= GS_ENCODING
;
875 ctx
->debug_skip_writing
= 0;
876 ctx
->orig_length
= 0;
878 ctx
->tx_uncompressed_length
= 0;
879 ctx
->bb
= apr_brigade_create(r
->pool
, f
->c
->bucket_alloc
);
881 /* Parse the input headers */
883 header
= apr_table_get(r
->headers_in
, BLOCK_HEADER
);
887 if (decode_if_block_header(header
,&version
,&file_size
,&hashes
) < 0)
889 ap_log_error(APLOG_MARK
, APLOG_ERR
, 0, r
->server
,"crccache: failed to decode if-block header");
890 ap_remove_output_filter(f
);
891 return ap_pass_brigade(f
->next
, bb
);
894 ctx
->block_count
= apr_base64_decode_len(hashes
)/(HASH_SIZE
/8);
895 // this may over allocate by a couple of bytes but no big deal
896 ctx
->hashes
= apr_palloc(r
->pool
, apr_base64_decode_len(hashes
));
897 apr_base64_decode((char *)ctx
->hashes
, hashes
);
901 ctx
->block_size
= file_size
/ctx
->block_count
;
902 ctx
->tail_block_size
= ctx
->block_size
+ file_size
% ctx
->block_count
;
903 size_t block_count_including_final_block
= ctx
->block_count
;// + (ctx->tail_block_size != 0);
904 ap_log_error(APLOG_MARK
, APLOG_INFO
, APR_SUCCESS
, r
->server
,
905 "If-block header decoded, version %d: %d hashes of %d and one of %d", version
, ctx
->block_count
-1,(int)ctx
->block_size
,(int)ctx
->tail_block_size
);
907 // swap to network byte order
909 for (i
= 0; i
< block_count_including_final_block
;++i
)
911 htobe64(ctx
->hashes
[i
]);
914 // Data come in at chunks that are potentially smaller then block_size or tail_block_size
915 // Accumulate those chunks into a buffer.
916 // The buffer must be at least block_size+tail_block_size so that crc_read_block(...) can find a matching block, regardless
917 // of the data alignment compared to the original page.
918 // The buffer is basically a moving window in the new page. So sometimes the last part of the buffer must be
919 // copied to the beginning again. The larger the buffer, the less often such a copy operation is required
920 // Though, the larger the buffer, the bigger the memory demand.
921 // A size of 3*block_size+tail_block_size+1 (20% of original file size) seems to be a good balance
923 // TODO: tune the buffer-size depending on the mime-type. Already compressed data (zip, gif, jpg, mpg, etc) will
924 // probably only have matching blocks if the file is totally unmodified. As soon as one byte differs in the original
925 // uncompressed data, the entire compressed data stream will be different anyway, so in such case it does not make
926 // much sense to even keep invoking the crc_read_block(...) function as soon as a difference has been found.
927 // Hence, no need to make a (potentially huge) buffer for these type of compressed (potentially huge, think about movies)
929 ctx
->buffer_size
= ctx
->block_size
*3 + ctx
->tail_block_size
+ 1;
930 ctx
->buffer_digest_getpos
= 0;
931 ctx
->buffer_read_getpos
= 0;
932 ctx
->buffer_putpos
= 0;
933 ctx
->crc_read_block_result
= 0;
934 ctx
->buffer
= apr_palloc(r
->pool
, ctx
->buffer_size
);
936 /* Setup deflate for compressing non-matched literal data */
937 ctx
->compression_state
= COMPRESSION_BUFFER_EMPTY
;
938 // TODO: should I pass some apr_palloc based function to prevent memory leaks
939 //in case of unexpected errors?
941 ap_log_error_wrapper(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
,"CRCCACHE-ENCODE size of compression stream: %zd",sizeof(*(ctx
->compression_stream
)));
942 ctx
->compression_stream
= apr_palloc(r
->pool
, sizeof(*(ctx
->compression_stream
)));
943 ctx
->compression_stream
->zalloc
= Z_NULL
;
944 ctx
->compression_stream
->zfree
= Z_NULL
;
945 ctx
->compression_stream
->opaque
= Z_NULL
;
946 zRC
= deflateInit(ctx
->compression_stream
, Z_DEFAULT_COMPRESSION
); // TODO: make compression level configurable
949 // Can't initialize the compression engine for compressing literal data
950 deflateEnd(ctx
->compression_stream
); // free memory used by deflate
951 free(ctx
->compression_stream
);
952 ctx
->compression_stream
= NULL
;
953 ap_log_rerror(APLOG_MARK
, APLOG_ERR
, 0, r
,
954 "unable to init Zlib: "
955 "deflateInit returned %d: URL %s",
957 ap_remove_output_filter(f
);
958 return ap_pass_brigade(f
->next
, bb
);
961 // initialise the context for our sha1 digest of the unencoded response
962 EVP_MD_CTX_init(&ctx
->mdctx
);
963 const EVP_MD
*md
= EVP_sha1();
964 EVP_DigestInit_ex(&ctx
->mdctx
, md
, NULL
);
966 // now initialise the crcsync context that will do the real work
967 ctx
->crcctx
= crc_context_new(ctx
->block_size
, HASH_SIZE
,ctx
->hashes
, block_count_including_final_block
, ctx
->tail_block_size
);
969 // Register a cleanup function to cleanup internal libz and crcsync resources
970 apr_pool_cleanup_register(r
->pool
, ctx
, deflate_ctx_cleanup
,
971 apr_pool_cleanup_null
);
973 // All checks and initializations are OK
974 // Modify headers that are impacted by this transformation
975 apr_table_setn(r
->headers_out
, ENCODING_HEADER
, CRCCACHE_ENCODING
);
976 apr_table_unset(r
->headers_out
, "Content-Length");
977 apr_table_unset(r
->headers_out
, "Content-MD5");
978 crccache_check_etag(r
, ctx
, CRCCACHE_ENCODING
);
980 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
, "CRCCACHE Server end of context setup");
983 if (ctx
->global_state
!= GS_ENCODING
)
985 ap_log_error(APLOG_MARK
, APLOG_ERR
, APR_SUCCESS
, r
->server
, "CRCCACHE-ENCODE unexpected ctx-state: %d, expected: %d", ctx
->global_state
, GS_ENCODING
);
989 while (!APR_BRIGADE_EMPTY(bb
))
995 e
= APR_BRIGADE_FIRST(bb
);
997 if (APR_BUCKET_IS_EOS(e
))
999 // Process end of stream: flush data buffers, compression buffers, etc.
1000 // and calculate a strong hash.
1001 rslt
= process_eos(f
);
1003 /* Remove EOS from the old list, and insert into the new. */
1004 APR_BUCKET_REMOVE(e
);
1005 APR_BRIGADE_INSERT_TAIL(ctx
->bb
, e
);
1007 /* This filter is done once it has served up its content */
1008 ap_remove_output_filter(f
);
1010 if (rslt
!= APR_SUCCESS
)
1012 return rslt
; // A problem occurred. Abort the processing
1015 /* Okay, we've seen the EOS.
1016 * Time to pass it along down the chain.
1018 return ap_pass_brigade(f
->next
, ctx
->bb
);
1021 if (APR_BUCKET_IS_FLUSH(e
))
1023 // ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"CRCCACHE-ENCODE flush APR bucket");
1026 /* Remove flush bucket from old brigade and insert into the new. */
1027 APR_BUCKET_REMOVE(e
);
1028 // TODO: optimize; do not insert two consecutive flushes when no intermediate
1029 // output block was written
1030 APR_BRIGADE_INSERT_TAIL(ctx
->bb
, e
);
1031 rv
= ap_pass_brigade(f
->next
, ctx
->bb
);
1032 if (rv
!= APR_SUCCESS
) {
1038 if (APR_BUCKET_IS_METADATA(e
)) {
1040 * Remove meta data bucket from old brigade and insert into the
1043 apr_bucket_read(e
, &data
, &len
, APR_BLOCK_READ
);
1045 ap_log_error_wrapper(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
,
1046 "CRCCACHE-ENCODE Metadata, read %zu, %d %d %d",len
,data
[0],data
[1],data
[2]);
1048 ap_log_error_wrapper(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
,
1049 "CRCCACHE-ENCODE Metadata, read %zu",len
);
1050 APR_BUCKET_REMOVE(e
);
1051 APR_BRIGADE_INSERT_TAIL(ctx
->bb
, e
);
1055 // Bucket is none of the above types. Assume it is a data bucket
1056 // which means it can be encoded with the crcsync algorithm
1057 rslt
= process_data_bucket(f
, e
);
1059 APR_BUCKET_REMOVE(e
);
1060 if (rslt
!= APR_SUCCESS
)
1062 break; // A problem occurred. Abort the processing
1066 apr_brigade_cleanup(bb
);
1072 * CRCCACHE_OUT_SAVE_HEADERS filter
1075 * Save the content-encoding and etag headers into the context
1077 static apr_status_t
crccache_out_save_headers_filter(ap_filter_t
*f
, apr_bucket_brigade
*bb
) {
1078 request_rec
*r
= f
->r
;
1079 crccache_ctx
*ctx
= f
->ctx
;
1081 /* Do nothing if asked to filter nothing. */
1082 if (APR_BRIGADE_EMPTY(bb
)) {
1083 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
,"CRCCACHE-ENCODE (save headers) bucket brigade is empty -> nothing todo");
1084 return ap_pass_brigade(f
->next
, bb
);
1087 if (ctx
->global_state
!= GS_INIT
)
1089 ap_log_error(APLOG_MARK
, APLOG_ERR
, APR_SUCCESS
, r
->server
, "CRCCACHE-ENCODE (save headers) unexpected ctx-state: %d, expected: %d", ctx
->global_state
, GS_INIT
);
1090 return APR_EGENERAL
;
1093 /* only work on main request/no subrequests */
1094 if (r
->main
!= NULL
) {
1095 ap_remove_output_filter(f
);
1096 return ap_pass_brigade(f
->next
, bb
);
1099 /* We can't operate on Content-Ranges */
1100 if (apr_table_get(r
->headers_out
, "Content-Range") != NULL
) {
1101 ap_remove_output_filter(f
);
1102 return ap_pass_brigade(f
->next
, bb
);
1105 /* Save content-encoding and etag header for later usage by the crcsync
1108 const char *encoding
= apr_table_get(r
->headers_out
, ENCODING_HEADER
);
1109 if (encoding
!= NULL
)
1111 ctx
->old_content_encoding
= apr_pstrdup(r
->pool
, encoding
);
1112 ap_log_error(APLOG_MARK
, APLOG_INFO
, APR_SUCCESS
, r
->server
,
1113 "Saved old content-encoding: %s", encoding
);
1115 const char *etag
= apr_table_get(r
->headers_out
, ETAG_HEADER
);
1118 ctx
->old_etag
= apr_pstrdup(r
->pool
, etag
);
1119 ap_log_error(APLOG_MARK
, APLOG_INFO
, APR_SUCCESS
, r
->server
,
1120 "Saved old etag: %s", etag
);
1122 ctx
->global_state
= GS_HEADERS_SAVED
;
1124 /* Done saving headers. Nothing left to do */
1125 ap_remove_output_filter(f
);
1126 return ap_pass_brigade(f
->next
, bb
);
/*
 * Hook-registration callback of the crccache server module; it is the
 * function placed in the "register hooks" slot of crccache_server_module.
 *
 * Logs an INFO message, hooks crccache_server_header_parser_handler via
 * ap_hook_header_parser and registers the module's output filters. The
 * handles returned for CRCCACHE_OUT_SAVE_HEADERS and CRCCACHE_OUT are kept
 * in crccache_out_save_headers_filter_handle and crccache_out_filter_handle.
 *
 * The pool argument p is not referenced by the statements below.
 */
1130 static void crccache_server_register_hook(apr_pool_t
*p
) {
1131 ap_log_error(APLOG_MARK
, APLOG_INFO
, 0, NULL
,
1132 "Registering crccache server module, (C) 2009, Toby Collett and Alex Wulms");
/* NOTE(review): the trailing argument(s) that close this
 * ap_hook_header_parser call (normally the hook ordering constant) do not
 * appear on the lines that follow -- confirm against the pristine file. */
1134 ap_hook_header_parser(crccache_server_header_parser_handler
, NULL
, NULL
,
/* NOTE(review): no handle is kept for the CRCCACHE_HEADER filter, unlike the
 * two registrations below -- confirm whether this registration is meant to
 * be active. */
1137 ap_register_output_filter("CRCCACHE_HEADER", crccache_server_header_filter_handler,
1138 NULL, AP_FTYPE_PROTOCOL);
/* Header-saving filter: registered one step below AP_FTYPE_RESOURCE. */
1140 crccache_out_save_headers_filter_handle
= ap_register_output_filter("CRCCACHE_OUT_SAVE_HEADERS",
1141 crccache_out_save_headers_filter
, NULL
, AP_FTYPE_RESOURCE
-1); // make sure to handle it *before* INFLATE filter (or other decode modules)
/* Encoding filter (crccache_out_filter): registered at AP_FTYPE_CONTENT_SET. */
1143 crccache_out_filter_handle
= ap_register_output_filter("CRCCACHE_OUT",
1144 crccache_out_filter
, NULL
, AP_FTYPE_CONTENT_SET
);
1147 module AP_MODULE_DECLARE_DATA crccache_server_module
= {
1148 STANDARD20_MODULE_STUFF
, NULL
, /* create per-directory config structure */
1149 NULL
, /* merge per-directory config structures */
1150 crccache_server_create_config
, /* create per-server config structure */
1151 NULL
, /* merge per-server config structures */
1152 crccache_server_cmds
, /* command apr_table_t */
1153 crccache_server_register_hook
/* register hooks */