1 /* Licensed to the Apache Software Foundation (ASF) under one or more
2 * contributor license agreements. See the NOTICE file distributed with
3 * this work for additional information regarding copyright ownership.
4 * The ASF licenses this file to You under the Apache License, Version 2.0
5 * (the "License"); you may not use this file except in compliance with
6 * the License. You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 /* crcsync/crccache apache server module
19 * This module is designed to run as a proxy server on the remote end of a slow
20 * internet link. This module uses a crc32 running hash algorithm to reduce
21 * data transfer in cached but modified downstream files.
23 * CRC algorithm uses the crcsync library created by Rusty Russell
25 * Authors: Toby Collett (2009), Alex Wulms (2009)
30 #include "apr_file_io.h"
31 #include "apr_strings.h"
32 #include "mod_cache.h"
33 #include "mod_disk_cache.h"
34 #include "ap_provider.h"
35 #include "util_filter.h"
36 #include "util_script.h"
37 #include "util_charset.h"
40 #include "ap_wrapper.h"
41 #include "mod_crccache_server.h"
43 #include <crcsync/crcsync.h>
46 module AP_MODULE_DECLARE_DATA crccache_server_module
;
/*
 * Possible states for the output compression.
 *
 * The state tracks where the zlib stream used for literal data stands,
 * so that the writer knows whether the "compressed" record header must
 * still be emitted and whether there is anything to flush.
 */
typedef enum {
	/* no literal data is pending in the compression stream */
	COMPRESSION_BUFFER_EMPTY,
	/* literal data was handed to deflate, no output bucket written yet */
	COMPRESSION_FIRST_DATA_RECEIVED,
	/* first output bucket (including the record header) has been written */
	COMPRESSION_FIRST_BLOCK_WRITTEN,
	/* deflateEnd() was called by the pool cleanup; stream is unusable */
	COMPRESSION_ENDED
} compression_state_t;
56 //#define MIN(X,Y) (X<Y?X:Y)
58 static void *create_config(apr_pool_t
*p
, server_rec
*s
) {
59 crccache_server_conf
*conf
= apr_pcalloc(p
, sizeof(crccache_server_conf
));
60 conf
->disk_cache_conf
= apr_pcalloc(p
, sizeof(disk_cache_conf
));
62 /* XXX: Set default values */
64 conf
->disk_cache_conf
->dirlevels
= DEFAULT_DIRLEVELS
;
65 conf
->disk_cache_conf
->dirlength
= DEFAULT_DIRLENGTH
;
66 conf
->disk_cache_conf
->maxfs
= DEFAULT_MAX_FILE_SIZE
;
67 conf
->disk_cache_conf
->minfs
= DEFAULT_MIN_FILE_SIZE
;
69 conf
->disk_cache_conf
->cache_root
= NULL
;
70 conf
->disk_cache_conf
->cache_root_len
= 0;
75 typedef struct crccache_ctx_t
{
76 unsigned char *buffer
;
77 size_t buffer_digest_getpos
;
78 size_t buffer_read_getpos
;
81 long crc_read_block_result
;
82 size_t crc_read_block_ndigested
;
83 apr_bucket_brigade
*bb
;
85 size_t tail_block_size
;
86 unsigned hashes
[FULL_BLOCK_COUNT
+1];
87 struct crc_context
*crcctx
;
90 size_t tx_uncompressed_length
;
91 compression_state_t compression_state
;
92 z_stream
*compression_stream
;
93 int debug_skip_writing
; // ____
98 * mod_disk_cache configuration directives handlers.
100 static const char *set_cache_root(cmd_parms
*parms
, void *in_struct_ptr
,
102 crccache_server_conf
*conf
= ap_get_module_config(parms
->server
->module_config
,
103 &crccache_server_module
);
104 conf
->disk_cache_conf
->cache_root
= arg
;
105 conf
->disk_cache_conf
->cache_root_len
= strlen(arg
);
106 /* TODO: canonicalize cache_root and strip off any trailing slashes */
112 * Only enable CRCCache Server when requested through the config file
113 * so that the user can switch CRCCache server on in a specific virtual server
115 static const char *set_crccache_server(cmd_parms
*parms
, void *dummy
, int flag
)
117 crccache_server_conf
*conf
= ap_get_module_config(parms
->server
->module_config
,
118 &crccache_server_module
);
119 conf
->enabled
= flag
;
125 * Consider eliminating the next two directives in favor of
126 * Ian's prime number hash...
127 * key = hash_fn( r->uri)
128 * filename = "/key % prime1 /key %prime2/key %prime3"
130 static const char *set_cache_dirlevels(cmd_parms
*parms
, void *in_struct_ptr
,
132 crccache_server_conf
*conf
= ap_get_module_config(parms
->server
->module_config
,
133 &crccache_server_module
);
136 return "CacheDirLevelsServer value must be an integer greater than 0";
137 if (val
* conf
->disk_cache_conf
->dirlength
> CACHEFILE_LEN
)
138 return "CacheDirLevelsServer*CacheDirLengthServer value must not be higher than 20";
139 conf
->disk_cache_conf
->dirlevels
= val
;
142 static const char *set_cache_dirlength(cmd_parms
*parms
, void *in_struct_ptr
,
144 crccache_server_conf
*conf
= ap_get_module_config(parms
->server
->module_config
,
145 &crccache_server_module
);
148 return "CacheDirLengthServer value must be an integer greater than 0";
149 if (val
* conf
->disk_cache_conf
->dirlevels
> CACHEFILE_LEN
)
150 return "CacheDirLevelsServer*CacheDirLengthServer value must not be higher than 20";
152 conf
->disk_cache_conf
->dirlength
= val
;
156 static const char *set_cache_minfs(cmd_parms
*parms
, void *in_struct_ptr
,
158 crccache_server_conf
*conf
= ap_get_module_config(parms
->server
->module_config
,
159 &crccache_server_module
);
161 if (apr_strtoff(&conf
->disk_cache_conf
->minfs
, arg
, NULL
, 0) != APR_SUCCESS
|| conf
->disk_cache_conf
->minfs
163 return "CacheMinFileSizeServer argument must be a non-negative integer representing the min size of a file to cache in bytes.";
168 static const char *set_cache_maxfs(cmd_parms
*parms
, void *in_struct_ptr
,
170 crccache_server_conf
*conf
= ap_get_module_config(parms
->server
->module_config
,
171 &crccache_server_module
);
172 if (apr_strtoff(&conf
->disk_cache_conf
->maxfs
, arg
, NULL
, 0) != APR_SUCCESS
|| conf
->disk_cache_conf
->maxfs
174 return "CacheMaxFileSizeServer argument must be a non-negative integer representing the max size of a file to cache in bytes.";
179 static const command_rec disk_cache_cmds
[] = { AP_INIT_TAKE1("CacheRootServer", set_cache_root
, NULL
, RSRC_CONF
,
180 "The directory to store cache files"), AP_INIT_TAKE1("CacheDirLevelsServer", set_cache_dirlevels
, NULL
, RSRC_CONF
,
181 "The number of levels of subdirectories in the cache"), AP_INIT_TAKE1("CacheDirLengthServer", set_cache_dirlength
, NULL
, RSRC_CONF
,
182 "The number of characters in subdirectory names"), AP_INIT_TAKE1("CacheMinFileSizeServer", set_cache_minfs
, NULL
, RSRC_CONF
,
183 "The minimum file size to cache a document"), AP_INIT_TAKE1("CacheMaxFileSizeServer", set_cache_maxfs
, NULL
, RSRC_CONF
,
184 "The maximum file size to cache a document"), AP_INIT_FLAG("CRCcacheServer", set_crccache_server
, NULL
, RSRC_CONF
,
185 "Enable the CRCCache server in this virtual server"),{ NULL
} };
187 static ap_filter_rec_t
*crccache_out_filter_handle
;
189 static int crccache_server_header_parser_handler(request_rec
*r
) {
190 crccache_server_conf
*conf
= ap_get_module_config(r
->server
->module_config
,
191 &crccache_server_module
);
195 const char * hashes
, *file_size_header
;
196 hashes
= apr_table_get(r
->headers_in
, BLOCK_HEADER
);
197 file_size_header
= apr_table_get(r
->headers_in
, FILE_SIZE_HEADER
);
198 if (hashes
&& file_size_header
)
201 int ret
= sscanf(file_size_header
,"%zu",&file_size
);
204 ap_log_error(APLOG_MARK
, APLOG_ERR
, 0, r
->server
, "CRCCACHE-ENCODE Failed to convert file size header to size_t, %s",file_size_header
);
208 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, 0, r
->server
, "CRCCACHE-ENCODE Block Hashes header found so enabling protocol: %s",hashes
);
209 // Insert mod_deflate's INFLATE filter in the chain to unzip content
210 // so that there is clear text available for the delta algorithm
211 ap_filter_t
*inflate_filter
= ap_add_output_filter("INFLATE", NULL
, r
, r
->connection
);
212 if (inflate_filter
== NULL
)
214 ap_log_error(APLOG_MARK
, APLOG_WARNING
, APR_SUCCESS
, r
->server
, "CRCCACHE-ENCODE Could not enable INFLATE filter. Will be unable to handle deflated encoded content");
218 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
, "CRCCACHE-ENCODE Successfully enabled INFLATE filter to handle deflated content");
220 // And the crccache filter itself ofcourse
221 ap_add_output_filter_handle(crccache_out_filter_handle
,
222 NULL
, r
, r
->connection
);
231 /* PR 39727: we're screwing up our clients if we leave a strong ETag
232 * header while transforming content. Henrik Nordstrom suggests
235 * Pending a more thorough review of our Etag handling, let's just
236 * implement his suggestion. It fixes the bug, or at least turns it
237 * from a showstopper to an inefficiency. And it breaks nothing that
238 * wasn't already broken.
239 * TODO: the crccache_client should undo this once the reconstructed page has been saved in the cache
241 static void crccache_check_etag(request_rec
*r
, const char *transform
) {
242 const char *etag
= apr_table_get(r
->headers_out
, "ETag");
243 if (etag
&& (((etag
[0] != 'W') && (etag
[0] != 'w')) || (etag
[1] != '/'))) {
244 apr_table_set(r
->headers_out
, "ETag", apr_pstrcat(r
->pool
, etag
, "-",
249 static apr_status_t
write_compress_buffer(ap_filter_t
*f
, int flush
)
251 unsigned char compress_buf
[30000];
252 request_rec
*r
= f
->r
;
253 crccache_ctx
*ctx
= f
->ctx
;
254 z_stream
*strm
= ctx
->compression_stream
;
256 if (ctx
->debug_skip_writing
)
261 strm
->avail_out
= sizeof(compress_buf
);
262 strm
->next_out
= compress_buf
;
263 uInt avail_in_pre_deflate
= strm
->avail_in
;
264 int zRC
= deflate(strm
, flush
);
265 if (zRC
== Z_STREAM_ERROR
)
267 ap_log_error(APLOG_MARK
, APLOG_ERR
, APR_EGENERAL
, r
->server
,"CRCCACHE-ENCODE deflate error: %d", zRC
);
270 int have
= sizeof(compress_buf
) - strm
->avail_out
;
271 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
,
272 "CRCCACHE-ENCODE deflate rslt %d, flush %d, consumed %d, produced %d",
273 zRC
, flush
, avail_in_pre_deflate
- strm
->avail_in
, have
);
276 // output buffer contains some data to be written
277 // ap_log_hex(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server, compress_buf, have);
278 unsigned bucket_size
= have
;
279 if (ctx
->compression_state
!= COMPRESSION_FIRST_BLOCK_WRITTEN
)
281 bucket_size
+= ENCODING_COMPRESSED_HEADER_SIZE
;
283 ctx
->tx_length
+= bucket_size
;
284 char * buf
= apr_palloc(r
->pool
, bucket_size
);
286 if (ctx
->compression_state
!= COMPRESSION_FIRST_BLOCK_WRITTEN
)
288 buf
[0] = ENCODING_COMPRESSED
;
289 memcpy(buf
+ ENCODING_COMPRESSED_HEADER_SIZE
, compress_buf
, have
);
290 ctx
->compression_state
= COMPRESSION_FIRST_BLOCK_WRITTEN
;
294 memcpy(buf
, compress_buf
, have
);
296 apr_bucket
* b
= apr_bucket_pool_create(buf
, bucket_size
, r
->pool
, f
->c
->bucket_alloc
);
297 APR_BRIGADE_INSERT_TAIL(ctx
->bb
, b
);
300 while (strm
->avail_out
== 0);
301 if (strm
->avail_in
!= 0)
303 ap_log_error(APLOG_MARK
, APLOG_ERR
, APR_EGENERAL
, r
->server
,"CRCCACHE-ENCODE deflate still has %d input bytes available", strm
->avail_in
);
311 static apr_status_t
flush_compress_buffer(ap_filter_t
*f
)
313 crccache_ctx
*ctx
= f
->ctx
;
314 apr_status_t rslt
= APR_SUCCESS
; // assume all will be fine
316 if (ctx
->debug_skip_writing
)
319 if (ctx
->compression_state
!= COMPRESSION_BUFFER_EMPTY
)
321 rslt
= write_compress_buffer(f
, Z_FINISH
); // take the real status
322 deflateReset(ctx
->compression_stream
);
323 ctx
->compression_state
= COMPRESSION_BUFFER_EMPTY
;
324 // ____ ctx->debug_skip_writing = 1; // skip writing after handling first compressed block
332 static apr_status_t
write_literal(ap_filter_t
*f
, unsigned char *buffer
, long count
)
334 crccache_ctx
*ctx
= f
->ctx
;
336 if (ctx
->debug_skip_writing
)
340 if (ctx
->compression_state
== COMPRESSION_BUFFER_EMPTY
)
342 ctx
->compression_state
= COMPRESSION_FIRST_DATA_RECEIVED
;
344 ctx
->compression_stream
->avail_in
= count
;
345 ctx
->compression_stream
->next_in
= buffer
;
346 // ap_log_hex(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, f->r->server, buffer, count);
347 rslt
= write_compress_buffer(f
, Z_NO_FLUSH
);
348 ctx
->tx_uncompressed_length
+= count
;
353 * Write a block reference
355 static apr_status_t
write_block_reference(ap_filter_t
*f
, long result
)
357 request_rec
*r
= f
->r
;
358 crccache_ctx
*ctx
= f
->ctx
;
361 rslt
= flush_compress_buffer(f
);
362 if (rslt
!= APR_SUCCESS
)
367 if (ctx
->debug_skip_writing
)
370 unsigned bucket_size
= ENCODING_BLOCK_HEADER_SIZE
;
371 ctx
->tx_length
+= bucket_size
;
372 ctx
->tx_uncompressed_length
+= bucket_size
;
373 char * buf
= apr_palloc(r
->pool
, bucket_size
);
375 buf
[0] = ENCODING_BLOCK
;
376 buf
[1] = (unsigned char) ((-result
)-1); // invert and get back to zero based
377 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
,"CRCCACHE-ENCODE block %d",buf
[1]);
378 apr_bucket
* b
= apr_bucket_pool_create(buf
, bucket_size
, r
->pool
, f
->c
->bucket_alloc
);
379 APR_BRIGADE_INSERT_TAIL(ctx
->bb
, b
);
384 * Process one block of data: try to match it against the CRC, append
385 * the result to the ouput ring and remember the result (e.g. was
386 * it a block-match or was a literal processed)
388 static apr_status_t
process_block(ap_filter_t
*f
)
390 request_rec
*r
= f
->r
;
391 crccache_ctx
*ctx
= f
->ctx
;
392 apr_status_t rslt
= APR_SUCCESS
;
394 // ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"CRCCACHE-ENCODE invoking crc_read_block");
395 if (ctx
->crcctx
== NULL
)
397 // This should never happen
398 ap_log_error(APLOG_MARK
, APLOG_ERR
, APR_SUCCESS
, r
->server
,"CRCCACHE-ENCODE crcctx = null");
403 size_t ndigested
= crc_read_block(
406 ctx
->buffer
+ctx
->buffer_digest_getpos
,
407 ctx
->buffer_putpos
-ctx
->buffer_digest_getpos
409 ap_log_error_wrapper(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
,
410 "CRCCACHE-ENCODE crc_read_block ndigested: %zu, result %ld", ndigested
, rd_block_rslt
);
413 // rd_block_rslt = 0: do nothing (it is a 'literal' block of exactly 'blocksize' bytes at the end of the buffer, it will have to be moved
414 // to the beginning of the moving window so that it can be written upon the next call to crc_read_block or crc_read_flush)
415 // rd_block_rslt > 0: send literal
416 // rd_block_rslt < 0: send block
417 if (rd_block_rslt
> 0)
419 rslt
= write_literal(f
, ctx
->buffer
+ctx
->buffer_read_getpos
, rd_block_rslt
);
420 ctx
->buffer_read_getpos
+= rd_block_rslt
;
422 else if (rd_block_rslt
< 0)
424 rslt
= write_block_reference(f
, rd_block_rslt
);
425 unsigned char blocknum
= (unsigned char) ((-rd_block_rslt
)-1);
426 ctx
->buffer_read_getpos
+= (blocknum
== FULL_BLOCK_COUNT
) ? ctx
->tail_block_size
: ctx
->block_size
;
429 // Update the context with the results
430 ctx
->crc_read_block_result
= rd_block_rslt
;
431 ctx
->crc_read_block_ndigested
= ndigested
;
432 ctx
->buffer_digest_getpos
+= ndigested
;
437 * Flush one block of data: get it from the crccontext, append
438 * the result to the ouput ring and remember the result (e.g. was
439 * it a block-match or was a literal processed)
441 static apr_status_t
flush_block(ap_filter_t
*f
)
443 request_rec
*r
= f
->r
;
444 crccache_ctx
*ctx
= f
->ctx
;
445 apr_status_t rslt
= APR_SUCCESS
;
447 // ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"CRCCACHE-ENCODE invoking crc_read_flush");
448 if (ctx
->crcctx
== NULL
)
450 // This should never happen
451 ap_log_error(APLOG_MARK
, APLOG_ERR
, APR_SUCCESS
, r
->server
,"CRCCACHE-ENCODE crcctx = null");
454 long rd_flush_rslt
= crc_read_flush(ctx
->crcctx
);
455 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
,"CRCCACHE-ENCODE crc_read_flush result %ld", rd_flush_rslt
);
457 // rd_flush_rslt = 0: do nothing
458 // rd_flush_rslt > 0: send literal that was already digested but not yet returned by read-block
459 // rd_flush_rslt < 0: send block that was already digested but not yet returned by read-block
460 if (rd_flush_rslt
> 0)
462 rslt
= write_literal(f
, ctx
->buffer
+ctx
->buffer_read_getpos
, rd_flush_rslt
);
463 ctx
->buffer_read_getpos
+= rd_flush_rslt
;
465 else if (rd_flush_rslt
< 0)
467 rslt
= write_block_reference(f
, rd_flush_rslt
);
468 unsigned char blocknum
= (unsigned char) ((-rd_flush_rslt
)-1);
469 ctx
->buffer_read_getpos
+= (blocknum
== FULL_BLOCK_COUNT
) ? ctx
->tail_block_size
: ctx
->block_size
;
472 // Update the context with the results
473 ctx
->crc_read_block_result
= rd_flush_rslt
;
474 ctx
->crc_read_block_ndigested
= 0;
479 * Clean-up memory used by helper libraries, that don't know about apr_palloc
480 * and that (probably) use classical malloc/free
482 static apr_status_t
deflate_ctx_cleanup(void *data
)
484 crccache_ctx
*ctx
= (crccache_ctx
*)data
;
488 if (ctx
->compression_state
!= COMPRESSION_ENDED
)
490 deflateEnd(ctx
->compression_stream
);
491 ctx
->compression_state
= COMPRESSION_ENDED
;
493 if (ctx
->crcctx
!= NULL
)
495 crc_context_free(ctx
->crcctx
);
502 * End of stream has been reached:
503 * Process any data still in the buffer and flush all internal
504 * structures of crcsync and of zlib
505 * Furthermore, add a strong hash
507 static apr_status_t
process_eos(ap_filter_t
*f
)
509 crccache_ctx
*ctx
= f
->ctx
;
512 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, f
->r
->server
,"CRCCACHE-ENCODE EOS reached for APR bucket");
515 while (ctx
->buffer_digest_getpos
< ctx
->buffer_putpos
)
517 // There is still data in the buffer. Process it.
518 rslt
= process_block(f
);
519 if (rslt
!= APR_SUCCESS
)
527 // Flush remaining block in the crcctx
528 rslt
= flush_block(f
);
529 if (rslt
!= APR_SUCCESS
)
534 while (ctx
->crc_read_block_result
!= 0);
536 // Flush anything that is remaining in the compress buffer
537 rslt
= flush_compress_buffer(f
);
538 if (rslt
!= APR_SUCCESS
)
543 // TODO: add strong hash here
545 ap_log_error_wrapper(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, f
->r
->server
,
546 "CRCCACHE-ENCODE complete size %f%% (encoded-uncompressed=%zu encoded=%zu original=%zu",100.0*((float)ctx
->tx_length
/(float)ctx
->orig_length
),ctx
->tx_uncompressed_length
, ctx
->tx_length
, ctx
->orig_length
);
552 * Process a data bucket; append data into a moving window buffer
553 * and encode it with crcsync algorithm when window contains enough
554 * data for crcsync to find potential matches
556 static apr_status_t
process_data_bucket(ap_filter_t
*f
, apr_bucket
*e
)
558 request_rec
*r
= f
->r
;
559 crccache_ctx
*ctx
= f
->ctx
;
566 apr_bucket_read(e
, &data
, &len
, APR_BLOCK_READ
);
567 ctx
->orig_length
+= len
;
568 // ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"CRCCACHE-ENCODE normal data in APR bucket, read %ld", len);
570 // append data to the buffer and encode buffer content using the crc_read_block magic
571 size_t bucket_used_count
= 0;
572 size_t bucket_data_left
;
573 while(bucket_used_count
< len
)
575 /* Append as much data as possible into the buffer */
576 bucket_data_left
= len
- bucket_used_count
;
577 size_t copy_size
= MIN(ctx
->buffer_size
-ctx
->buffer_putpos
, bucket_data_left
);
578 memcpy(ctx
->buffer
+ctx
->buffer_putpos
, data
+bucket_used_count
, copy_size
);
579 bucket_used_count
+= copy_size
;
580 bucket_data_left
-= copy_size
;
581 ctx
->buffer_putpos
+= copy_size
;
582 /* flush the buffer if it is appropriate */
583 if (ctx
->buffer_putpos
== ctx
->buffer_size
)
585 // Buffer is filled to the end. Flush as much as possible
586 ap_log_error_wrapper(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
,
587 "CRCCACHE-ENCODE Buffer is filled to end, read_getpos: %zu, digest_getpos: %zu, putpos: %zu, putpos-digest_getpos: %zu (blocksize: %zu)",
588 ctx
->buffer_read_getpos
, ctx
->buffer_digest_getpos
, ctx
->buffer_putpos
, ctx
->buffer_putpos
-ctx
->buffer_digest_getpos
, ctx
->block_size
);
589 while (ctx
->buffer_putpos
- ctx
->buffer_digest_getpos
> ctx
->block_size
)
591 // We can still scan at least 1 block + 1 byte forward: try to flush next part
592 rslt
= process_block(f
);
593 if (rslt
!= APR_SUCCESS
)
597 ap_log_error_wrapper(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
,
598 "CRCCACHE-ENCODE Processed a block, read_getpos: %zu, digest_getpos: %zu, putpos: %zu, putpos-digest_getpos: %zu (blocksize: %zu)",
599 ctx
->buffer_read_getpos
, ctx
->buffer_digest_getpos
, ctx
->buffer_putpos
, ctx
->buffer_putpos
-ctx
->buffer_digest_getpos
, ctx
->block_size
);
602 if (ctx
->buffer_putpos
!= ctx
->buffer_read_getpos
)
604 // Copy the remaining part of the buffer to the start of the buffer,
605 // so that it can be filled again as new data arrive
606 ap_log_error_wrapper(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
,
607 "CRCCACHE-ENCODE Moving %zu bytes to begin of buffer",
608 ctx
->buffer_putpos
- ctx
->buffer_read_getpos
);
609 memcpy(ctx
->buffer
, ctx
->buffer
+ ctx
->buffer_read_getpos
, ctx
->buffer_putpos
- ctx
->buffer_read_getpos
);
611 // Reset getpos to the beginning of the buffer and putpos accordingly
612 ctx
->buffer_putpos
-= ctx
->buffer_read_getpos
;
613 ctx
->buffer_digest_getpos
-= ctx
->buffer_read_getpos
;
614 ctx
->buffer_read_getpos
= 0;
616 while (ctx
->crc_read_block_result
< 0 && ctx
->buffer_putpos
- ctx
->buffer_digest_getpos
> ctx
->block_size
)
618 // Previous block matched exactly. Let's hope the next block as well
619 ap_log_error_wrapper(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
,
620 "CRCCACHE-ENCODE Previous block matched, read_getpos: %zu, digest_getpos: %zu, putpos: %zu, putpos-digest_getpos: %zu (blocksize: %zu)",
621 ctx
->buffer_read_getpos
, ctx
->buffer_digest_getpos
, ctx
->buffer_putpos
, ctx
->buffer_putpos
-ctx
->buffer_digest_getpos
, ctx
->block_size
);
622 rslt
= process_block(f
);
623 if (rslt
!= APR_SUCCESS
)
629 return APR_SUCCESS
; // Yahoo, all went well
636 * Deliver cached content (headers and body) up the stack.
638 static apr_status_t
crccache_out_filter(ap_filter_t
*f
, apr_bucket_brigade
*bb
) {
640 request_rec
*r
= f
->r
;
641 crccache_ctx
*ctx
= f
->ctx
;
644 /* Do nothing if asked to filter nothing. */
645 if (APR_BRIGADE_EMPTY(bb
)) {
646 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
,"CRCCACHE-ENCODE bucket brigade is empty -> nothing todo");
647 return ap_pass_brigade(f
->next
, bb
);
650 /* If we don't have a context, we need to ensure that it is okay to send
651 * the deflated content. If we have a context, that means we've done
652 * this before and we liked it.
653 * This could be not so nice if we always fail. But, if we succeed,
654 * we're in better shape.
658 const char *encoding
;
660 /* only work on main request/no subrequests */
661 if (r
->main
!= NULL
) {
662 ap_remove_output_filter(f
);
663 return ap_pass_brigade(f
->next
, bb
);
666 /* We can't operate on Content-Ranges */
667 if (apr_table_get(r
->headers_out
, "Content-Range") != NULL
) {
668 ap_remove_output_filter(f
);
669 return ap_pass_brigade(f
->next
, bb
);
672 /* Let's see what our current Content-Encoding is.
673 * If it's already encoded by crccache: don't compress again.
674 * (We could, but let's not.)
676 encoding
= apr_table_get(r
->headers_out
, ENCODING_HEADER
);
677 if (encoding
&& strcasecmp(CRCCACHE_ENCODING
,encoding
) == 0)
679 /* Even if we don't accept this request based on it not having
680 * the Accept-Encoding, we need to note that we were looking
681 * for this header and downstream proxies should be aware of that.
683 apr_table_mergen(r
->headers_out
, "Vary", "A-IM");
684 ap_remove_output_filter(f
);
685 return ap_pass_brigade(f
->next
, bb
);
688 /* For a 304 or 204 response there is no entity included in
689 * the response and hence nothing to deflate. */
690 if (r
->status
== HTTP_NOT_MODIFIED
|| r
->status
==HTTP_NO_CONTENT
)
692 ap_remove_output_filter(f
);
693 return ap_pass_brigade(f
->next
, bb
);
696 /* All Ok. We're cool with filtering this. */
697 ctx
= f
->ctx
= apr_pcalloc(r
->pool
, sizeof(*ctx
));
698 ctx
->debug_skip_writing
= 0;
699 ctx
->orig_length
= 0;
701 ctx
->tx_uncompressed_length
= 0;
702 ctx
->bb
= apr_brigade_create(r
->pool
, f
->c
->bucket_alloc
);
704 /* If Content-Encoding present and differs from "identity", we can't handle it */
705 if (encoding
&& strcasecmp(encoding
, "identity")) {
706 ap_log_error(APLOG_MARK
, APLOG_INFO
, APR_SUCCESS
, r
->server
,
707 "Not encoding with crccache. It is already encoded with: %s", encoding
);
708 ap_remove_output_filter(f
);
709 return ap_pass_brigade(f
->next
, bb
);
712 /* Parse the input headers */
713 const char * hashes
, *file_size_header
;
714 hashes
= apr_table_get(r
->headers_in
, BLOCK_HEADER
);
715 file_size_header
= apr_table_get(r
->headers_in
, FILE_SIZE_HEADER
);
717 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
,
718 "CRCCACHE-ENCODE encoding file size header %s", file_size_header
);
721 size_t file_size
= strtoull(file_size_header
,NULL
,0);
722 if (errno
|| file_size
<= 0)
724 ap_log_error(APLOG_MARK
, APLOG_ERR
, 0, r
->server
,"crccache: failed to convert file size header to size_t, %s",file_size_header
);
725 ap_remove_output_filter(f
);
726 return ap_pass_brigade(f
->next
, bb
);
728 ctx
->block_size
= file_size
/FULL_BLOCK_COUNT
;
729 ctx
->tail_block_size
= file_size
% FULL_BLOCK_COUNT
;
730 size_t block_count_including_final_block
= FULL_BLOCK_COUNT
+ (ctx
->tail_block_size
!= 0);
732 // Data come in at chunks that are potentially smaller then block_size
733 // Accumulate those chunks into a buffer.
734 // The buffer must be at least 2*block_size so that crc_read_block(...) can find a matching block, regardless
735 // of the data alignment compared to the original page.
736 // The buffer is basically a moving window in the new page. So sometimes the last part of the buffer must be
737 // copied to the beginning again. The larger the buffer, the less often such a copy operation is required
738 // Though, the larger the buffer, the bigger the memory demand.
739 // A size of 4*block_size (20% of original file size) seems to be a good balance
741 // TODO: tune the buffer-size depending on the mime-type. Already compressed data (zip, gif, jpg, mpg, etc) will
742 // probably only have matching blocks if the file is totally unmodified. As soon as one byte differs in the original
743 // uncompressed data, the entire compressed data stream will be different anyway, so in such case it does not make
744 // much sense to even keep invoking the crc_read_block(...) function as soon as a difference has been found.
745 // Hence, no need to make a (potentially huge) buffer for these type of compressed (potentially huge, think about movies)
747 ctx
->buffer_size
= ctx
->block_size
*4 + 1;
748 ctx
->buffer_digest_getpos
= 0;
749 ctx
->buffer_read_getpos
= 0;
750 ctx
->buffer_putpos
= 0;
751 ctx
->crc_read_block_result
= 0;
752 ctx
->buffer
= apr_palloc(r
->pool
, ctx
->buffer_size
);
756 for (ii
= 0; ii
< block_count_including_final_block
; ++ii
)
758 ctx
->hashes
[ii
] = decode_30bithash(&hashes
[ii
*HASH_BASE64_SIZE_TX
]);
759 //ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server, "CRCCACHE-ENCODE decoded hash[%d] %08X",ii,ctx->hashes[ii]);
762 /* Setup deflate for compressing non-matched literal data */
763 ctx
->compression_state
= COMPRESSION_BUFFER_EMPTY
;
764 // TODO: should I pass some apr_palloc based function to prevent memory leaks
765 //in case of unexpected errors?
767 ap_log_error_wrapper(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
,"CRCCACHE-ENCODE size of compression stream: %zd",sizeof(*(ctx
->compression_stream
)));
768 ctx
->compression_stream
= apr_palloc(r
->pool
, sizeof(*(ctx
->compression_stream
)));
769 ctx
->compression_stream
->zalloc
= Z_NULL
;
770 ctx
->compression_stream
->zfree
= Z_NULL
;
771 ctx
->compression_stream
->opaque
= Z_NULL
;
772 zRC
= deflateInit(ctx
->compression_stream
, Z_DEFAULT_COMPRESSION
); // TODO: make compression level configurable
775 // Can't initialize the compression engine for compressing literal data
776 deflateEnd(ctx
->compression_stream
); // free memory used by deflate
777 free(ctx
->compression_stream
);
778 ctx
->compression_stream
= NULL
;
779 ap_log_rerror(APLOG_MARK
, APLOG_ERR
, 0, r
,
780 "unable to init Zlib: "
781 "deflateInit returned %d: URL %s",
783 ap_remove_output_filter(f
);
784 return ap_pass_brigade(f
->next
, bb
);
787 // now initialise the crcsync context that will do the real work
788 ctx
->crcctx
= crc_context_new(ctx
->block_size
, HASH_SIZE
,ctx
->hashes
, block_count_including_final_block
, ctx
->tail_block_size
);
790 // Register a cleanup function to cleanup internal libz and crcsync resources
791 apr_pool_cleanup_register(r
->pool
, ctx
, deflate_ctx_cleanup
,
792 apr_pool_cleanup_null
);
794 // All checks and initializations are OK
795 // Modify headers that are impacted by this transformation
796 // TODO: the crccache-client could recalculate these headers once it has
797 // reconstructed the page, before handling the reconstructed page
798 // back to the client
799 apr_table_setn(r
->headers_out
, ENCODING_HEADER
, CRCCACHE_ENCODING
);
800 apr_table_unset(r
->headers_out
, "Content-Length");
801 apr_table_unset(r
->headers_out
, "Content-MD5");
802 crccache_check_etag(r
, CRCCACHE_ENCODING
);
807 while (!APR_BRIGADE_EMPTY(bb
))
813 e
= APR_BRIGADE_FIRST(bb
);
815 if (APR_BUCKET_IS_EOS(e
))
817 // Process end of stream: flush data buffers, compression buffers, etc.
818 // and calculate a strong hash.
819 rslt
= process_eos(f
);
821 /* Remove EOS from the old list, and insert into the new. */
822 APR_BUCKET_REMOVE(e
);
823 APR_BRIGADE_INSERT_TAIL(ctx
->bb
, e
);
825 /* This filter is done once it has served up its content */
826 ap_remove_output_filter(f
);
828 if (rslt
!= APR_SUCCESS
)
830 return rslt
; // A problem occurred. Abort the processing
833 /* Okay, we've seen the EOS.
834 * Time to pass it along down the chain.
836 return ap_pass_brigade(f
->next
, ctx
->bb
);
839 if (APR_BUCKET_IS_FLUSH(e
))
841 // ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"CRCCACHE-ENCODE flush APR bucket");
844 /* Remove flush bucket from old brigade and insert into the new. */
845 APR_BUCKET_REMOVE(e
);
846 // TODO: optimize; do not insert two consecutive flushes when no intermediate
847 // output block was written
848 APR_BRIGADE_INSERT_TAIL(ctx
->bb
, e
);
849 rv
= ap_pass_brigade(f
->next
, ctx
->bb
);
850 if (rv
!= APR_SUCCESS
) {
856 if (APR_BUCKET_IS_METADATA(e
)) {
857 // ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"CRCCACHE-ENCODE metadata APR bucket");
859 * Remove meta data bucket from old brigade and insert into the
862 apr_bucket_read(e
, &data
, &len
, APR_BLOCK_READ
);
864 ap_log_error_wrapper(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
,
865 "CRCCACHE-ENCODE Metadata, read %zu, %d %d %d",len
,data
[0],data
[1],data
[2]);
867 ap_log_error_wrapper(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
,
868 "CRCCACHE-ENCODE Metadata, read %zu",len
);
869 APR_BUCKET_REMOVE(e
);
870 APR_BRIGADE_INSERT_TAIL(ctx
->bb
, e
);
874 // Bucket is non of the above types. Assume it is a data bucket
875 // which means it can be encoded with the crcsync algorithm
876 rslt
= process_data_bucket(f
, e
);
878 APR_BUCKET_REMOVE(e
);
879 if (rslt
!= APR_SUCCESS
)
881 break; // A problem occurred. Abort the processing
885 apr_brigade_cleanup(bb
);
889 static void disk_cache_register_hook(apr_pool_t
*p
) {
890 ap_log_error(APLOG_MARK
, APLOG_INFO
, 0, NULL
,
891 "Registering crccache server module, (C) 2009, Toby Collett and Alex Wulms");
893 ap_hook_header_parser(crccache_server_header_parser_handler
, NULL
, NULL
,
896 crccache_out_filter_handle
= ap_register_output_filter("CRCCACHE_OUT",
897 crccache_out_filter
, NULL
, AP_FTYPE_CONTENT_SET
);
900 module AP_MODULE_DECLARE_DATA crccache_server_module
= {
901 STANDARD20_MODULE_STUFF
, NULL
, /* create per-directory config structure */
902 NULL
, /* merge per-directory config structures */
903 create_config
, /* create per-server config structure */
904 NULL
, /* merge per-server config structures */
905 disk_cache_cmds
, /* command apr_table_t */
906 disk_cache_register_hook
/* register hooks */