1 /* Licensed to the Apache Software Foundation (ASF) under one or more
2 * contributor license agreements. See the NOTICE file distributed with
3 * this work for additional information regarding copyright ownership.
4 * The ASF licenses this file to You under the Apache License, Version 2.0
5 * (the "License"); you may not use this file except in compliance with
6 * the License. You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 /* crcsync/crccache apache server module
19 * This module is designed to run as a proxy server on the remote end of a slow
20 * internet link. This module uses a crc32 running hash algorithm to reduce
21 * data transfer in cached but modified downstream files.
23 * CRC algorithm uses the crcsync library created by Rusty Russel
25 * Authors: Toby Collett (2009), Alex Wulms (2009)
30 #include "apr_file_io.h"
31 #include "apr_strings.h"
32 #include <apr_base64.h>
33 #include "mod_cache.h"
34 #include "mod_disk_cache.h"
35 #include "ap_provider.h"
36 #include "util_filter.h"
37 #include "util_script.h"
38 #include "util_charset.h"
41 #include "ap_wrapper.h"
42 #include "mod_crccache_server.h"
44 #include <crcsync/crcsync.h>
47 module AP_MODULE_DECLARE_DATA crccache_server_module
;
/*
 * Possible states for the output compression.
 */
typedef enum {
    COMPRESSION_BUFFER_EMPTY,        /* no literal data pending in the deflate stream */
    COMPRESSION_FIRST_DATA_RECEIVED, /* literal data was fed, section header not yet written */
    COMPRESSION_FIRST_BLOCK_WRITTEN, /* compressed-section header byte has been emitted */
    COMPRESSION_ENDED                /* deflateEnd() has been called by the pool cleanup */
} compression_state_t;
57 //#define MIN(X,Y) (X<Y?X:Y)
59 static void *create_config(apr_pool_t
*p
, server_rec
*s
) {
60 crccache_server_conf
*conf
= apr_pcalloc(p
, sizeof(crccache_server_conf
));
61 conf
->disk_cache_conf
= apr_pcalloc(p
, sizeof(disk_cache_conf
));
63 /* XXX: Set default values */
65 conf
->disk_cache_conf
->dirlevels
= DEFAULT_DIRLEVELS
;
66 conf
->disk_cache_conf
->dirlength
= DEFAULT_DIRLENGTH
;
67 conf
->disk_cache_conf
->maxfs
= DEFAULT_MAX_FILE_SIZE
;
68 conf
->disk_cache_conf
->minfs
= DEFAULT_MIN_FILE_SIZE
;
70 conf
->disk_cache_conf
->cache_root
= NULL
;
71 conf
->disk_cache_conf
->cache_root_len
= 0;
76 typedef struct crccache_ctx_t
{
77 unsigned char *buffer
;
78 size_t buffer_digest_getpos
;
79 size_t buffer_read_getpos
;
82 long crc_read_block_result
;
83 size_t crc_read_block_ndigested
;
84 apr_bucket_brigade
*bb
;
86 size_t tail_block_size
;
87 uint64_t hashes
[FULL_BLOCK_COUNT
+1];
88 struct crc_context
*crcctx
;
91 size_t tx_uncompressed_length
;
92 compression_state_t compression_state
;
93 z_stream
*compression_stream
;
94 int debug_skip_writing
; // ____
99 * mod_disk_cache configuration directives handlers.
101 static const char *set_cache_root(cmd_parms
*parms
, void *in_struct_ptr
,
103 crccache_server_conf
*conf
= ap_get_module_config(parms
->server
->module_config
,
104 &crccache_server_module
);
105 conf
->disk_cache_conf
->cache_root
= arg
;
106 conf
->disk_cache_conf
->cache_root_len
= strlen(arg
);
107 /* TODO: canonicalize cache_root and strip off any trailing slashes */
113 * Only enable CRCCache Server when requested through the config file
114 * so that the user can switch CRCCache server on in a specific virtual server
116 static const char *set_crccache_server(cmd_parms
*parms
, void *dummy
, int flag
)
118 crccache_server_conf
*conf
= ap_get_module_config(parms
->server
->module_config
,
119 &crccache_server_module
);
120 conf
->enabled
= flag
;
126 * Consider eliminating the next two directives in favor of
127 * Ian's prime number hash...
128 * key = hash_fn( r->uri)
129 * filename = "/key % prime1 /key %prime2/key %prime3"
131 static const char *set_cache_dirlevels(cmd_parms
*parms
, void *in_struct_ptr
,
133 crccache_server_conf
*conf
= ap_get_module_config(parms
->server
->module_config
,
134 &crccache_server_module
);
137 return "CacheDirLevelsServer value must be an integer greater than 0";
138 if (val
* conf
->disk_cache_conf
->dirlength
> CACHEFILE_LEN
)
139 return "CacheDirLevelsServer*CacheDirLengthServer value must not be higher than 20";
140 conf
->disk_cache_conf
->dirlevels
= val
;
143 static const char *set_cache_dirlength(cmd_parms
*parms
, void *in_struct_ptr
,
145 crccache_server_conf
*conf
= ap_get_module_config(parms
->server
->module_config
,
146 &crccache_server_module
);
149 return "CacheDirLengthServer value must be an integer greater than 0";
150 if (val
* conf
->disk_cache_conf
->dirlevels
> CACHEFILE_LEN
)
151 return "CacheDirLevelsServer*CacheDirLengthServer value must not be higher than 20";
153 conf
->disk_cache_conf
->dirlength
= val
;
157 static const char *set_cache_minfs(cmd_parms
*parms
, void *in_struct_ptr
,
159 crccache_server_conf
*conf
= ap_get_module_config(parms
->server
->module_config
,
160 &crccache_server_module
);
162 if (apr_strtoff(&conf
->disk_cache_conf
->minfs
, arg
, NULL
, 0) != APR_SUCCESS
|| conf
->disk_cache_conf
->minfs
164 return "CacheMinFileSizeServer argument must be a non-negative integer representing the min size of a file to cache in bytes.";
169 static const char *set_cache_maxfs(cmd_parms
*parms
, void *in_struct_ptr
,
171 crccache_server_conf
*conf
= ap_get_module_config(parms
->server
->module_config
,
172 &crccache_server_module
);
173 if (apr_strtoff(&conf
->disk_cache_conf
->maxfs
, arg
, NULL
, 0) != APR_SUCCESS
|| conf
->disk_cache_conf
->maxfs
175 return "CacheMaxFileSizeServer argument must be a non-negative integer representing the max size of a file to cache in bytes.";
180 static const command_rec disk_cache_cmds
[] = { AP_INIT_TAKE1("CacheRootServer", set_cache_root
, NULL
, RSRC_CONF
,
181 "The directory to store cache files"), AP_INIT_TAKE1("CacheDirLevelsServer", set_cache_dirlevels
, NULL
, RSRC_CONF
,
182 "The number of levels of subdirectories in the cache"), AP_INIT_TAKE1("CacheDirLengthServer", set_cache_dirlength
, NULL
, RSRC_CONF
,
183 "The number of characters in subdirectory names"), AP_INIT_TAKE1("CacheMinFileSizeServer", set_cache_minfs
, NULL
, RSRC_CONF
,
184 "The minimum file size to cache a document"), AP_INIT_TAKE1("CacheMaxFileSizeServer", set_cache_maxfs
, NULL
, RSRC_CONF
,
185 "The maximum file size to cache a document"), AP_INIT_FLAG("CRCcacheServer", set_crccache_server
, NULL
, RSRC_CONF
,
186 "Enable the CRCCache server in this virtual server"),{ NULL
} };
188 static ap_filter_rec_t
*crccache_out_filter_handle
;
190 int decode_if_block_header(const char * header
, int * version
, size_t * file_size
, char ** hashes
)
194 *hashes
= NULL
; // this will be allocated below, make sure we free it
197 for (ii
= 0; ii
< strlen(header
);++ii
)
199 if (header
[ii
] == ',' || ii
== strlen(header
)-1)
201 sscanf(&header
[start
]," v=%d",version
);
202 sscanf(&header
[start
]," h=%as",hashes
);
203 sscanf(&header
[start
]," fs=%zu",file_size
);
210 ap_log_error(APLOG_MARK
, APLOG_ERR
, 0, NULL
, "CRCCACHE-ENCODE no hashes reported in header");
215 ap_log_error(APLOG_MARK
, APLOG_ERR
, 0, NULL
, "CRCCACHE-ENCODE Unsupported header version, %d",*version
);
222 ap_log_error(APLOG_MARK
, APLOG_ERR
, 0, NULL
, "CRCCACHE-ENCODE no file size reported in header");
230 static int crccache_server_header_parser_handler(request_rec
*r
) {
231 crccache_server_conf
*conf
= ap_get_module_config(r
->server
->module_config
,
232 &crccache_server_module
);
233 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, 0, r
->server
, "CRCCACHE-ENCODE handler");
236 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, 0, r
->server
, "CRCCACHE-ENCODE Checking for headers");
238 header
= apr_table_get(r
->headers_in
, BLOCK_HEADER
);
244 if (decode_if_block_header(header
,&version
,&file_size
,&hashes
) < 0)
246 // failed to decode if block header so just process request normally
251 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, 0, r
->server
, "CRCCACHE-ENCODE Block Hashes header found so enabling protocol: %s",hashes
);
252 // Insert mod_deflate's INFLATE filter in the chain to unzip content
253 // so that there is clear text available for the delta algorithm
254 ap_filter_t
*inflate_filter
= ap_add_output_filter("INFLATE", NULL
, r
, r
->connection
);
255 if (inflate_filter
== NULL
)
257 ap_log_error(APLOG_MARK
, APLOG_WARNING
, APR_SUCCESS
, r
->server
, "CRCCACHE-ENCODE Could not enable INFLATE filter. Will be unable to handle deflated encoded content");
261 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
, "CRCCACHE-ENCODE Successfully enabled INFLATE filter to handle deflated content");
263 // And the crccache filter itself ofcourse
264 ap_add_output_filter_handle(crccache_out_filter_handle
,
265 NULL
, r
, r
->connection
);
268 /* // All is okay, so set response header to IM Used
269 ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server, "CRCCACHE-ENCODE Setting 226 header");
271 r->status_line="226 IM Used";
277 /*static int crccache_server_header_filter_handler(ap_filter_t *f, apr_bucket_brigade *b) {
279 request_rec *r = f->r;
281 ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"CRCCACHE-ENCODE Setting return status code");
283 // All is okay, so set response header to IM Used
285 r->status_line="HTTP/1.1 226 IM Used";
289 /* PR 39727: we're screwing up our clients if we leave a strong ETag
290 * header while transforming content. Henrik Nordstrom suggests
293 * Pending a more thorough review of our Etag handling, let's just
294 * implement his suggestion. It fixes the bug, or at least turns it
295 * from a showstopper to an inefficiency. And it breaks nothing that
296 * wasn't already broken.
297 * TODO: the crccache_client should undo this once the reconstructed page has been saved in the cache
299 static void crccache_check_etag(request_rec
*r
, const char *transform
) {
300 const char *etag
= apr_table_get(r
->headers_out
, "ETag");
301 if (etag
&& (((etag
[0] != 'W') && (etag
[0] != 'w')) || (etag
[1] != '/'))) {
302 apr_table_set(r
->headers_out
, "ETag", apr_pstrcat(r
->pool
, etag
, "-",
307 static apr_status_t
write_compress_buffer(ap_filter_t
*f
, int flush
)
309 unsigned char compress_buf
[30000];
310 request_rec
*r
= f
->r
;
311 crccache_ctx
*ctx
= f
->ctx
;
312 z_stream
*strm
= ctx
->compression_stream
;
314 if (ctx
->debug_skip_writing
)
319 strm
->avail_out
= sizeof(compress_buf
);
320 strm
->next_out
= compress_buf
;
321 uInt avail_in_pre_deflate
= strm
->avail_in
;
322 int zRC
= deflate(strm
, flush
);
323 if (zRC
== Z_STREAM_ERROR
)
325 ap_log_error(APLOG_MARK
, APLOG_ERR
, APR_EGENERAL
, r
->server
,"CRCCACHE-ENCODE deflate error: %d", zRC
);
328 int have
= sizeof(compress_buf
) - strm
->avail_out
;
329 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
,
330 "CRCCACHE-ENCODE deflate rslt %d, flush %d, consumed %d, produced %d",
331 zRC
, flush
, avail_in_pre_deflate
- strm
->avail_in
, have
);
334 // output buffer contains some data to be written
335 // ap_log_hex(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server, compress_buf, have);
336 unsigned bucket_size
= have
;
337 if (ctx
->compression_state
!= COMPRESSION_FIRST_BLOCK_WRITTEN
)
339 bucket_size
+= ENCODING_COMPRESSED_HEADER_SIZE
;
341 ctx
->tx_length
+= bucket_size
;
342 char * buf
= apr_palloc(r
->pool
, bucket_size
);
344 if (ctx
->compression_state
!= COMPRESSION_FIRST_BLOCK_WRITTEN
)
346 buf
[0] = ENCODING_COMPRESSED
;
347 memcpy(buf
+ ENCODING_COMPRESSED_HEADER_SIZE
, compress_buf
, have
);
348 ctx
->compression_state
= COMPRESSION_FIRST_BLOCK_WRITTEN
;
352 memcpy(buf
, compress_buf
, have
);
354 apr_bucket
* b
= apr_bucket_pool_create(buf
, bucket_size
, r
->pool
, f
->c
->bucket_alloc
);
355 APR_BRIGADE_INSERT_TAIL(ctx
->bb
, b
);
358 while (strm
->avail_out
== 0);
359 if (strm
->avail_in
!= 0)
361 ap_log_error(APLOG_MARK
, APLOG_ERR
, APR_EGENERAL
, r
->server
,"CRCCACHE-ENCODE deflate still has %d input bytes available", strm
->avail_in
);
369 static apr_status_t
flush_compress_buffer(ap_filter_t
*f
)
371 crccache_ctx
*ctx
= f
->ctx
;
372 apr_status_t rslt
= APR_SUCCESS
; // assume all will be fine
374 if (ctx
->debug_skip_writing
)
377 if (ctx
->compression_state
!= COMPRESSION_BUFFER_EMPTY
)
379 rslt
= write_compress_buffer(f
, Z_FINISH
); // take the real status
380 deflateReset(ctx
->compression_stream
);
381 ctx
->compression_state
= COMPRESSION_BUFFER_EMPTY
;
382 // ____ ctx->debug_skip_writing = 1; // skip writing after handling first compressed block
390 static apr_status_t
write_literal(ap_filter_t
*f
, unsigned char *buffer
, long count
)
392 crccache_ctx
*ctx
= f
->ctx
;
394 if (ctx
->debug_skip_writing
)
398 if (ctx
->compression_state
== COMPRESSION_BUFFER_EMPTY
)
400 ctx
->compression_state
= COMPRESSION_FIRST_DATA_RECEIVED
;
402 ctx
->compression_stream
->avail_in
= count
;
403 ctx
->compression_stream
->next_in
= buffer
;
404 // ap_log_hex(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, f->r->server, buffer, count);
405 rslt
= write_compress_buffer(f
, Z_NO_FLUSH
);
406 ctx
->tx_uncompressed_length
+= count
;
411 * Write a block reference
413 static apr_status_t
write_block_reference(ap_filter_t
*f
, long result
)
415 request_rec
*r
= f
->r
;
416 crccache_ctx
*ctx
= f
->ctx
;
419 rslt
= flush_compress_buffer(f
);
420 if (rslt
!= APR_SUCCESS
)
425 if (ctx
->debug_skip_writing
)
428 unsigned bucket_size
= ENCODING_BLOCK_HEADER_SIZE
;
429 ctx
->tx_length
+= bucket_size
;
430 ctx
->tx_uncompressed_length
+= bucket_size
;
431 char * buf
= apr_palloc(r
->pool
, bucket_size
);
433 buf
[0] = ENCODING_BLOCK
;
434 buf
[1] = (unsigned char) ((-result
)-1); // invert and get back to zero based
435 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
,"CRCCACHE-ENCODE block %d",buf
[1]);
436 apr_bucket
* b
= apr_bucket_pool_create(buf
, bucket_size
, r
->pool
, f
->c
->bucket_alloc
);
437 APR_BRIGADE_INSERT_TAIL(ctx
->bb
, b
);
442 * Process one block of data: try to match it against the CRC, append
443 * the result to the ouput ring and remember the result (e.g. was
444 * it a block-match or was a literal processed)
446 static apr_status_t
process_block(ap_filter_t
*f
)
448 request_rec
*r
= f
->r
;
449 crccache_ctx
*ctx
= f
->ctx
;
450 apr_status_t rslt
= APR_SUCCESS
;
452 // ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"CRCCACHE-ENCODE invoking crc_read_block");
453 if (ctx
->crcctx
== NULL
)
455 // This should never happen
456 ap_log_error(APLOG_MARK
, APLOG_ERR
, APR_SUCCESS
, r
->server
,"CRCCACHE-ENCODE crcctx = null");
461 size_t ndigested
= crc_read_block(
464 ctx
->buffer
+ctx
->buffer_digest_getpos
,
465 ctx
->buffer_putpos
-ctx
->buffer_digest_getpos
467 ap_log_error_wrapper(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
,
468 "CRCCACHE-ENCODE crc_read_block ndigested: %zu, result %ld", ndigested
, rd_block_rslt
);
471 // rd_block_rslt = 0: do nothing (it is a 'literal' block of exactly 'blocksize' bytes at the end of the buffer, it will have to be moved
472 // to the beginning of the moving window so that it can be written upon the next call to crc_read_block or crc_read_flush)
473 // rd_block_rslt > 0: send literal
474 // rd_block_rslt < 0: send block
475 if (rd_block_rslt
> 0)
477 rslt
= write_literal(f
, ctx
->buffer
+ctx
->buffer_read_getpos
, rd_block_rslt
);
478 ctx
->buffer_read_getpos
+= rd_block_rslt
;
480 else if (rd_block_rslt
< 0)
482 rslt
= write_block_reference(f
, rd_block_rslt
);
483 unsigned char blocknum
= (unsigned char) ((-rd_block_rslt
)-1);
484 ctx
->buffer_read_getpos
+= (blocknum
== FULL_BLOCK_COUNT
) ? ctx
->tail_block_size
: ctx
->block_size
;
487 // Update the context with the results
488 ctx
->crc_read_block_result
= rd_block_rslt
;
489 ctx
->crc_read_block_ndigested
= ndigested
;
490 ctx
->buffer_digest_getpos
+= ndigested
;
495 * Flush one block of data: get it from the crccontext, append
496 * the result to the ouput ring and remember the result (e.g. was
497 * it a block-match or was a literal processed)
499 static apr_status_t
flush_block(ap_filter_t
*f
)
501 request_rec
*r
= f
->r
;
502 crccache_ctx
*ctx
= f
->ctx
;
503 apr_status_t rslt
= APR_SUCCESS
;
505 // ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"CRCCACHE-ENCODE invoking crc_read_flush");
506 if (ctx
->crcctx
== NULL
)
508 // This should never happen
509 ap_log_error(APLOG_MARK
, APLOG_ERR
, APR_SUCCESS
, r
->server
,"CRCCACHE-ENCODE crcctx = null");
512 long rd_flush_rslt
= crc_read_flush(ctx
->crcctx
);
513 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
,"CRCCACHE-ENCODE crc_read_flush result %ld", rd_flush_rslt
);
515 // rd_flush_rslt = 0: do nothing
516 // rd_flush_rslt > 0: send literal that was already digested but not yet returned by read-block
517 // rd_flush_rslt < 0: send block that was already digested but not yet returned by read-block
518 if (rd_flush_rslt
> 0)
520 rslt
= write_literal(f
, ctx
->buffer
+ctx
->buffer_read_getpos
, rd_flush_rslt
);
521 ctx
->buffer_read_getpos
+= rd_flush_rslt
;
523 else if (rd_flush_rslt
< 0)
525 rslt
= write_block_reference(f
, rd_flush_rslt
);
526 unsigned char blocknum
= (unsigned char) ((-rd_flush_rslt
)-1);
527 ctx
->buffer_read_getpos
+= (blocknum
== FULL_BLOCK_COUNT
) ? ctx
->tail_block_size
: ctx
->block_size
;
530 // Update the context with the results
531 ctx
->crc_read_block_result
= rd_flush_rslt
;
532 ctx
->crc_read_block_ndigested
= 0;
537 * Clean-up memory used by helper libraries, that don't know about apr_palloc
538 * and that (probably) use classical malloc/free
540 static apr_status_t
deflate_ctx_cleanup(void *data
)
542 crccache_ctx
*ctx
= (crccache_ctx
*)data
;
546 if (ctx
->compression_state
!= COMPRESSION_ENDED
)
548 deflateEnd(ctx
->compression_stream
);
549 ctx
->compression_state
= COMPRESSION_ENDED
;
551 if (ctx
->crcctx
!= NULL
)
553 crc_context_free(ctx
->crcctx
);
560 * End of stream has been reached:
561 * Process any data still in the buffer and flush all internal
562 * structures of crcsync and of zlib
563 * Furthermore, add a strong hash
565 static apr_status_t
process_eos(ap_filter_t
*f
)
567 crccache_ctx
*ctx
= f
->ctx
;
570 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, f
->r
->server
,"CRCCACHE-ENCODE EOS reached for APR bucket");
573 while (ctx
->buffer_digest_getpos
< ctx
->buffer_putpos
)
575 // There is still data in the buffer. Process it.
576 rslt
= process_block(f
);
577 if (rslt
!= APR_SUCCESS
)
585 // Flush remaining block in the crcctx
586 rslt
= flush_block(f
);
587 if (rslt
!= APR_SUCCESS
)
592 while (ctx
->crc_read_block_result
!= 0);
594 // Flush anything that is remaining in the compress buffer
595 rslt
= flush_compress_buffer(f
);
596 if (rslt
!= APR_SUCCESS
)
601 // TODO: add strong hash here
603 ap_log_error_wrapper(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, f
->r
->server
,
604 "CRCCACHE-ENCODE complete size %f%% (encoded-uncompressed=%zu encoded=%zu original=%zu",100.0*((float)ctx
->tx_length
/(float)ctx
->orig_length
),ctx
->tx_uncompressed_length
, ctx
->tx_length
, ctx
->orig_length
);
610 * Process a data bucket; append data into a moving window buffer
611 * and encode it with crcsync algorithm when window contains enough
612 * data for crcsync to find potential matches
614 static apr_status_t
process_data_bucket(ap_filter_t
*f
, apr_bucket
*e
)
616 request_rec
*r
= f
->r
;
617 crccache_ctx
*ctx
= f
->ctx
;
624 apr_bucket_read(e
, &data
, &len
, APR_BLOCK_READ
);
625 ctx
->orig_length
+= len
;
626 // ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"CRCCACHE-ENCODE normal data in APR bucket, read %ld", len);
628 // append data to the buffer and encode buffer content using the crc_read_block magic
629 size_t bucket_used_count
= 0;
630 size_t bucket_data_left
;
631 while(bucket_used_count
< len
)
633 /* Append as much data as possible into the buffer */
634 bucket_data_left
= len
- bucket_used_count
;
635 size_t copy_size
= MIN(ctx
->buffer_size
-ctx
->buffer_putpos
, bucket_data_left
);
636 memcpy(ctx
->buffer
+ctx
->buffer_putpos
, data
+bucket_used_count
, copy_size
);
637 bucket_used_count
+= copy_size
;
638 bucket_data_left
-= copy_size
;
639 ctx
->buffer_putpos
+= copy_size
;
640 /* flush the buffer if it is appropriate */
641 if (ctx
->buffer_putpos
== ctx
->buffer_size
)
643 // Buffer is filled to the end. Flush as much as possible
644 ap_log_error_wrapper(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
,
645 "CRCCACHE-ENCODE Buffer is filled to end, read_getpos: %zu, digest_getpos: %zu, putpos: %zu, putpos-digest_getpos: %zu (blocksize: %zu)",
646 ctx
->buffer_read_getpos
, ctx
->buffer_digest_getpos
, ctx
->buffer_putpos
, ctx
->buffer_putpos
-ctx
->buffer_digest_getpos
, ctx
->block_size
);
647 while (ctx
->buffer_putpos
- ctx
->buffer_digest_getpos
> ctx
->block_size
)
649 // We can still scan at least 1 block + 1 byte forward: try to flush next part
650 rslt
= process_block(f
);
651 if (rslt
!= APR_SUCCESS
)
655 ap_log_error_wrapper(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
,
656 "CRCCACHE-ENCODE Processed a block, read_getpos: %zu, digest_getpos: %zu, putpos: %zu, putpos-digest_getpos: %zu (blocksize: %zu)",
657 ctx
->buffer_read_getpos
, ctx
->buffer_digest_getpos
, ctx
->buffer_putpos
, ctx
->buffer_putpos
-ctx
->buffer_digest_getpos
, ctx
->block_size
);
660 if (ctx
->buffer_putpos
!= ctx
->buffer_read_getpos
)
662 // Copy the remaining part of the buffer to the start of the buffer,
663 // so that it can be filled again as new data arrive
664 ap_log_error_wrapper(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
,
665 "CRCCACHE-ENCODE Moving %zu bytes to begin of buffer",
666 ctx
->buffer_putpos
- ctx
->buffer_read_getpos
);
667 memcpy(ctx
->buffer
, ctx
->buffer
+ ctx
->buffer_read_getpos
, ctx
->buffer_putpos
- ctx
->buffer_read_getpos
);
669 // Reset getpos to the beginning of the buffer and putpos accordingly
670 ctx
->buffer_putpos
-= ctx
->buffer_read_getpos
;
671 ctx
->buffer_digest_getpos
-= ctx
->buffer_read_getpos
;
672 ctx
->buffer_read_getpos
= 0;
674 while (ctx
->crc_read_block_result
< 0 && ctx
->buffer_putpos
- ctx
->buffer_digest_getpos
> ctx
->block_size
)
676 // Previous block matched exactly. Let's hope the next block as well
677 ap_log_error_wrapper(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
,
678 "CRCCACHE-ENCODE Previous block matched, read_getpos: %zu, digest_getpos: %zu, putpos: %zu, putpos-digest_getpos: %zu (blocksize: %zu)",
679 ctx
->buffer_read_getpos
, ctx
->buffer_digest_getpos
, ctx
->buffer_putpos
, ctx
->buffer_putpos
-ctx
->buffer_digest_getpos
, ctx
->block_size
);
680 rslt
= process_block(f
);
681 if (rslt
!= APR_SUCCESS
)
687 return APR_SUCCESS
; // Yahoo, all went well
694 * Deliver cached content (headers and body) up the stack.
696 static apr_status_t
crccache_out_filter(ap_filter_t
*f
, apr_bucket_brigade
*bb
) {
698 request_rec
*r
= f
->r
;
699 crccache_ctx
*ctx
= f
->ctx
;
701 int return_code
= APR_SUCCESS
;
703 /* Do nothing if asked to filter nothing. */
704 if (APR_BRIGADE_EMPTY(bb
)) {
705 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
,"CRCCACHE-ENCODE bucket brigade is empty -> nothing todo");
706 return ap_pass_brigade(f
->next
, bb
);
709 /* If we don't have a context, we need to ensure that it is okay to send
710 * the deflated content. If we have a context, that means we've done
711 * this before and we liked it.
712 * This could be not so nice if we always fail. But, if we succeed,
713 * we're in better shape.
717 const char *encoding
;
719 /* only work on main request/no subrequests */
720 if (r
->main
!= NULL
) {
721 ap_remove_output_filter(f
);
722 return ap_pass_brigade(f
->next
, bb
);
725 /* We can't operate on Content-Ranges */
726 if (apr_table_get(r
->headers_out
, "Content-Range") != NULL
) {
727 ap_remove_output_filter(f
);
728 return ap_pass_brigade(f
->next
, bb
);
731 /* Let's see what our current Content-Encoding is.
732 * If it's already encoded by crccache: don't compress again.
733 * (We could, but let's not.)
735 encoding
= apr_table_get(r
->headers_out
, ENCODING_HEADER
);
736 if (encoding
&& strcasecmp(CRCCACHE_ENCODING
,encoding
) == 0)
738 /* Even if we don't accept this request based on it not having
739 * the Accept-Encoding, we need to note that we were looking
740 * for this header and downstream proxies should be aware of that.
742 apr_table_mergen(r
->headers_out
, "Vary", "A-IM");
743 ap_remove_output_filter(f
);
744 return ap_pass_brigade(f
->next
, bb
);
747 /* For a 304 or 204 response there is no entity included in
748 * the response and hence nothing to deflate. */
749 if (r
->status
== HTTP_NOT_MODIFIED
|| r
->status
==HTTP_NO_CONTENT
)
751 ap_remove_output_filter(f
);
752 return ap_pass_brigade(f
->next
, bb
);
755 /* All Ok. We're cool with filtering this. */
756 ctx
= f
->ctx
= apr_pcalloc(r
->pool
, sizeof(*ctx
));
757 ctx
->debug_skip_writing
= 0;
758 ctx
->orig_length
= 0;
760 ctx
->tx_uncompressed_length
= 0;
761 ctx
->bb
= apr_brigade_create(r
->pool
, f
->c
->bucket_alloc
);
763 /* If Content-Encoding present and differs from "identity", we can't handle it */
764 if (encoding
&& strcasecmp(encoding
, "identity")) {
765 ap_log_error(APLOG_MARK
, APLOG_INFO
, APR_SUCCESS
, r
->server
,
766 "Not encoding with crccache. It is already encoded with: %s", encoding
);
767 ap_remove_output_filter(f
);
768 return ap_pass_brigade(f
->next
, bb
);
771 /* Parse the input headers */
773 header
= apr_table_get(r
->headers_in
, BLOCK_HEADER
);
777 if (decode_if_block_header(header
,&version
,&file_size
,&hashes
) < 0)
779 ap_log_error(APLOG_MARK
, APLOG_ERR
, 0, r
->server
,"crccache: failed to decode if-block header");
780 ap_remove_output_filter(f
);
781 return ap_pass_brigade(f
->next
, bb
);
784 ctx
->block_size
= file_size
/FULL_BLOCK_COUNT
;
785 ctx
->tail_block_size
= file_size
% FULL_BLOCK_COUNT
;
786 size_t block_count_including_final_block
= FULL_BLOCK_COUNT
+ (ctx
->tail_block_size
!= 0);
788 // Data come in at chunks that are potentially smaller then block_size
789 // Accumulate those chunks into a buffer.
790 // The buffer must be at least 2*block_size so that crc_read_block(...) can find a matching block, regardless
791 // of the data alignment compared to the original page.
792 // The buffer is basically a moving window in the new page. So sometimes the last part of the buffer must be
793 // copied to the beginning again. The larger the buffer, the less often such a copy operation is required
794 // Though, the larger the buffer, the bigger the memory demand.
795 // A size of 4*block_size (20% of original file size) seems to be a good balance
797 // TODO: tune the buffer-size depending on the mime-type. Already compressed data (zip, gif, jpg, mpg, etc) will
798 // probably only have matching blocks if the file is totally unmodified. As soon as one byte differs in the original
799 // uncompressed data, the entire compressed data stream will be different anyway, so in such case it does not make
800 // much sense to even keep invoking the crc_read_block(...) function as soon as a difference has been found.
801 // Hence, no need to make a (potentially huge) buffer for these type of compressed (potentially huge, think about movies)
803 ctx
->buffer_size
= ctx
->block_size
*4 + 1;
804 ctx
->buffer_digest_getpos
= 0;
805 ctx
->buffer_read_getpos
= 0;
806 ctx
->buffer_putpos
= 0;
807 ctx
->crc_read_block_result
= 0;
808 ctx
->buffer
= apr_palloc(r
->pool
, ctx
->buffer_size
);
811 apr_base64_decode((char *)ctx
->hashes
, hashes
);
814 // swap to network byte order
816 for (i
= 0; i
< block_count_including_final_block
;++i
)
818 htobe64(ctx
->hashes
[i
]);
821 /* Setup deflate for compressing non-matched literal data */
822 ctx
->compression_state
= COMPRESSION_BUFFER_EMPTY
;
823 // TODO: should I pass some apr_palloc based function to prevent memory leaks
824 //in case of unexpected errors?
826 ap_log_error_wrapper(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
,"CRCCACHE-ENCODE size of compression stream: %zd",sizeof(*(ctx
->compression_stream
)));
827 ctx
->compression_stream
= apr_palloc(r
->pool
, sizeof(*(ctx
->compression_stream
)));
828 ctx
->compression_stream
->zalloc
= Z_NULL
;
829 ctx
->compression_stream
->zfree
= Z_NULL
;
830 ctx
->compression_stream
->opaque
= Z_NULL
;
831 zRC
= deflateInit(ctx
->compression_stream
, Z_DEFAULT_COMPRESSION
); // TODO: make compression level configurable
834 // Can't initialize the compression engine for compressing literal data
835 deflateEnd(ctx
->compression_stream
); // free memory used by deflate
836 free(ctx
->compression_stream
);
837 ctx
->compression_stream
= NULL
;
838 ap_log_rerror(APLOG_MARK
, APLOG_ERR
, 0, r
,
839 "unable to init Zlib: "
840 "deflateInit returned %d: URL %s",
842 ap_remove_output_filter(f
);
843 return ap_pass_brigade(f
->next
, bb
);
846 // now initialise the crcsync context that will do the real work
847 ctx
->crcctx
= crc_context_new(ctx
->block_size
, HASH_SIZE
,ctx
->hashes
, block_count_including_final_block
, ctx
->tail_block_size
);
849 // Register a cleanup function to cleanup internal libz and crcsync resources
850 apr_pool_cleanup_register(r
->pool
, ctx
, deflate_ctx_cleanup
,
851 apr_pool_cleanup_null
);
853 // All checks and initializations are OK
854 // Modify headers that are impacted by this transformation
855 // TODO: the crccache-client could recalculate these headers once it has
856 // reconstructed the page, before handling the reconstructed page
857 // back to the client
858 apr_table_setn(r
->headers_out
, ENCODING_HEADER
, CRCCACHE_ENCODING
);
859 apr_table_setn(r
->headers_out
, VARY_HEADER
, VARY_VALUE
);
860 apr_table_unset(r
->headers_out
, "Content-Length");
861 apr_table_unset(r
->headers_out
, "Content-MD5");
862 crccache_check_etag(r
, CRCCACHE_ENCODING
);
864 // All is okay, so set response header to IM Used
865 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
, "CRCCACHE-ENCODE Setting 226 header");
867 r
->status_line
="226 IM Used";
872 while (!APR_BRIGADE_EMPTY(bb
))
878 e
= APR_BRIGADE_FIRST(bb
);
880 if (APR_BUCKET_IS_EOS(e
))
882 // Process end of stream: flush data buffers, compression buffers, etc.
883 // and calculate a strong hash.
884 rslt
= process_eos(f
);
886 /* Remove EOS from the old list, and insert into the new. */
887 APR_BUCKET_REMOVE(e
);
888 APR_BRIGADE_INSERT_TAIL(ctx
->bb
, e
);
890 /* This filter is done once it has served up its content */
891 ap_remove_output_filter(f
);
893 if (rslt
!= APR_SUCCESS
)
895 return rslt
; // A problem occurred. Abort the processing
898 /* Okay, we've seen the EOS.
899 * Time to pass it along down the chain.
901 return ap_pass_brigade(f
->next
, ctx
->bb
);
904 if (APR_BUCKET_IS_FLUSH(e
))
906 // ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"CRCCACHE-ENCODE flush APR bucket");
909 /* Remove flush bucket from old brigade and insert into the new. */
910 APR_BUCKET_REMOVE(e
);
911 // TODO: optimize; do not insert two consecutive flushes when no intermediate
912 // output block was written
913 APR_BRIGADE_INSERT_TAIL(ctx
->bb
, e
);
914 rv
= ap_pass_brigade(f
->next
, ctx
->bb
);
915 if (rv
!= APR_SUCCESS
) {
921 if (APR_BUCKET_IS_METADATA(e
)) {
922 // ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"CRCCACHE-ENCODE metadata APR bucket");
924 * Remove meta data bucket from old brigade and insert into the
927 apr_bucket_read(e
, &data
, &len
, APR_BLOCK_READ
);
929 ap_log_error_wrapper(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
,
930 "CRCCACHE-ENCODE Metadata, read %zu, %d %d %d",len
,data
[0],data
[1],data
[2]);
932 ap_log_error_wrapper(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
,
933 "CRCCACHE-ENCODE Metadata, read %zu",len
);
934 APR_BUCKET_REMOVE(e
);
935 APR_BRIGADE_INSERT_TAIL(ctx
->bb
, e
);
939 // Bucket is non of the above types. Assume it is a data bucket
940 // which means it can be encoded with the crcsync algorithm
941 rslt
= process_data_bucket(f
, e
);
943 APR_BUCKET_REMOVE(e
);
944 if (rslt
!= APR_SUCCESS
)
946 break; // A problem occurred. Abort the processing
950 apr_brigade_cleanup(bb
);
/*
 * Module registration: logs a registration notice, hooks
 * crccache_server_header_parser_handler into the header parser phase and
 * registers the "CRCCACHE_OUT" output filter (type AP_FTYPE_CONTENT_SET),
 * storing its handle in crccache_out_filter_handle.
 * NOTE(review): the final argument of ap_hook_header_parser() and the
 * closing brace are not visible in this listing - confirm against the
 * original file.
 */
954 static void disk_cache_register_hook(apr_pool_t
*p
) {
955 ap_log_error(APLOG_MARK
, APLOG_INFO
, 0, NULL
,
956 "Registering crccache server module, (C) 2009, Toby Collett and Alex Wulms");
958 ap_hook_header_parser(crccache_server_header_parser_handler
, NULL
, NULL
,
/* NOTE(review): the CRCCACHE_HEADER registration below refers to a handler
 * that only exists as commented-out code in this file; presumably these two
 * lines are themselves inside a comment whose delimiters are not visible. */
961 ap_register_output_filter("CRCCACHE_HEADER", crccache_server_header_filter_handler,
962 NULL, AP_FTYPE_PROTOCOL);
964 crccache_out_filter_handle
= ap_register_output_filter("CRCCACHE_OUT",
965 crccache_out_filter
, NULL
, AP_FTYPE_CONTENT_SET
);
968 module AP_MODULE_DECLARE_DATA crccache_server_module
= {
969 STANDARD20_MODULE_STUFF
, NULL
, /* create per-directory config structure */
970 NULL
, /* merge per-directory config structures */
971 create_config
, /* create per-server config structure */
972 NULL
, /* merge per-server config structures */
973 disk_cache_cmds
, /* command apr_table_t */
974 disk_cache_register_hook
/* register hooks */