fixed incorrect bucket buffer copy
[httpd-crcsyncproxy.git] / crccache / mod_crccache_server.c
blob4e5f33e85e650d10c2470b3c2d02c77c055250ee
1 /* Licensed to the Apache Software Foundation (ASF) under one or more
2 * contributor license agreements. See the NOTICE file distributed with
3 * this work for additional information regarding copyright ownership.
4 * The ASF licenses this file to You under the Apache License, Version 2.0
5 * (the "License"); you may not use this file except in compliance with
6 * the License. You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 #include "apr_file_io.h"
18 #include "apr_strings.h"
19 #include "mod_cache.h"
20 #include "mod_disk_cache.h"
21 #include "ap_provider.h"
22 #include "util_filter.h"
23 #include "util_script.h"
24 #include "util_charset.h"
26 #include "crccache.h"
27 #include "mod_crccache_server.h"
29 #include <crcsync/crcsync.h>
31 const int bufferSize = 1024;
33 module AP_MODULE_DECLARE_DATA crccache_server_module;
35 //#define MIN(X,Y) (X<Y?X:Y)
37 static void *create_config(apr_pool_t *p, server_rec *s) {
38 crccache_server_conf *conf = apr_pcalloc(p, sizeof(crccache_server_conf));
39 conf->disk_cache_conf = apr_pcalloc(p, sizeof(disk_cache_conf));
41 /* XXX: Set default values */
42 conf->enabled = 0;
43 conf->disk_cache_conf->dirlevels = DEFAULT_DIRLEVELS;
44 conf->disk_cache_conf->dirlength = DEFAULT_DIRLENGTH;
45 conf->disk_cache_conf->maxfs = DEFAULT_MAX_FILE_SIZE;
46 conf->disk_cache_conf->minfs = DEFAULT_MIN_FILE_SIZE;
48 conf->disk_cache_conf->cache_root = NULL;
49 conf->disk_cache_conf->cache_root_len = 0;
51 return conf;
54 typedef struct crccache_ctx_t {
55 unsigned char *buffer;
56 size_t buffer_count;
57 apr_bucket_brigade *bb;
58 size_t block_size;
59 unsigned hashes[BLOCK_COUNT];
60 struct crc_context *crcctx;
61 size_t orig_length;
62 size_t tx_length;
63 } crccache_ctx;
66 * mod_disk_cache configuration directives handlers.
68 static const char *set_cache_root(cmd_parms *parms, void *in_struct_ptr,
69 const char *arg) {
70 crccache_server_conf *conf = ap_get_module_config(parms->server->module_config,
71 &crccache_server_module);
72 conf->disk_cache_conf->cache_root = arg;
73 conf->disk_cache_conf->cache_root_len = strlen(arg);
74 /* TODO: canonicalize cache_root and strip off any trailing slashes */
76 return NULL;
80 * Only enable CRCCache Server when requested through the config file
81 * so that the user can switch CRCCache server on in a specific virtual server
83 static const char *set_crccache_server(cmd_parms *parms, void *dummy, int flag)
85 crccache_server_conf *conf = ap_get_module_config(parms->server->module_config,
86 &crccache_server_module);
87 conf->enabled = flag;
88 return NULL;
93 * Consider eliminating the next two directives in favor of
94 * Ian's prime number hash...
95 * key = hash_fn( r->uri)
96 * filename = "/key % prime1 /key %prime2/key %prime3"
98 static const char *set_cache_dirlevels(cmd_parms *parms, void *in_struct_ptr,
99 const char *arg) {
100 crccache_server_conf *conf = ap_get_module_config(parms->server->module_config,
101 &crccache_server_module);
102 int val = atoi(arg);
103 if (val < 1)
104 return "CacheDirLevelsServer value must be an integer greater than 0";
105 if (val * conf->disk_cache_conf->dirlength > CACHEFILE_LEN)
106 return "CacheDirLevelsServer*CacheDirLengthServer value must not be higher than 20";
107 conf->disk_cache_conf->dirlevels = val;
108 return NULL;
110 static const char *set_cache_dirlength(cmd_parms *parms, void *in_struct_ptr,
111 const char *arg) {
112 crccache_server_conf *conf = ap_get_module_config(parms->server->module_config,
113 &crccache_server_module);
114 int val = atoi(arg);
115 if (val < 1)
116 return "CacheDirLengthServer value must be an integer greater than 0";
117 if (val * conf->disk_cache_conf->dirlevels > CACHEFILE_LEN)
118 return "CacheDirLevelsServer*CacheDirLengthServer value must not be higher than 20";
120 conf->disk_cache_conf->dirlength = val;
121 return NULL;
124 static const char *set_cache_minfs(cmd_parms *parms, void *in_struct_ptr,
125 const char *arg) {
126 crccache_server_conf *conf = ap_get_module_config(parms->server->module_config,
127 &crccache_server_module);
129 if (apr_strtoff(&conf->disk_cache_conf->minfs, arg, NULL, 0) != APR_SUCCESS || conf->disk_cache_conf->minfs
130 < 0) {
131 return "CacheMinFileSizeServer argument must be a non-negative integer representing the min size of a file to cache in bytes.";
133 return NULL;
136 static const char *set_cache_maxfs(cmd_parms *parms, void *in_struct_ptr,
137 const char *arg) {
138 crccache_server_conf *conf = ap_get_module_config(parms->server->module_config,
139 &crccache_server_module);
140 if (apr_strtoff(&conf->disk_cache_conf->maxfs, arg, NULL, 0) != APR_SUCCESS || conf->disk_cache_conf->maxfs
141 < 0) {
142 return "CacheMaxFileSizeServer argument must be a non-negative integer representing the max size of a file to cache in bytes.";
144 return NULL;
147 static const command_rec disk_cache_cmds[] = { AP_INIT_TAKE1("CacheRootServer", set_cache_root, NULL, RSRC_CONF,
148 "The directory to store cache files"), AP_INIT_TAKE1("CacheDirLevelsServer", set_cache_dirlevels, NULL, RSRC_CONF,
149 "The number of levels of subdirectories in the cache"), AP_INIT_TAKE1("CacheDirLengthServer", set_cache_dirlength, NULL, RSRC_CONF,
150 "The number of characters in subdirectory names"), AP_INIT_TAKE1("CacheMinFileSizeServer", set_cache_minfs, NULL, RSRC_CONF,
151 "The minimum file size to cache a document"), AP_INIT_TAKE1("CacheMaxFileSizeServer", set_cache_maxfs, NULL, RSRC_CONF,
152 "The maximum file size to cache a document"), AP_INIT_FLAG("CRCcacheServer", set_crccache_server, NULL, RSRC_CONF,
153 "Enable the CRCCache server in this virtual server"),{ NULL } };
155 static ap_filter_rec_t *crccache_out_filter_handle;
157 static int crccache_server_header_parser_handler(request_rec *r) {
158 crccache_server_conf *conf = ap_get_module_config(r->server->module_config,
159 &crccache_server_module);
160 if (conf->enabled)
162 ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server,"check if we have a Block-Hashes header here");
163 const char * hashes, *block_size_header;
164 hashes = apr_table_get(r->headers_in, "Block-Hashes");
165 block_size_header = apr_table_get(r->headers_in, "Block-Size");
166 if (hashes && block_size_header)
168 size_t block_size;
169 int ret = sscanf(block_size_header,"%ld",&block_size);
170 if (ret < 0)
172 ap_log_error(APLOG_MARK, APLOG_ERR, 0, r->server, "crccache: failed to convert block size header to int, %s",block_size_header);
173 return OK;
176 ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server, "we have a Block-Hashes header here, we should response in kind: %s",hashes);
177 ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server, "Need to attache a filter here so we can set the content encoding for the return");
178 ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS,
179 r->server, "Adding CRCCACHE_ENCODE filter for %s",
180 r->uri);
181 ap_add_output_filter_handle(crccache_out_filter_handle,
182 NULL, r, r->connection);
187 return OK;
190 /* PR 39727: we're screwing up our clients if we leave a strong ETag
191 * header while transforming content. Henrik Nordstrom suggests
192 * appending ";gzip".
194 * Pending a more thorough review of our Etag handling, let's just
195 * implement his suggestion. It fixes the bug, or at least turns it
196 * from a showstopper to an inefficiency. And it breaks nothing that
197 * wasn't already broken.
199 static void crccache_check_etag(request_rec *r, const char *transform) {
200 const char *etag = apr_table_get(r->headers_out, "ETag");
201 if (etag && (((etag[0] != 'W') && (etag[0] != 'w')) || (etag[1] != '/'))) {
202 apr_table_set(r->headers_out, "ETag", apr_pstrcat(r->pool, etag, "-",
203 transform, NULL));
208 * CACHE_OUT filter
209 * ----------------
211 * Deliver cached content (headers and body) up the stack.
213 static int crccache_out_filter(ap_filter_t *f, apr_bucket_brigade *bb) {
214 apr_bucket *e;
215 request_rec *r = f->r;
216 crccache_ctx *ctx = f->ctx;
218 /* Do nothing if asked to filter nothing. */
219 if (APR_BRIGADE_EMPTY(bb)) {
220 return ap_pass_brigade(f->next, bb);
223 /* If we don't have a context, we need to ensure that it is okay to send
224 * the deflated content. If we have a context, that means we've done
225 * this before and we liked it.
226 * This could be not so nice if we always fail. But, if we succeed,
227 * we're in better shape.
229 if (!ctx)
231 const char *encoding;
233 /* only work on main request/no subrequests */
234 if (r->main != NULL) {
235 ap_remove_output_filter(f);
236 return ap_pass_brigade(f->next, bb);
239 /* We can't operate on Content-Ranges */
240 if (apr_table_get(r->headers_out, "Content-Range") != NULL) {
241 ap_remove_output_filter(f);
242 return ap_pass_brigade(f->next, bb);
245 /* Let's see what our current Content-Encoding is.
246 * If it's already encoded, don't compress again.
247 * (We could, but let's not.)
249 encoding = apr_table_get(r->headers_out, "Content-Encoding");
250 if (encoding && strcasecmp(CRCCACHE_ENCODING,encoding) == 0)
252 /* Even if we don't accept this request based on it not having
253 * the Accept-Encoding, we need to note that we were looking
254 * for this header and downstream proxies should be aware of that.
256 apr_table_mergen(r->headers_out, "Vary", "Accept-Encoding");
257 ap_remove_output_filter(f);
258 return ap_pass_brigade(f->next, bb);
261 /* For a 304 or 204 response there is no entity included in
262 * the response and hence nothing to deflate. */
263 if (r->status == HTTP_NOT_MODIFIED || r->status ==HTTP_NO_CONTENT)
265 ap_remove_output_filter(f);
266 return ap_pass_brigade(f->next, bb);
269 /* We're cool with filtering this. */
270 ctx = f->ctx = apr_pcalloc(r->pool, sizeof(*ctx));
271 ctx->orig_length = 0;
272 ctx->tx_length = 0;
273 ctx->bb = apr_brigade_create(r->pool, f->c->bucket_alloc);
275 /* If the entire Content-Encoding is "identity", we can replace it. */
276 if (!encoding || !strcasecmp(encoding, "identity")) {
277 apr_table_setn(r->headers_out, "Content-Encoding", CRCCACHE_ENCODING);
279 else {
280 apr_table_mergen(r->headers_out, "Content-Encoding", CRCCACHE_ENCODING);
282 apr_table_unset(r->headers_out, "Content-Length");
283 apr_table_unset(r->headers_out, "Content-MD5");
284 crccache_check_etag(r, CRCCACHE_ENCODING);
286 const char * hashes, *block_size_header;
287 hashes = apr_table_get(r->headers_in, "Block-Hashes");
288 block_size_header = apr_table_get(r->headers_in, "Block-Size");
290 ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,
291 "crccache encoding block size %s", block_size_header);
293 errno=0;
294 ctx->block_size = strtoull(block_size_header,NULL,0);
295 if (errno || ctx->block_size <= 0)
297 ap_log_error(APLOG_MARK, APLOG_ERR, 0, r->server,"crccache: failed to convert block size header to int, %s",block_size_header);
298 ap_remove_output_filter(f);
299 return ap_pass_brigade(f->next, bb);
302 // allocate a buffer of twice our block size so we can store non matching parts of data as it comes in
303 ctx->buffer_count = 0;
304 ctx->buffer = apr_palloc(r->pool, ctx->block_size*2);
306 int ii;
307 for (ii = 0; ii < BLOCK_COUNT; ++ii)
309 ctx->hashes[ii] = decode_30bithash(&hashes[ii*HASH_BASE64_SIZE_TX]);
310 ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,
311 "cache: decoded hash[%d] %08X",ii,ctx->hashes[ii]);
314 // now initialise the crcsync context that will do the real work
315 ctx->crcctx = crc_context_new(ctx->block_size, HASH_SIZE,ctx->hashes, BLOCK_COUNT);
322 while (!APR_BRIGADE_EMPTY(bb))
324 const char *data;
325 apr_size_t len;
327 e = APR_BRIGADE_FIRST(bb);
329 if (APR_BUCKET_IS_EOS(e))
331 // send one last literal if we still have unmatched data
332 if (ctx->buffer_count > 0)
334 ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,
335 "CRCCACHE-ENCODE final literal %ld bytes",ctx->buffer_count);
336 unsigned bucket_size = ctx->buffer_count + ENCODING_LITERAL_HEADER_SIZE;
337 ctx->tx_length += bucket_size;
338 char * buf = apr_palloc(r->pool, bucket_size);
340 buf[0] = ENCODING_LITERAL;
341 *(unsigned *)&buf[1] = htonl(ctx->buffer_count);
342 memcpy(&buf[5], ctx->buffer,ctx->buffer_count);
344 apr_bucket * b = apr_bucket_pool_create(buf, bucket_size, r->pool, f->c->bucket_alloc);
345 APR_BRIGADE_INSERT_TAIL(ctx->bb, b);
349 // TODO: add strong hash here
352 ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,
353 "CRCCACHE-ENCODE complete TX length=%ld original length=%ld",ctx->tx_length, ctx->orig_length);
356 /* Remove EOS from the old list, and insert into the new. */
357 APR_BUCKET_REMOVE(e);
358 APR_BRIGADE_INSERT_TAIL(ctx->bb, e);
360 /* This filter is done once it has served up its content */
361 ap_remove_output_filter(f);
363 /* Okay, we've seen the EOS.
364 * Time to pass it along down the chain.
366 return ap_pass_brigade(f->next, ctx->bb);
369 if (APR_BUCKET_IS_FLUSH(e))
371 apr_status_t rv;
373 /* Remove flush bucket from old brigade and insert into the new. */
374 APR_BUCKET_REMOVE(e);
375 APR_BRIGADE_INSERT_TAIL(ctx->bb, e);
376 rv = ap_pass_brigade(f->next, ctx->bb);
377 if (rv != APR_SUCCESS) {
378 return rv;
380 continue;
383 if (APR_BUCKET_IS_METADATA(e)) {
385 * Remove meta data bucket from old brigade and insert into the
386 * new.
388 // TODO: do we need to encode metadata
389 apr_bucket_read(e, &data, &len, APR_BLOCK_READ);
390 if (len > 2)
391 ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,
392 "CRCCACHE-ENCODE: Metadata, read %ld, %d %d %d",len,data[0],data[1],data[2]);
393 else
394 ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,
395 "CRCCACHE-ENCODE: Metadata, read %ld",len);
396 APR_BUCKET_REMOVE(e);
397 APR_BRIGADE_INSERT_TAIL(ctx->bb, e);
398 continue;
401 /* read */
402 apr_bucket_read(e, &data, &len, APR_BLOCK_READ);
403 ctx->orig_length += len;
405 ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,
406 "cache: running CRCCACHE_OUT filter, read %ld bytes",len);
408 // TODO: make this a little more efficient so we need to copy less data around
409 size_t bucket_used_count = 0;
410 size_t data_left;
411 while(bucket_used_count < len)
413 const char * source_array = data;
414 size_t source_offset = bucket_used_count;
415 data_left = len - bucket_used_count;
416 size_t source_length = data_left;
417 // if we have some data in our buffer, we need to full up the buffer until we have enough to match a block
418 if (ctx->buffer_count > 0 || data_left < ctx->block_size)
420 size_t copy_size = MIN(ctx->block_size*2-ctx->buffer_count,data_left);
421 memcpy(&ctx->buffer[ctx->buffer_count],&data[bucket_used_count],copy_size);
422 ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,
423 "crccache: CRCSYNC, copying data on to buffer");
424 ctx->buffer_count += copy_size;
425 bucket_used_count += copy_size;
426 data_left = len - bucket_used_count;
427 source_array = (char *)ctx->buffer;
428 source_offset = 0;
429 source_length = ctx->buffer_count;
430 // not enough to match a block so stop here
431 if (ctx->buffer_count < ctx->block_size)
432 break;
435 long result;
436 size_t count = crc_read_block(ctx->crcctx, &result,
437 &source_array[source_offset], source_length);;
439 ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,
440 "crccache: CRCSYNC, processed %ld, used %ld bytes, result was %ld",source_length,count,result);
442 // do different things if we match a literal or block
443 if (result > 0)
445 // didnt match a block, send a literal
447 // if we matched all our data as a literal
448 // update our used byte count, we can only be sure that 1+count-blocksize bytes are not in a block
449 // as the tail end of the buffer could match when more data is added to it.
450 if (count == source_length)
452 if (count > (ctx->block_size -1))
453 count -=(ctx->block_size -1);
454 else
455 count = 0;
458 if (count > 0)
460 ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,
461 "CRCCACHE-ENCODE literal %ld bytes",count);
462 unsigned bucket_size = count + ENCODING_LITERAL_HEADER_SIZE;
463 ctx->tx_length += bucket_size;
464 char * buf = apr_palloc(r->pool, bucket_size);
466 buf[0] = ENCODING_LITERAL;
467 *(unsigned *)&buf[1] = htonl(count);
468 memcpy(&buf[5],&source_array[source_offset],count);
470 apr_bucket * b = apr_bucket_pool_create(buf, bucket_size, r->pool, f->c->bucket_alloc);
471 APR_BRIGADE_INSERT_TAIL(ctx->bb, b);
474 else if (result < 0)
476 // matched send a block
477 unsigned bucket_size = ENCODING_BLOCK_HEADER_SIZE;
478 ctx->tx_length += bucket_size;
479 char * buf = apr_palloc(r->pool, bucket_size);
481 // we used a block of data
482 count = ctx->block_size;
484 buf[0] = ENCODING_BLOCK;
485 buf[1] = (unsigned char) (result * -1 - 1); // invert and get back to zero based
486 ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,
487 "CRCCACHE-ENCODE block %d",buf[1]);
488 apr_bucket * b = apr_bucket_pool_create(buf, bucket_size, r->pool, f->c->bucket_alloc);
489 APR_BRIGADE_INSERT_TAIL(ctx->bb, b);
491 else
493 // something odd happened here
494 ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,
495 "crccache: CRCSYNC, no data, processed %ld bytes, result was %ld",count,result);
498 if (ctx->buffer_count > 0)
500 // if we have used up all of our buffer, stop using it and use the bucket directly
501 if (ctx->buffer_count - count < bucket_used_count)
503 size_t extra_data = ctx->buffer_count - bucket_used_count;
504 bucket_used_count = count - extra_data;
505 ctx->buffer_count = 0;
507 else
509 // otherwise memmove the unused data to the start of the buffer
510 ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,
511 "crccache: CRCSYNC, memmoving buffer");
512 memmove(ctx->buffer,&ctx->buffer[count],ctx->buffer_count - count);
513 ctx->buffer_count -= count;
514 bucket_used_count += count;
517 else
519 bucket_used_count += count;
523 APR_BUCKET_REMOVE(e);
527 apr_brigade_cleanup(bb);
528 return APR_SUCCESS;
531 static void disk_cache_register_hook(apr_pool_t *p) {
532 ap_hook_header_parser(crccache_server_header_parser_handler, NULL, NULL,
533 APR_HOOK_MIDDLE);
536 * CACHE_OUT must go into the filter chain after a possible DEFLATE
537 * filter to ensure that already compressed cache objects do not
538 * get compressed again. Incrementing filter type by 1 ensures
539 * his happens.
541 crccache_out_filter_handle = ap_register_output_filter("CRCCACHE_OUT",
542 crccache_out_filter, NULL, AP_FTYPE_CONTENT_SET + 1);
545 module AP_MODULE_DECLARE_DATA crccache_server_module = {
546 STANDARD20_MODULE_STUFF, NULL, /* create per-directory config structure */
547 NULL , /* merge per-directory config structures */
548 create_config, /* create per-server config structure */
549 NULL , /* merge per-server config structures */
550 disk_cache_cmds, /* command apr_table_t */
551 disk_cache_register_hook /* register hooks */