From a8130cd95a08517b8d8bbc00ca07f814dda6729b Mon Sep 17 00:00:00 2001 From: Alex Wulms Date: Thu, 5 Aug 2010 10:23:04 +0200 Subject: [PATCH] Implement 'find similar page'. Fix some small bug --- ccan/crcsync/crcsync.c | 14 +- ccan/crcsync/crcsync.h | 10 +- crccache/.cproject | 136 +-- crccache/.project | 27 +- crccache/Makefile | 24 +- crccache/ap_log_helper.c | 26 + crccache/{ap_wrapper.h => ap_log_helper.h} | 6 +- crccache/ap_wrapper.c | 38 - crccache/benchmark.c | 2 +- crccache/cache/cache.c | 33 +- crccache/cache/cache.h | 21 +- crccache/cache/cache_util.c | 2 +- crccache/crccache.h | 102 +- crccache/mod_crccache_client.c | 459 +++++--- crccache/mod_crccache_client.h | 48 +- crccache/mod_crccache_client_find_similar.c | 1540 +++++++++++++++++++++++++++ crccache/mod_crccache_client_find_similar.h | 34 + crccache/mod_crccache_server.c | 206 +++- crccache/mod_crccache_server.h | 60 +- crccache/rmm_hash.c | 531 +++++++++ crccache/rmm_hash.h | 169 +++ crccache/test_hash_small_invokations.c | 4 +- 22 files changed, 3026 insertions(+), 466 deletions(-) create mode 100644 crccache/ap_log_helper.c rename crccache/{ap_wrapper.h => ap_log_helper.h} (61%) delete mode 100644 crccache/ap_wrapper.c rewrite crccache/crccache.h (60%) create mode 100644 crccache/mod_crccache_client_find_similar.c create mode 100644 crccache/mod_crccache_client_find_similar.h rewrite crccache/mod_crccache_server.h (65%) create mode 100644 crccache/rmm_hash.c create mode 100644 crccache/rmm_hash.h diff --git a/ccan/crcsync/crcsync.c b/ccan/crcsync/crcsync.c index 61bf38e97..f24b9809f 100644 --- a/ccan/crcsync/crcsync.c +++ b/ccan/crcsync/crcsync.c @@ -12,22 +12,22 @@ static uint64_t mask_of(unsigned int crcbits) } void crc_of_blocks(const void *data, size_t len, unsigned int normal_block_size, - unsigned int crcbits, bool merge_trailing_bytes_in_last_block, uint64_t crc[]) + unsigned int tail_block_size, + unsigned int crcbits, uint64_t crc[]) { - unsigned int nblocks = len/normal_block_size; + unsigned int n_normalblocks = (len-tail_block_size)/normal_block_size; unsigned int i; const uint8_t *buf = data; uint64_t crcmask = mask_of(crcbits); - if (len%normal_block_size && !merge_trailing_bytes_in_last_block) - nblocks++; - - for (i = 0; i < nblocks - 1; i++) { + for (i = 0; i < n_normalblocks; i++) { crc[i] = (crc64_iso(0, buf, normal_block_size) & crcmask); buf += normal_block_size; len -= normal_block_size; } - crc[i] = (crc64_iso(0, buf, len) & crcmask); + if (tail_block_size != 0) { + crc[i] = (crc64_iso(0, buf, len) & crcmask); + } } struct crc_hash_record { diff --git a/ccan/crcsync/crcsync.h b/ccan/crcsync/crcsync.h index 38f90b408..e1568c6af 100644 --- a/ccan/crcsync/crcsync.h +++ b/ccan/crcsync/crcsync.h @@ -8,15 +8,17 @@ * crc_of_blocks - calculate the crc of the blocks. * @data: pointer to the buffer to CRC * @len: length of the buffer - * @blocksize: CRC of each block (final block may be shorter) + * @normal_block_size: Size of non-tail blocks + * @tail_block_size: Size of tail block (might be 0) * @crcbits: the number of bits of crc you want (currently 64 maximum) - * @crc: the crcs (array will have (len + blocksize-1)/blocksize entries). + * @crc: the crcs (array will have (len - tail_block_size)/blocksize + (tail_block_size != 0) entries). * * Calculates the CRC of each block, and output the lower @crcbits to * @crc array. 
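 *
 * For example (illustrative values only): with len = 1010,
 * normal_block_size = 25 and tail_block_size = 35, the @crc array is
 * expected to hold (1010 - 35)/25 + 1 = 40 entries.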
*/ -void crc_of_blocks(const void *data, size_t len, unsigned int blocksize, - unsigned int crcbits, bool merge_trailing_bytes_in_last_block, uint64_t crc[]); +void crc_of_blocks(const void *data, size_t len, unsigned int normal_block_size, + unsigned int tail_block_size, + unsigned int crcbits, uint64_t crc[]); /** * crc_context_new - allocate and initialize state for crc_find_block diff --git a/crccache/.cproject b/crccache/.cproject index d03898f9e..f50492eee 100644 --- a/crccache/.cproject +++ b/crccache/.cproject @@ -19,14 +19,14 @@ - + - + @@ -35,14 +35,18 @@ @@ -57,6 +61,70 @@ + + + + + + + +make + +all +true +true + + +make + +clean +true +true + + +make + +test +true +true + + +make + +restart +true +true + + +make + +run-test +true +true + + +make + +run-t_sh +true +true + + +make + +run-test_bad_mismatch +true +true + + +make + +run-benchmark +true +true + + + @@ -226,60 +294,6 @@ - - - - -make - -all -true -true - - -make - -clean -true -true - - -make - -test -true -true - - -make - -restart -true -true - - -make - -run-test -true -true - - -make - -run-t_sh -true -true - - -make - -run-test_bad_mismatch -true -true - - - diff --git a/crccache/.project b/crccache/.project index 0b633a36c..4ed6f3e51 100644 --- a/crccache/.project +++ b/crccache/.project @@ -18,14 +18,14 @@ true - ?name? - - - org.eclipse.cdt.make.core.append_environment true + ?name? + + + org.eclipse.cdt.make.core.stopOnError true @@ -46,22 +46,22 @@ true - org.eclipse.cdt.make.core.enableAutoBuild - false - - org.eclipse.cdt.make.core.enableFullBuild true - org.eclipse.cdt.make.core.buildArguments - + org.eclipse.cdt.make.core.enableAutoBuild + false org.eclipse.cdt.make.core.fullBuildTarget all + org.eclipse.cdt.make.core.buildArguments + + + org.eclipse.cdt.make.core.autoBuildTarget all @@ -79,4 +79,11 @@ org.eclipse.cdt.core.ccnature org.eclipse.cdt.managedbuilder.core.managedBuildNature + + + apache-2.2.16 + 2 + /home/awulms/Ontwikkel/olpc/httpd-2.2.16 + + diff --git a/crccache/Makefile b/crccache/Makefile index 3f4e2ec54..3c4bf3b3f 100644 --- a/crccache/Makefile +++ b/crccache/Makefile @@ -10,26 +10,24 @@ CFLAGS=`apr-config --cflags --includes` $(EXTRA_FLAGS) CXXFLAGS=`apr-config --cppflags --includes` $(EXTRA_FLAGS) LDFLAGS=`apr-1-config --link-ld --libs` -laprutil-1 -lz -all: mod_crccache_client.so mod_crccache_server.so benchmark test_hash_small_invokations +all: mod_crccache_client.so mod_crccache_server.so benchmark test_hash_small_invokations done -CACHE_SRC=cache/cache.o cache/cache_cache.o \ +CCAN_CRC=$(CCAN_PATH)/crc/crc.o $(CCAN_PATH)/crcsync/crcsync.o + +COMMON=ap_log_helper.o $(CCAN_CRC) + +CACHE=cache/cache.o cache/cache_cache.o \ cache/cache_hash.o cache/cache_pqueue.o \ cache/cache_util.o cache/cache_storage.o -#cache/mod_cache.o -#cache/mod_disk_cache.o -#cache/mod_file_cache.o -#cache/mod_socache_dbm.c -#cache/mod_socache_dc.c -#cache/mod_socache_memcache.c -#cache/mod_socache_shmcb.c - +mod_crccache_client.so: mod_crccache_client.o $(COMMON) $(CACHE) rmm_hash.o mod_crccache_client_find_similar.o + gcc $^ -o $@ $(LDFLAGS) -shared -%.so: %.o ap_wrapper.o $(CCAN_PATH)/crc/crc.o $(CCAN_PATH)/crcsync/crcsync.o $(CACHE_SRC) +mod_crccache_server.so: mod_crccache_server.o $(COMMON) gcc $^ -o $@ $(LDFLAGS) -shared clean: - rm -rf *.so *.o $(CCAN_PATH)/crc/crc.o $(CCAN_PATH)/crcsync/crcsync.o + rm -rf *.so *.o $(COMMON) $(CACHE) benchmark: benchmark.o $(CCAN_PATH)/crc/crc.o $(CCAN_PATH)/crcsync/crcsync.o gcc $^ -o $@ $(LDFLAGS) @@ -40,3 +38,5 @@ test_hash_small_invokations: 
test_hash_small_invokations.o $(CCAN_PATH)/crc/crc. restart: sudo /etc/init.d/apache2 restart +done: + echo "All done" diff --git a/crccache/ap_log_helper.c b/crccache/ap_log_helper.c new file mode 100644 index 000000000..f940e98fc --- /dev/null +++ b/crccache/ap_log_helper.c @@ -0,0 +1,26 @@ +#include + +#include "httpd.h" +#include "http_log.h" +#include + +void ap_log_hex(const char *file, int line, int level, apr_status_t status, const server_rec *s, unsigned char *buf, size_t len) +{ + size_t cnt; + for (cnt=0; cnt < len; cnt += 32) + { + size_t cnt2; + char hexbuf[3*32+1]; + for (cnt2=cnt; cnt2 != cnt+32 && cnt2 != len; cnt2++) + { + sprintf(hexbuf+3*(cnt2-cnt), "%02x.", buf[cnt2]); + } + ap_log_error(file, line, level, status, s, "%s", hexbuf); + } +} + +char *format_hostinfo(apr_pool_t *p, server_rec *s) +{ + return s->is_virtual ? apr_psprintf(p, "virtual host %s:%d", s->addrs->virthost, s->addrs->host_port) : "main server"; +} + diff --git a/crccache/ap_wrapper.h b/crccache/ap_log_helper.h similarity index 61% rename from crccache/ap_wrapper.h rename to crccache/ap_log_helper.h index d9671f813..b49b7dd7e 100644 --- a/crccache/ap_wrapper.h +++ b/crccache/ap_log_helper.h @@ -5,12 +5,10 @@ extern "C" { #endif -void ap_log_error_wrapper(const char *file, int line, int level, apr_status_t status, const server_rec *s, - const char *fmt, ...) - __attribute__((format(printf,6,7))); - void ap_log_hex(const char *file, int line, int level, apr_status_t status, const server_rec *s, unsigned char *buf, size_t len); +char *format_hostinfo(apr_pool_t *p, server_rec *s); + #ifdef __cplusplus } #endif diff --git a/crccache/ap_wrapper.c b/crccache/ap_wrapper.c deleted file mode 100644 index 397dbf645..000000000 --- a/crccache/ap_wrapper.c +++ /dev/null @@ -1,38 +0,0 @@ -#include - -#include "httpd.h" -#include "http_log.h" - -/** - * ap_log_error does not support %zd or %zu conversion for type_t arguments - * So with ap_log_error one would have to specify either %d or %ld, depending on the - * platform (32-bit or 64-bit). This violates the whole purpose of type_t, which - * was introduced in C exactly to provide cross-platform compatibility... - * This wrapper function supports %zd and %zu conversion parameters. - * Note that it truncates the logged message to 1000 bytes, so don't use it to log messages that might - * be longer - */ -void ap_log_error_wrapper(const char *file, int line, int level, apr_status_t status, const server_rec *s, - const char *fmt, ...) 
-{ - char msg[1000]; - va_list ap; - va_start(ap, fmt); - vsnprintf(msg, sizeof(msg), fmt, ap); - ap_log_error(file, line, level, status, s, "%s", msg); -} - -void ap_log_hex(const char *file, int line, int level, apr_status_t status, const server_rec *s, unsigned char *buf, size_t len) -{ - size_t cnt; - for (cnt=0; cnt < len; cnt += 32) - { - size_t cnt2; - char hexbuf[3*32+1]; - for (cnt2=cnt; cnt2 != cnt+32 && cnt2 != len; cnt2++) - { - sprintf(hexbuf+3*(cnt2-cnt), "%02x.", buf[cnt2]); - } - ap_log_error(file, line, level, status, s, "%s", hexbuf); - } -} diff --git a/crccache/benchmark.c b/crccache/benchmark.c index 2938992be..45f35e732 100644 --- a/crccache/benchmark.c +++ b/crccache/benchmark.c @@ -247,7 +247,7 @@ int main(int argc, char *argv[]) bm_crccalculate.start = clock(); for (cnt=0; cnt != TEST_ITERATIONS_COUNT; cnt++) { - crc_of_blocks(original_data->buf, original_data->datasize, block_size, HASH_SIZE, merge_trailing_blocks_in_last_block, match_hashes); + crc_of_blocks(original_data->buf, original_data->datasize, block_size, tail_block_size, HASH_SIZE, match_hashes); } bm_crccalculate.end = clock(); diff --git a/crccache/cache/cache.c b/crccache/cache/cache.c index f64155439..45c651332 100644 --- a/crccache/cache/cache.c +++ b/crccache/cache/cache.c @@ -172,10 +172,9 @@ static apr_status_t file_cache_errorcleanup(disk_cache_object_t *dobj, * file for an ap_cache_el, this state information will be read * and written transparent to clients of this module */ -static int file_cache_recall_mydata(apr_file_t *fd, cache_info *info, - disk_cache_object_t *dobj, request_rec *r) { +int file_cache_recall_mydata(apr_pool_t *ptemp, apr_file_t *fd, cache_info_t *info, + disk_cache_object_t *dobj, /*request_rec *r, */int validate_url) { apr_status_t rv; - char *urlbuff; disk_cache_info_t disk_info; apr_size_t len; @@ -197,17 +196,22 @@ static int file_cache_recall_mydata(apr_file_t *fd, cache_info *info, /* Note that we could optimize this by conditionally doing the palloc * depending upon the size. */ - urlbuff = apr_palloc(r->pool, disk_info.name_len + 1); + char *uribuf = apr_palloc(ptemp, disk_info.name_len + 1); + if (uribuf == NULL) + { + return APR_EGENERAL; + } len = disk_info.name_len; - rv = apr_file_read_full(fd, urlbuff, len, &len); + rv = apr_file_read_full(fd, uribuf, len, &len); if (rv != APR_SUCCESS) { return rv; } - urlbuff[disk_info.name_len] = '\0'; + uribuf[disk_info.name_len] = '\0'; + info->uri = uribuf; /* check that we have the same URL */ /* Would strncmp be correct? 
*/ - if (strcmp(urlbuff, dobj->name) != 0) { + if (validate_url && strcmp(info->uri, dobj->name) != 0) { return APR_EGENERAL; } @@ -327,7 +331,7 @@ int open_entity(cache_handle_t *h, request_rec *r, const char *key) { &crccache_client_module); apr_finfo_t finfo; cache_object_t *obj; - cache_info *info; + cache_info_t *info; disk_cache_object_t *dobj; int flags; h->cache_obj = NULL; @@ -436,7 +440,7 @@ int open_entity(cache_handle_t *h, request_rec *r, const char *key) { } /* Read the bytes to setup the cache_info fields */ - rc = file_cache_recall_mydata(dobj->hfd, info, dobj, r); + rc = file_cache_recall_mydata(r->pool, dobj->hfd, info, dobj, 1); if (rc != APR_SUCCESS) { /* XXX log message */ return DECLINED; @@ -605,7 +609,7 @@ static apr_status_t store_array(apr_file_t *fd, apr_array_header_t* arr) { &amt); } -apr_status_t read_table(cache_handle_t *handle, request_rec *r, +apr_status_t read_table(/*cache_handle_t *handle, request_rec *r,*/server_rec *s, apr_table_t *table, apr_file_t *file) { char w[MAX_STRING_LEN]; char *l; @@ -617,7 +621,7 @@ apr_status_t read_table(cache_handle_t *handle, request_rec *r, /* ### What about APR_EOF? */ rv = apr_file_gets(w, MAX_STRING_LEN - 1, file); if (rv != APR_SUCCESS) { - ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, + ap_log_error(APLOG_MARK, APLOG_ERR, 0, s, "Premature end of cache headers."); return rv; } @@ -737,7 +741,7 @@ apr_status_t store_table(apr_file_t *fd, apr_table_t *table) { } apr_status_t store_headers(cache_handle_t *h, request_rec *r, - cache_info *info) { + cache_info_t *info) { crccache_client_conf *conf = ap_get_module_config(r->server->module_config, &crccache_client_module); @@ -903,7 +907,7 @@ apr_status_t store_headers(cache_handle_t *h, request_rec *r, } apr_status_t store_body(cache_handle_t *h, request_rec *r, - apr_bucket_brigade *bb) { + apr_bucket_brigade *bb, void (*post_store_body_callback)(disk_cache_object_t *dobj, request_rec *r)) { apr_bucket *e; apr_status_t rv; @@ -984,6 +988,9 @@ apr_status_t store_body(cache_handle_t *h, request_rec *r, file_cache_el_final(dobj, r); ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server, "disk_cache: Body for URL %s cached.", dobj->name); + if (post_store_body_callback != NULL) { + (*post_store_body_callback)(dobj, r); + } } return APR_SUCCESS; diff --git a/crccache/cache/cache.h b/crccache/cache/cache.h index ad5ec15e6..922007a8b 100644 --- a/crccache/cache/cache.h +++ b/crccache/cache/cache.h @@ -18,8 +18,8 @@ #include /* cache info information */ -typedef struct cache_info cache_info; -struct cache_info { +typedef struct cache_info_s cache_info_t; +struct cache_info_s { /** * HTTP status code of the cached entity. Though not neccessarily the * status code finally issued to the request. @@ -36,6 +36,8 @@ struct cache_info { apr_time_t request_time; /** apr_time_now() at the time the entity was acutally cached */ apr_time_t response_time; + + const char *uri; // Canonilized URI }; @@ -49,7 +51,7 @@ typedef struct cache_object cache_object_t; struct cache_object { const char *key; cache_object_t *next; - cache_info info; + cache_info_t info; /* Opaque portion (specific to the implementation) of the cache object */ void *vobj; /* FIXME: These are only required for mod_mem_cache. 
*/ @@ -137,8 +139,8 @@ typedef struct { apr_off_t saved_size; /* length of saved_brigade */ apr_time_t exp; /* expiration */ apr_time_t lastmod; /* last-modified time */ - cache_info *info; /* current cache info */ - ap_filter_t *remove_url_filter; /* Enable us to remove the filter */ + cache_info_t *info; /* current cache info */ + // ap_filter_t *remove_url_filter; /* Enable us to remove the filter */ char *key; /* The cache key created for this * request */ @@ -178,7 +180,7 @@ typedef struct { /* cache_util.c */ /* do a HTTP/1.1 age calculation */ -CACHE_DECLARE(apr_time_t) ap_cache_current_age(cache_info *info, const apr_time_t age_value, +CACHE_DECLARE(apr_time_t) ap_cache_current_age(cache_info_t *info, const apr_time_t age_value, apr_time_t now); /** @@ -257,9 +259,9 @@ const char* cache_create_key( request_rec*r ); /* Forward declarations */ int remove_entity(cache_handle_t *h); apr_status_t store_headers(cache_handle_t *h, request_rec *r, - cache_info *i); + cache_info_t *i); apr_status_t store_body(cache_handle_t *h, request_rec *r, - apr_bucket_brigade *b); + apr_bucket_brigade *b, void (*post_store_body_callback)(disk_cache_object_t *dobj, request_rec *r)); apr_status_t recall_headers(cache_handle_t *h, request_rec *r); apr_status_t recall_body(cache_handle_t *h, apr_pool_t *p, apr_bucket_brigade *bb); @@ -269,7 +271,8 @@ apr_status_t read_array(request_rec *r, apr_array_header_t* arr, int create_entity(cache_handle_t *h, request_rec *r, const char *key, apr_off_t len); int open_entity(cache_handle_t *h, request_rec *r, const char *key); -apr_status_t read_table(cache_handle_t *handle, request_rec *r, apr_table_t *table, apr_file_t *file); +apr_status_t read_table(/*cache_handle_t *handle, request_rec *r, */server_rec *s, apr_table_t *table, apr_file_t *file); +int file_cache_recall_mydata(apr_pool_t *ptemp, apr_file_t *fd, cache_info_t *info, disk_cache_object_t *dobj, /*request_rec *r, */int validate_url); #endif /* CACHE_H_ */ diff --git a/crccache/cache/cache_util.c b/crccache/cache/cache_util.c index bf1756fd1..9d9a0ffb1 100644 --- a/crccache/cache/cache_util.c +++ b/crccache/cache/cache_util.c @@ -143,7 +143,7 @@ CACHE_DECLARE(cache_provider_list *)ap_cache_get_providers(request_rec *r, #endif /* do a HTTP/1.1 age calculation */ -CACHE_DECLARE(apr_int64_t) ap_cache_current_age(cache_info *info, +CACHE_DECLARE(apr_int64_t) ap_cache_current_age(cache_info_t *info, const apr_time_t age_value, apr_time_t now) { diff --git a/crccache/crccache.h b/crccache/crccache.h dissimilarity index 60% index a80033706..7c52e17c1 100644 --- a/crccache/crccache.h +++ b/crccache/crccache.h @@ -1,49 +1,53 @@ -/* - * crccache.h - * - * Common files for crccache client and server apache modules - * - * Created on: 21/02/2009 - * Author: Toby Collett - * Contributor: Alex Wulms - */ - -#ifndef CRCCACHE_H_ -#define CRCCACHE_H_ - -#include - -#ifndef MAX -#define MAX(a,b) ((a) > (b) ? (a) : (b)) -#endif -#ifndef MIN -#define MIN(a,b) ((a) < (b) ? 
(a) : (b)) -#endif - -#define CRCCACHE_ENCODING "crcsync" -const char * ACCEPT_ENCODING_HEADER = "Accept-Encoding"; -const char * CAPABILITY_HEADER = "Capability"; -const char * CRCSYNC_SIMILAR_HEADER = "Crcsync-Similar"; -const char * ENCODING_HEADER = "Content-Encoding"; -const char * BLOCK_HEADER = "If-Block"; -const char * VARY_HEADER = "Vary"; -const char * VARY_VALUE = "If-Block"; -const char * ETAG_HEADER = "ETag"; - -const int HASH_SIZE=64; // bits per has, 30 bits is 5 bytes base 64 - -// HASH_SIZE_BYTES*FULL_BLOCK_COUNT*4/3 rounded up to the nearest multiple of 3 -// 8*40*4/3 = 438 -const int HASH_HEADER_SIZE=427; - - -const unsigned char ENCODING_LITERAL='L'; -const unsigned char ENCODING_BLOCK='B'; -const unsigned char ENCODING_COMPRESSED='Z'; -const unsigned char ENCODING_HASH='S'; - -const int ENCODING_COMPRESSED_HEADER_SIZE = 1;// 1 byte indicator -const int ENCODING_LITERAL_HEADER_SIZE = 1+4;// 1 byte indicator + 4 bytes length -const int ENCODING_BLOCK_HEADER_SIZE = 1+1;// 1 byte indicator + 1 byte block - -#endif /* CRCCACHE_H_ */ +/* + * crccache.h + * + * Common files for crccache client and server apache modules + * + * Created on: 21/02/2009 + * Author: Toby Collett + * Contributor: Alex Wulms + */ + +#ifndef CRCCACHE_H_ +#define CRCCACHE_H_ + +#include + +#ifndef MAX +#define MAX(a,b) ((a) > (b) ? (a) : (b)) +#endif +#ifndef MIN +#define MIN(a,b) ((a) < (b) ? (a) : (b)) +#endif + +#define CRCCACHE_ENCODING "crcsync" +#define ACCEPT_ENCODING_HEADER "Accept-Encoding" +#define CAPABILITY_HEADER "Capability" +#define CRCSYNC_SIMILAR_HEADER "Crcsync-Similar" +#define ENCODING_HEADER "Content-Encoding" +#define BLOCK_HEADER "If-Block" +#define VARY_HEADER "Vary" +#define VARY_VALUE "If-Block" +#define ETAG_HEADER "ETag" +#define HOST_HEADER "Host" +#define CONTENT_TYPE_HEADER "Content-Type" +#define ACCEPT_HEADER "Accept" + + // bits per hash, 30 bits is 5 bytes base 64 +#define HASH_SIZE 64 + +// HASH_SIZE_BYTES*FULL_BLOCK_COUNT*4/3 rounded up to the nearest multiple of 3 +// 8*40*4/3 = 438 +#define HASH_HEADER_SIZE 427 + + +#define ENCODING_LITERAL 'L' +#define ENCODING_BLOCK 'B' +#define ENCODING_COMPRESSED 'Z' +#define ENCODING_HASH 'S' + +#define ENCODING_COMPRESSED_HEADER_SIZE 1 /* 1 byte indicator */ +#define ENCODING_LITERAL_HEADER_SIZE (1+4) /* 1 byte indicator + 4 bytes length */ +#define ENCODING_BLOCK_HEADER_SIZE (1+1) /* 1 byte indicator + 1 byte block */ + +#endif /* CRCCACHE_H_ */ diff --git a/crccache/mod_crccache_client.c b/crccache/mod_crccache_client.c index f3ee0ca66..797eabcff 100644 --- a/crccache/mod_crccache_client.c +++ b/crccache/mod_crccache_client.c @@ -32,12 +32,14 @@ #include #include +#include #include #include #include #include #include #include "ap_provider.h" +#include #include "util_filter.h" #include "util_script.h" #include "util_charset.h" @@ -45,32 +47,148 @@ #include #include "crccache.h" -#include "ap_wrapper.h" +#include "ap_log_helper.h" #include #include #include #include "mod_crccache_client.h" +#include "mod_crccache_client_find_similar.h" static ap_filter_rec_t *crccache_decode_filter_handle; static ap_filter_rec_t *cache_save_filter_handle; static ap_filter_rec_t *cache_save_subreq_filter_handle; module AP_MODULE_DECLARE_DATA crccache_client_module; + + +APR_DECLARE_OPTIONAL_FN(apr_status_t, + ap_cache_generate_key, + (request_rec *r, apr_pool_t*p, char**key )); APR_OPTIONAL_FN_TYPE(ap_cache_generate_key) *cache_generate_key; +// const char* cache_create_key( request_rec*r ); + + +// extern 
APR_OPTIONAL_FN_TYPE(ap_cache_generate_key) *cache_generate_key; + +/* + * Private strucures and constants only used in mod_crccache_client + */ + +// hashes per file +#define FULL_BLOCK_COUNT 40 + +typedef enum decoding_state { + DECODING_NEW_SECTION, + DECODING_COMPRESSED, + DECODING_LITERAL_BODY, + DECODING_LITERAL_SIZE, + DECODING_HASH, + DECODING_BLOCK_HEADER, + DECODING_BLOCK +} decoding_state; + +typedef enum { + DECOMPRESSION_INITIALIZED, + DECOMPRESSION_ENDED +} decompression_state_t; + +typedef struct crccache_client_ctx_t { + apr_bucket_brigade *bb; + size_t block_size; + size_t tail_block_size; + apr_bucket * cached_bucket;// original data so we can fill in the matched blocks + + decoding_state state; + decompression_state_t decompression_state; + z_stream *decompression_stream; + int headers_checked; + struct apr_sha1_ctx_t sha1_ctx; + unsigned char sha1_value_rx[APR_SHA1_DIGESTSIZE]; + unsigned rx_count; + unsigned literal_size; + unsigned char * partial_literal;// original data so we can fill in the matched blocks +} crccache_client_ctx; + +static int crccache_client_post_config_per_virtual_host(apr_pool_t *p, apr_pool_t *plog, + apr_pool_t *ptemp, server_rec *s) +{ + /** + * The cache can be configured (differently) per virtual host, hence make the correct settings + * at the virtual host level + */ + crccache_client_conf *conf = ap_get_module_config(s->module_config, + &crccache_client_module); + ap_log_error(APLOG_MARK, APLOG_INFO, APR_SUCCESS, s, + "mod_crccache_client.post_config_per_vhost (%s): Number of cacheable URLs: %d", + format_hostinfo(ptemp, s), conf->cacheenable->nelts); + + if (conf->cacheenable->nelts != 0) { + // Cache client is enabled in this (virtual) server. Initialize it + if (!conf->cache_root) { + ap_log_error(APLOG_MARK, APLOG_ERR, APR_EGENERAL, s, + "mod_crccache_client.post_config_per_vhost (%s): Please set parameter CacheRootClient in (virtual) server config %s", + format_hostinfo(ptemp, s), s->defn_name); + return APR_EGENERAL; + } + return crccache_client_fsp_post_config_per_virtual_host(p, plog, ptemp, s, conf->similar_page_cache, conf->cache_root); + } + + return OK; +} static int crccache_client_post_config(apr_pool_t *p, apr_pool_t *plog, apr_pool_t *ptemp, server_rec *s) { - /* This is the means by which unusual (non-unix) os's may find alternate - * means to run a given command (e.g. shebang/registry parsing on Win32) + int result; + + /* + * TODO: AW: Why is the cache_generate_key function looked-up as an optional function? + * As far as I can see, this function is: + * 1) Very platform agnostic (so the need to ever write a platform specific module + * in order to export a platform specific implementation hardly exists...) + * 2) Very closely coupled to the way it is used in the cache client (an externally + * exported function could break the client by implementing other semantics!) 
+ * 3) There are no existing modules registering/exporting an ap_cache_generate_key function + * (which confirmes the two above points, that there is no need for it) + * Nevertheless, I assume the original author of mod_cache knows what he is doing so I + * leave this indirect look-up here for the time being */ cache_generate_key = APR_RETRIEVE_OPTIONAL_FN(ap_cache_generate_key); if (!cache_generate_key) { cache_generate_key = cache_generate_key_default; } + + // Perform the post_config logic for the 'find similar page' feature + server_rec *current_server = s; + while (current_server != NULL) + { + result = crccache_client_post_config_per_virtual_host(p, plog, ptemp, current_server); + if (result != OK) + return result; + + current_server = current_server->next; + } return OK; + +} + +static void crccache_client_child_init_per_virtual_host(apr_pool_t *p, server_rec *s) +{ + crccache_client_conf *conf = ap_get_module_config(s->module_config, + &crccache_client_module); + crccache_client_fsp_child_init_per_virtual_host(p, s, conf->similar_page_cache); +} + +static void crccache_client_child_init(apr_pool_t *p, server_rec *s) +{ + server_rec *current_server = s; + while (current_server != NULL) + { + crccache_client_child_init_per_virtual_host(p, current_server); + current_server = current_server->next; + } } @@ -93,135 +211,173 @@ apr_status_t deflate_ctx_cleanup(void *data) return APR_SUCCESS; } +// ______ continue with refactoring -/* - * Reads headers from a buffer and returns an array of headers. - * Returns NULL on file error - * This routine tries to deal with too long lines and continuation lines. - * @@@: XXX: FIXME: currently the headers are passed thru un-merged. - * Is that okay, or should they be collapsed where possible? +/** + * Request CRCSYNC/delta-http encoding for a page: open the previous data file from the cache, preferably + * for the exact URL but if not present then for a similar URL. Then calculate the CRC blocks for the + * opened page and generate the header + * Returns APR_SUCCESS if the delta-http could be prepared and APR_EGENERAL or APR_NOTFOUND in case of + * error conditions (APR_NOTFOUND if no body could be found, APR_EGENERAL for all other errors) */ -apr_status_t recall_headers(cache_handle_t *h, request_rec *r) { +static apr_status_t request_crcsync_encoding(cache_handle_t *h, request_rec *r, crccache_client_conf *client_conf) +{ const char *data; apr_size_t len; apr_bucket *e; unsigned i; int z_RC; + ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server, "Preparing CRCSYNC/delta-http for %s", r->unparsed_uri); disk_cache_object_t *dobj = (disk_cache_object_t *) h->cache_obj->vobj; - - /* This case should not happen... */ - if (!dobj->hfd) { - /* XXX log message */ - return APR_NOTFOUND; + if (!dobj->fd) + { + ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server, + "No page found in cache for requested URL. Trying to find page for similar URLs"); + dobj = apr_palloc(r->pool, sizeof(disk_cache_object_t)); + if (find_similar_page(dobj, r, client_conf->similar_page_cache) != APR_SUCCESS) + { + ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server, + "Failed to prepare CRCSYNC/delta-http. 
No (similar) page found in cache"); + return APR_NOTFOUND; + } } - - h->req_hdrs = apr_table_make(r->pool, 20); - h->resp_hdrs = apr_table_make(r->pool, 20); - - /* Call routine to read the header lines/status line */ - read_table(h, r, h->resp_hdrs, dobj->hfd); - read_table(h, r, h->req_hdrs, dobj->hfd); - + e = apr_bucket_file_create(dobj->fd, 0, (apr_size_t) dobj->file_size, r->pool, - r->connection->bucket_alloc); + r->connection->bucket_alloc); - /* read */ + /* Read file into bucket. Hopefully the entire file fits in the bucket */ apr_bucket_read(e, &data, &len, APR_BLOCK_READ); - + if (len != dobj->file_size) + { + ap_log_error(APLOG_MARK, APLOG_WARNING, APR_SUCCESS, r->server, + "crccache_client: Only read %" APR_SIZE_T_FMT " bytes out of %" APR_SIZE_T_FMT " bytes from the " + "original response data into the bucket cache", + len, dobj->file_size); + return APR_EGENERAL; + } // this will be rounded down, but thats okay size_t blocksize = len/FULL_BLOCK_COUNT; size_t tail_block_size = blocksize + len % FULL_BLOCK_COUNT; size_t block_count_including_final_block = FULL_BLOCK_COUNT; + ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server, + "Read file into bucket, len: %" APR_SIZE_T_FMT ", blocksize: %" APR_SIZE_T_FMT ", tail_block_size: %" APR_SIZE_T_FMT, + len, blocksize, tail_block_size); + // sanity check for very small files - if (blocksize> 4) + if (blocksize <= 4) { - ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"crccache: %d blocks of %ld bytes, one block of %ld bytes",FULL_BLOCK_COUNT-1,blocksize,tail_block_size); - - crccache_client_ctx * ctx; - ctx = apr_pcalloc(r->pool, sizeof(*ctx)); - ctx->bb = apr_brigade_create(r->pool, r->connection->bucket_alloc); - ctx->block_size = blocksize; - ctx->tail_block_size = tail_block_size; - ctx->state = DECODING_NEW_SECTION; - ctx->cached_bucket = e; - - // Setup inflate for decompressing non-matched literal data - ctx->decompression_stream = apr_palloc(r->pool, sizeof(*(ctx->decompression_stream))); - ctx->decompression_stream->zalloc = Z_NULL; - ctx->decompression_stream->zfree = Z_NULL; - ctx->decompression_stream->opaque = Z_NULL; - ctx->decompression_stream->avail_in = 0; - ctx->decompression_stream->next_in = Z_NULL; - z_RC = inflateInit(ctx->decompression_stream); - if (z_RC != Z_OK) - { - ap_log_error(APLOG_MARK, APLOG_WARNING, 0, r->server, - "Can not initialize decompression engine, return code: %d", z_RC); - return APR_SUCCESS; - } - ctx->decompression_state = DECOMPRESSION_INITIALIZED; + ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server, "Skipped CRCSYNC/delta-http, due to too small blocksize"); + return APR_SUCCESS; + } + + crccache_client_ctx * ctx; + ctx = apr_pcalloc(r->pool, sizeof(*ctx)); + ctx->bb = apr_brigade_create(r->pool, r->connection->bucket_alloc); + ctx->block_size = blocksize; + ctx->tail_block_size = tail_block_size; + ctx->state = DECODING_NEW_SECTION; + ctx->cached_bucket = e; + + // Setup inflate for decompressing non-matched literal data + ctx->decompression_stream = apr_palloc(r->pool, sizeof(*(ctx->decompression_stream))); + ctx->decompression_stream->zalloc = Z_NULL; + ctx->decompression_stream->zfree = Z_NULL; + ctx->decompression_stream->opaque = Z_NULL; + ctx->decompression_stream->avail_in = 0; + ctx->decompression_stream->next_in = Z_NULL; + z_RC = inflateInit(ctx->decompression_stream); + if (z_RC != Z_OK) + { + ap_log_error(APLOG_MARK, APLOG_WARNING, 0, r->server, + "Can not initialize decompression engine, return code: %d", z_RC); + return APR_EGENERAL; + } + 
ctx->decompression_state = DECOMPRESSION_INITIALIZED; - // Register a cleanup function to cleanup internal libz resources - apr_pool_cleanup_register(r->pool, ctx, deflate_ctx_cleanup, - apr_pool_cleanup_null); + // Register a cleanup function to cleanup internal libz resources + apr_pool_cleanup_register(r->pool, ctx, deflate_ctx_cleanup, + apr_pool_cleanup_null); - // All OK to go for the crcsync decoding: add the headers - // and set-up the decoding filter + // All OK to go for the crcsync decoding: add the headers + // and set-up the decoding filter - // add one for base 64 overflow and null terminator - char hash_set[HASH_HEADER_SIZE+1]; + // add one for base 64 overflow and null terminator + char hash_set[HASH_HEADER_SIZE+1]; - uint64_t crcs[block_count_including_final_block]; - crc_of_blocks(data, len, blocksize, HASH_SIZE, true, crcs); + uint64_t crcs[block_count_including_final_block]; + crc_of_blocks(data, len, blocksize, tail_block_size, HASH_SIZE, crcs); - // swap to network byte order - for (i = 0; i < block_count_including_final_block;++i) - { - htobe64(crcs[i]); - } + // swap to network byte order + for (i = 0; i < block_count_including_final_block;++i) + { + htobe64(crcs[i]); + } + + apr_base64_encode (hash_set, (char *)crcs, block_count_including_final_block*sizeof(crcs[0])); + hash_set[HASH_HEADER_SIZE] = '\0'; + //apr_bucket_delete(e); + + // TODO; bit of a safety margin here, could calculate exact size + const int block_header_max_size = HASH_HEADER_SIZE+40; + char block_header_txt[block_header_max_size]; + apr_snprintf(block_header_txt, block_header_max_size,"v=1; fs=%" APR_SIZE_T_FMT "; h=%s",len,hash_set); + apr_table_set(r->headers_in, BLOCK_HEADER, block_header_txt); + // TODO: do we want to cache the hashes here? + + // initialise the context for our sha1 digest of the unencoded response + apr_sha1_init(&ctx->sha1_ctx); + + // we want to add a filter here so that we can decode the response. + // we need access to the original cached data when we get the response as + // we need that to fill in the matched blocks. + // TODO: does the original cached data file remain open between this request + // and the subsequent response or do we run the risk that a concurrent + // request modifies it? + ap_add_output_filter_handle(crccache_decode_filter_handle, ctx, r, r->connection); + ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server, "Successfully prepared CRCSYNC/delta-http"); - apr_base64_encode (hash_set, (char *)crcs, block_count_including_final_block*sizeof(crcs[0])); - hash_set[HASH_HEADER_SIZE] = '\0'; - //apr_bucket_delete(e); - - // TODO; bit of a safety margin here, could calculate exact size - const int block_header_max_size = HASH_HEADER_SIZE+40; - char block_header_txt[block_header_max_size]; - snprintf(block_header_txt, block_header_max_size,"v=1; fs=%zu; h=%s",len,hash_set); - apr_table_set(r->headers_in, BLOCK_HEADER, block_header_txt); - // TODO: do we want to cache the hashes here? - - // initialise the context for our sha1 digest of the unencoded response - apr_sha1_init(&ctx->sha1_ctx); - - // we want to add a filter here so that we can decode the response. - // we need access to the original cached data when we get the response as - // we need that to fill in the matched blocks. - // TODO: does the original cached data file remain open between this request - // and the subsequent response or do we run the risk that a concurrent - // request modifies it? 
- ap_add_output_filter_handle(crccache_decode_filter_handle, - ctx, r, r->connection); - - // TODO: why is hfd file only closed in this case? - apr_file_close(dobj->hfd); + return APR_SUCCESS; +} + + +/* + * Reads headers from a buffer and returns an array of headers. + * Returns NULL on file error + * This routine tries to deal with too long lines and continuation lines. + * @@@: XXX: FIXME: currently the headers are passed thru un-merged. + * Is that okay, or should they be collapsed where possible? + */ +apr_status_t recall_headers(cache_handle_t *h, request_rec *r) { + disk_cache_object_t *dobj = (disk_cache_object_t *) h->cache_obj->vobj; + + /* This case should not happen... */ + if (!dobj->hfd) { + /* XXX log message */ + return APR_NOTFOUND; } + + h->req_hdrs = apr_table_make(r->pool, 20); + h->resp_hdrs = apr_table_make(r->pool, 20); + + /* Call routine to read the header lines/status line */ + read_table(/*h, r, */r->server, h->resp_hdrs, dobj->hfd); + read_table(/*h, r, */r->server, h->req_hdrs, dobj->hfd); + apr_file_close(dobj->hfd); + ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server, - "crccache_client: Recalled headers for URL %s", dobj->name); + "crccache_client: Recalled headers for URL %s", dobj->name); return APR_SUCCESS; } /* * CACHE_DECODE filter * ---------------- - * - * Deliver cached content (headers and body) up the stack. + * Deliver crc decoded content (headers and body) up the stack. */ static int crccache_decode_filter(ap_filter_t *f, apr_bucket_brigade *bb) { apr_bucket *e; request_rec *r = f->r; - // TODO: set up context type struct crccache_client_ctx *ctx = f->ctx; // if this is the first pass in decoding we should check the headers etc @@ -320,13 +476,13 @@ static int crccache_decode_filter(ap_filter_t *f, apr_bucket_brigade *bb) { apr_sha1_final(sha1_value, &ctx->sha1_ctx); if (memcmp(sha1_value, ctx->sha1_value_rx, APR_SHA1_DIGESTSIZE) != 0) { - ap_log_error_wrapper(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"CRCSYNC-DECODE HASH CHECK FAILED for uri %s", r->unparsed_uri); + ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"CRCSYNC-DECODE HASH CHECK FAILED for uri %s", r->unparsed_uri); apr_brigade_cleanup(bb); return APR_EGENERAL; } else { - ap_log_error_wrapper(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"CRCSYNC-DECODE HASH CHECK PASSED for uri %s", r->unparsed_uri); + ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"CRCSYNC-DECODE HASH CHECK PASSED for uri %s", r->unparsed_uri); } /* Okay, we've seen the EOS. 
@@ -360,12 +516,12 @@ static int crccache_decode_filter(ap_filter_t *f, apr_bucket_brigade *bb) { /* read */ apr_bucket_read(e, &data, &len, APR_BLOCK_READ); - //ap_log_error_wrapper(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"CRCSYNC-DECODE read %zd bytes",len); + //ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"CRCSYNC-DECODE read %" APR_SIZE_T_FMT " bytes",len); apr_size_t consumed_bytes = 0; while (consumed_bytes < len) { - //ap_log_error_wrapper(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"CRCSYNC-DECODE remaining %zd bytes",len - consumed_bytes); + //ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"CRCSYNC-DECODE remaining %" APR_SIZE_T_FMT " bytes",len - consumed_bytes); // no guaruntee that our buckets line up with our encoding sections // so we need a processing state machine stored in our context switch (ctx->state) @@ -407,8 +563,8 @@ static int crccache_decode_filter(ap_filter_t *f, apr_bucket_brigade *bb) { // TODO: Output the indicated block here size_t current_block_size = block_number < FULL_BLOCK_COUNT-1 ? ctx->block_size : ctx->tail_block_size; - ap_log_error_wrapper(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server, - "CRCSYNC-DECODE block section, block %d, size %zu" ,block_number, current_block_size); + ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server, + "CRCSYNC-DECODE block section, block %d, size %" APR_SIZE_T_FMT, block_number, current_block_size); char * buf = apr_palloc(r->pool, current_block_size); const char * source_data; @@ -547,6 +703,11 @@ static int crccache_decode_filter(ap_filter_t *f, apr_bucket_brigade *bb) { } static void *crccache_client_create_config(apr_pool_t *p, server_rec *s) { + ap_log_error(APLOG_MARK, APLOG_INFO, APR_SUCCESS, s, + "mod_crccache_client: entering crccache_client_create_config (%s)", + format_hostinfo(p, s)); + + crccache_client_conf *conf = apr_pcalloc(p, sizeof(crccache_client_conf)); /* array of URL prefixes for which caching is enabled */ conf->cacheenable = apr_array_make(p, 10, sizeof(struct cache_enable)); @@ -560,7 +721,11 @@ static void *crccache_client_create_config(apr_pool_t *p, server_rec *s) { conf->minfs = DEFAULT_MIN_FILE_SIZE; conf->cache_root = NULL; - conf->cache_root_len = 0; + // apr_pcalloc has already initialized everyting to 0 + // conf->cache_root_len = 0; + + // TODO: ____ rename once it works + conf->similar_page_cache = create_similar_page_cache(p); return conf; } @@ -656,14 +821,25 @@ static const char *add_crc_client_enable(cmd_parms *parms, void *dummy, return NULL; } +static const char *set_cache_bytes(cmd_parms *parms, void *in_struct_ptr, + const char *arg) { + crccache_client_conf *conf = ap_get_module_config(parms->server->module_config, + &crccache_client_module); + return crccache_client_fsp_set_cache_bytes(parms, in_struct_ptr, arg, conf->similar_page_cache); +} + static const command_rec crccache_client_cmds[] = { - AP_INIT_TAKE1("CRCClientEnable", add_crc_client_enable, NULL, RSRC_CONF, "A cache type and partial URL prefix below which caching is enabled"), + AP_INIT_TAKE1("CRCClientEnable", add_crc_client_enable, NULL, RSRC_CONF, "A partial URL prefix below which caching is enabled"), AP_INIT_TAKE1("CacheRootClient", set_cache_root, NULL, RSRC_CONF,"The directory to store cache files"), AP_INIT_TAKE1("CacheDirLevelsClient", set_cache_dirlevels, NULL, RSRC_CONF, "The number of levels of subdirectories in the cache"), AP_INIT_TAKE1("CacheDirLengthClient", set_cache_dirlength, NULL, RSRC_CONF, "The number of characters in subdirectory names"), 
AP_INIT_TAKE1("CacheMinFileSizeClient", set_cache_minfs, NULL, RSRC_CONF, "The minimum file size to cache a document"), AP_INIT_TAKE1("CacheMaxFileSizeClient", set_cache_maxfs, NULL, RSRC_CONF, "The maximum file size to cache a document"), + AP_INIT_TAKE1("CRCClientSharedCacheSize", set_cache_bytes, NULL, RSRC_CONF, "Set the size of the shared memory cache (in bytes). Use 0 " + "to disable the shared memory cache. (default: 10MB). " + "Disabling the shared memory cache prevents the cache client from using a similar page as base for the delta-request " + "when an exact match (on the URL) can not be found in the cache"), { NULL } }; @@ -738,22 +914,36 @@ int crccache_client_url_handler(request_rec *r, int lookup) return DECLINED; } h = apr_palloc(r->pool, sizeof(cache_handle_t)); - if (open_entity(h, r, key) != OK) + if (open_entity(h, r, key) == OK) + { + if (recall_headers(h, r) == APR_SUCCESS) { + cache->handle = h; + } + else { + ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, + r->server, "Failed to recall headers"); + } + + } + else { ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, - r->server, "Failed to open entity not good"); - return DECLINED; + r->server, "Failed to open entity"); } - if (recall_headers(h, r) != APR_SUCCESS) { - /* TODO: Handle this error */ + if (request_crcsync_encoding(h, r, conf) != APR_SUCCESS) { ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, - r->server, "Failed to recall headers"); - return DECLINED; - } - cache->handle = h; + r->server, "Failed to request CRCSYNC/delta-http encoding"); + } return DECLINED; } +void post_store_body_callback(disk_cache_object_t *dobj, request_rec *r) +{ + // ____ + crccache_client_conf *conf = (crccache_client_conf *) ap_get_module_config(r->server->module_config, + &crccache_client_module); + update_or_add_similar_page(dobj, r, conf->similar_page_cache); +} /* @@ -771,7 +961,6 @@ int crccache_client_url_handler(request_rec *r, int lookup) * If we can, call cache_create_entity() and save the headers and body * Finally, pass the data to the next filter (the network or whatever) */ - int cache_save_filter(ap_filter_t *f, apr_bucket_brigade *in) { int rv = !OK; @@ -783,7 +972,7 @@ int cache_save_filter(ap_filter_t *f, apr_bucket_brigade *in) const char *exps, /* *lastmods,*/ *dates;//, *etag; apr_time_t exp, date,/* lastmod,*/ now; apr_off_t size; - cache_info *info = NULL; + cache_info_t *info = NULL; char *reason; apr_pool_t *p; @@ -796,6 +985,7 @@ int cache_save_filter(ap_filter_t *f, apr_bucket_brigade *in) if (!cache) { /* user likely configured CACHE_SAVE manually; they should really use * mod_cache configuration to do that + * TODO: Don't support this situation. 
In stead, write a WARNING to the log and abort the filter processing */ cache = apr_pcalloc(r->pool, sizeof(cache_request_rec)); ap_set_module_config(r->request_config, &crccache_client_module, cache); @@ -823,7 +1013,7 @@ int cache_save_filter(ap_filter_t *f, apr_bucket_brigade *in) /* pass the brigades into the cache, then pass them * up the filter stack */ - rv = store_body(cache->handle, r, in); + rv = store_body(cache->handle, r, in, &post_store_body_callback); if (rv != APR_SUCCESS) { ap_log_error(APLOG_MARK, APLOG_DEBUG, rv, r->server, "cache: Cache provider's store_body failed!"); @@ -1002,7 +1192,7 @@ int cache_save_filter(ap_filter_t *f, apr_bucket_brigade *in) return rv; } cache->handle = h; - info = apr_pcalloc(r->pool, sizeof(cache_info)); + info = apr_pcalloc(r->pool, sizeof(cache_info_t)); /* We only set info->status upon the initial creation. */ info->status = r->status; } @@ -1021,7 +1211,7 @@ int cache_save_filter(ap_filter_t *f, apr_bucket_brigade *in) */ ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server, "cache: Removing CACHE_REMOVE_URL filter."); - //ap_remove_output_filter(cache->remove_url_filter); + // ap_remove_output_filter(cache->remove_url_filter); /* * We now want to update the cache file header information with @@ -1098,7 +1288,7 @@ int cache_save_filter(ap_filter_t *f, apr_bucket_brigade *in) return ap_pass_brigade(f->next, in); } - rv = store_body(cache->handle, r, in); + rv = store_body(cache->handle, r, in, &post_store_body_callback); if (rv != APR_SUCCESS) { ap_log_error(APLOG_MARK, APLOG_DEBUG, rv, r->server, "cache: store_body failed"); @@ -1109,14 +1299,25 @@ int cache_save_filter(ap_filter_t *f, apr_bucket_brigade *in) } static void crccache_client_register_hook(apr_pool_t *p) { - ap_log_error(APLOG_MARK, APLOG_INFO, 0, NULL, + ap_log_error(APLOG_MARK, APLOG_INFO, APR_SUCCESS, NULL, "Registering crccache client module, (C) 2009, Toby Collett"); /* cache initializer */ ap_hook_post_config(crccache_client_post_config, NULL, NULL, APR_HOOK_REALLY_FIRST); /* cache handler */ ap_hook_quick_handler(crccache_client_url_handler, NULL, NULL, APR_HOOK_FIRST); - /* cache filters + /* child initializer */ + ap_hook_child_init(crccache_client_child_init, NULL, NULL, APR_HOOK_REALLY_FIRST); + + /* + * CRCCACHE_DECODE must go into the filter chain before the cache save filter, + * so that the cache saves the decoded response + */ + crccache_decode_filter_handle = ap_register_output_filter( + "CRCCACHE_DECODE", crccache_decode_filter, NULL, + AP_FTYPE_CONTENT_SET); + + /* cache filters * XXX The cache filters need to run right after the handlers and before * any other filters. Consider creating AP_FTYPE_CACHE for this purpose. * @@ -1153,24 +1354,14 @@ static void crccache_client_register_hook(apr_pool_t *p) { cache_save_filter, NULL, AP_FTYPE_CONTENT_SET-1); - /* - * CRCCACHE_DECODE must go into the filter chain after a possible DEFLATE - * filter to ensure that already compressed cache objects do not - * get compressed again. Incrementing filter type by 1 ensures - * his happens. 
- */ - crccache_decode_filter_handle = ap_register_output_filter( - "CRCCACHE_DECODE", crccache_decode_filter, NULL, - AP_FTYPE_CONTENT_SET + 1); - - } module AP_MODULE_DECLARE_DATA crccache_client_module = { - STANDARD20_MODULE_STUFF, NULL, /* create per-directory config structure */ - NULL , /* merge per-directory config structures */ + STANDARD20_MODULE_STUFF, + NULL, /* create per-directory config structure */ + NULL, /* merge per-directory config structures */ crccache_client_create_config, /* create per-server config structure */ - NULL , /* merge per-server config structures */ + NULL, /* merge per-server config structures */ crccache_client_cmds, /* command apr_table_t */ crccache_client_register_hook /* register hooks */ }; diff --git a/crccache/mod_crccache_client.h b/crccache/mod_crccache_client.h index 6ff883f7f..0088c39c0 100644 --- a/crccache/mod_crccache_client.h +++ b/crccache/mod_crccache_client.h @@ -14,53 +14,10 @@ #include #include #include +#include "mod_crccache_client_find_similar.h" extern module AP_MODULE_DECLARE_DATA crccache_client_module; -const char* cache_create_key( request_rec*r ); - -APR_DECLARE_OPTIONAL_FN(apr_status_t, - ap_cache_generate_key, - (request_rec *r, apr_pool_t*p, char**key )); - -extern APR_OPTIONAL_FN_TYPE(ap_cache_generate_key) *cache_generate_key; - - -// hashes per file -#define FULL_BLOCK_COUNT 40 - -typedef enum decoding_state { - DECODING_NEW_SECTION, - DECODING_COMPRESSED, - DECODING_LITERAL_BODY, - DECODING_LITERAL_SIZE, - DECODING_HASH, - DECODING_BLOCK_HEADER, - DECODING_BLOCK -} decoding_state; - -typedef enum { - DECOMPRESSION_INITIALIZED, - DECOMPRESSION_ENDED -} decompression_state_t; - -typedef struct crccache_client_ctx_t { - apr_bucket_brigade *bb; - size_t block_size; - size_t tail_block_size; - apr_bucket * cached_bucket;// original data so we can fill in the matched blocks - - decoding_state state; - decompression_state_t decompression_state; - z_stream *decompression_stream; - int headers_checked; - struct apr_sha1_ctx_t sha1_ctx; - unsigned char sha1_value_rx[APR_SHA1_DIGESTSIZE]; - unsigned rx_count; - unsigned literal_size; - unsigned char * partial_literal;// original data so we can fill in the matched blocks -} crccache_client_ctx; - struct cache_enable { apr_uri_t url; const char *type; @@ -72,6 +29,7 @@ struct cache_disable { apr_size_t pathlen; }; + /* static information about the local cache */ typedef struct { // from mod cache @@ -85,6 +43,8 @@ typedef struct { int dirlength; /* Length of subdirectory names */ apr_off_t minfs; /* minimum file size for cached files */ apr_off_t maxfs; /* maximum file size for cached files */ + + similar_page_cache_t *similar_page_cache; } crccache_client_conf; #endif /*MOD_CRCCACHE_CLIENT_H*/ diff --git a/crccache/mod_crccache_client_find_similar.c b/crccache/mod_crccache_client_find_similar.c new file mode 100644 index 000000000..6e08fc2b3 --- /dev/null +++ b/crccache/mod_crccache_client_find_similar.c @@ -0,0 +1,1540 @@ +/* Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Find a page for a similar URL as the newly requested page + * Created on: 02/08/2010 + * Author: Alex Wulms + */ + +#include + +#include +#include + +#if APR_HAVE_UNISTD_H +/* for getpid() */ +#include +#endif + + +#include +#include + + +#ifdef AP_NEED_SET_MUTEX_PERMS +#include "unixd.h" +#endif + +#include "crccache.h" +#include "mod_crccache_client_find_similar.h" +#include "ap_log_helper.h" +#include "rmm_hash.h" + +RMM_OFF_T_DECLARE(char); + +typedef struct vary_headers_s vary_headers_t; +RMM_OFF_T_DECLARE(vary_headers_t); +struct vary_headers_s { + RMM_OFF_T(vary_headers_t) next; + RMM_OFF_T(char) name; + RMM_OFF_T(char) value; +}; + + +typedef struct cached_files_info_s cached_files_info_t; +RMM_OFF_T_DECLARE(cached_files_info_t); +struct cached_files_info_s { + RMM_OFF_T(cached_files_info_t) prev; + RMM_OFF_T(cached_files_info_t) next; + RMM_OFF_T(char) basepath; // Path without .header or .data postfix + RMM_OFF_T(char) uri; // URI of the page (useful for logging purposes) + RMM_OFF_T(vary_headers_t) vary_headers; +}; + +typedef struct sp_per_content_type_s sp_per_content_type_t; +RMM_OFF_T_DECLARE(sp_per_content_type_t); +struct sp_per_content_type_s { + RMM_OFF_T(sp_per_content_type_t) next; + RMM_OFF_T(char) content_type; + RMM_OFF_T(cached_files_info_t) cached_files_info; + RMM_OFF_T(rmm_hash_t) cached_files_info_by_path; + RMM_OFF_T(cached_files_info_t) tail_file_info; +}; + +typedef struct sp_per_regex_s sp_per_regex_t; +RMM_OFF_T_DECLARE(sp_per_regex_t); +struct sp_per_regex_s { + RMM_OFF_T(sp_per_regex_t) next; + /* The regex parameter stored here is the non-compiled regex string. + * The compiled version must be cached in a per-process cache pool. + * Reason is that the ap_regex compiler allocates an internal structure + * for the compiled data using malloc. 
The ap_preg structure does not provide + * any info about that internal structure (like the length) and as such, + * the internal structure can not be transferred to the shared memory :-( + */ + RMM_OFF_T(char) regex; + apr_size_t regex_len; + RMM_OFF_T(sp_per_content_type_t) similar_pages_per_content_type; +}; + +RMM_OFF_T_DECLARE(int); +struct similar_page_cache_s { + const char* cache_root; + apr_size_t cache_root_len; + + apr_global_mutex_t *fs_cache_lock; + apr_size_t cache_bytes; /* Size (in bytes) of shared memory cache */ +#if APR_HAS_SHARED_MEMORY + apr_shm_t *shm; +#endif + apr_rmm_t *rmm; + RMM_OFF_T(rmm_hash_t) similar_pages_per_host; + const char *cache_file; /* filename for shm backing cache file */ + const char *lock_file; /* filename for shm lock mutex */ + RMM_OFF_T(int) lock_is_available; /* lock is available in all threads/subprocesses */ + apr_hash_t *similar_pages_regexs; /* compiled regular expressions for similar pages */ + RMM_OFF_T(rmm_hash_t) vary_headers_cache; + int similar_pages_cache_initialized; +}; + +/** + * Returns 1 when the lock is available in all threads/subprocesses and 0 otherwise + */ +static int is_lock_available(similar_page_cache_t *sp_cache) +{ + return *APR_RMM_ADDR_GET(int, sp_cache->rmm, sp_cache->lock_is_available); +} + +/** + * Duplicate a string value into the a memory segment allocated from the relocatable memory. + * Returns: RMM_OFF_NULL on memory allocation error + * offset of duplicated string when all fine + */ +static RMM_OFF_T(char) rmm_strdup(apr_rmm_t *rmm, const char *value) +{ + size_t valuelen = strlen(value); + RMM_OFF_T(char) rslt = apr_rmm_malloc(rmm, valuelen+1); + if (rslt == RMM_OFF_NULL) + { + return RMM_OFF_NULL; + } + memcpy(APR_RMM_ADDR_GET(char, rmm, rslt), value, valuelen+1); + return rslt; +} + +static apr_status_t similar_page_cache_kill(void *data) +{ + similar_page_cache_t *sp_cache = data; + + sp_cache->similar_pages_cache_initialized = 0; + if (sp_cache->rmm != NULL) + { + apr_rmm_destroy(sp_cache->rmm); + sp_cache->rmm = NULL; + } +#if APR_HAS_SHARED_MEMORY + if (sp_cache->shm != NULL) { + apr_status_t result = apr_shm_destroy(sp_cache->shm); + sp_cache->shm = NULL; + return result; + } +#endif + return APR_SUCCESS; +} + +typedef struct { + int compiled; + ap_regex_t *preg; +} compiled_regex_info_t; + +static int fsp_regex_match(request_rec *r, const char *regex, const char *uri_key, similar_page_cache_t *sp_cache) +{ + if (sp_cache->similar_pages_regexs == NULL) { + sp_cache->similar_pages_regexs = apr_hash_make(r->server->process->pool); + if (sp_cache->similar_pages_regexs == NULL) + { + // Not enough memory to cache the regexs, so probably also not enough memory to + // compile the regex. 
+ return 0; // Return a mismatch + } + } + compiled_regex_info_t *regex_info = (compiled_regex_info_t *)apr_hash_get(sp_cache->similar_pages_regexs, regex, APR_HASH_KEY_STRING); + if (regex_info == NULL) + { + regex_info = apr_palloc(r->server->process->pool, sizeof(compiled_regex_info_t)); + if (regex_info == NULL) + { + ap_log_error(APLOG_MARK, APLOG_WARNING, APR_SUCCESS, r->server, "Could not allocate memory for regex_info"); + return 0; // Return a mismatch + } + regex_info->preg = apr_palloc(r->server->process->pool, sizeof(ap_regex_t)); + if (regex_info->preg == NULL) + { + ap_log_error(APLOG_MARK, APLOG_WARNING, APR_SUCCESS, r->server, "Could not allocate memory for regex_info->preg"); + return 0; // Return a mismatch + } + int rslt = ap_regcomp(regex_info->preg, regex, 0); + if (rslt != 0) + { + ap_log_error(APLOG_MARK, APLOG_WARNING, APR_SUCCESS, r->server, "Could not compile regexp %s, return code: %d", regex, rslt); + regex_info->compiled = 0; + } + else + { + regex_info->compiled = 1; + } + // Store the 'compiled' regex even when the compilation failed. This prevents the same warning from re-appearing. Otherwise, the + // compilation will fail on each request for a page that might match this regex. + apr_hash_set(sp_cache->similar_pages_regexs, regex, APR_HASH_KEY_STRING, regex_info); + } + if (regex_info->compiled) + { + return ap_regexec(regex_info->preg, uri_key, 0, NULL, AP_REG_ICASE) == 0; + } + return 0; // Compilation of regex has failed at least once. Return a mismatch +} + +/***************************************************************** + * Record of available info on a media type specified by the client + * (we also use 'em for encodings and languages) + * + * - Taken from mod_negotation.c + */ +typedef struct accept_rec { + char *name; /* MUST be lowercase */ + float quality; + float level; + char *charset; /* for content-type only */ +} accept_rec; + +/***************************************************************** + * parse quality value. atof(3) is not well-usable here, because it + * depends on the locale (argh). + * + * However, RFC 2616 states: + * 3.9 Quality Values + * + * [...] HTTP/1.1 applications MUST NOT generate more than three digits + * after the decimal point. User configuration of these values SHOULD also + * be limited in this fashion. + * + * qvalue = ( "0" [ "." 0*3DIGIT ] ) + * | ( "1" [ "." 0*3("0") ] ) + * + * This is quite easy. If the supplied string doesn't match the above + * definition (loosely), we simply return 1 (same as if there's no qvalue) + * + * - Taken from mod_negotation.c + */ +static float atoq(const char *string) +{ + if (!string || !*string) { + return 1.0f; + } + + while (*string && apr_isspace(*string)) { + ++string; + } + + /* be tolerant and accept qvalues without leading zero + * (also for backwards compat, where atof() was in use) + */ + if (*string != '.' && *string++ != '0') { + return 1.0f; + } + + if (*string == '.') { + /* better only one division later, than dealing with fscking + * IEEE format 0.1 factors ... 
+ */ + int i = 0; + + if (*++string >= '0' && *string <= '9') { + i += (*string - '0') * 100; + + if (*++string >= '0' && *string <= '9') { + i += (*string - '0') * 10; + + if (*++string > '0' && *string <= '9') { + i += (*string - '0'); + } + } + } + + return (float)i / 1000.0f; + } + + return 0.0f; +} + +/***************************************************************** + * Get a single mime type entry --- one media type and parameters; + * enter the values we recognize into the argument accept_rec + * + * - Taken from mod_negotation.c + */ +static const char *get_accept_entry(apr_pool_t *p, accept_rec *result, + const char *accept_line) +{ + result->quality = 1.0f; + result->level = 0.0f; + result->charset = ""; + + /* + * Note that this handles what I gather is the "old format", + * + * Accept: text/html text/plain moo/zot + * + * without any compatibility kludges --- if the token after the + * MIME type begins with a semicolon, we know we're looking at parms, + * otherwise, we know we aren't. (So why all the pissing and moaning + * in the CERN server code? I must be missing something). + */ + + result->name = ap_get_token(p, &accept_line, 0); + ap_str_tolower(result->name); /* You want case insensitive, + * you'll *get* case insensitive. + */ + + /* KLUDGE!!! Default HTML to level 2.0 unless the browser + * *explicitly* says something else. + */ + + if (!strcmp(result->name, "text/html") && (result->level == 0.0)) { + result->level = 2.0f; + } + else if (!strcmp(result->name, INCLUDES_MAGIC_TYPE)) { + result->level = 2.0f; + } + else if (!strcmp(result->name, INCLUDES_MAGIC_TYPE3)) { + result->level = 3.0f; + } + + while (*accept_line == ';') { + /* Parameters ... */ + + char *parm; + char *cp; + char *end; + + ++accept_line; + parm = ap_get_token(p, &accept_line, 1); + + /* Look for 'var = value' --- and make sure the var is in lcase. */ + + for (cp = parm; (*cp && !apr_isspace(*cp) && *cp != '='); ++cp) { + *cp = apr_tolower(*cp); + } + + if (!*cp) { + continue; /* No '='; just ignore it. */ + } + + *cp++ = '\0'; /* Delimit var */ + while (*cp && (apr_isspace(*cp) || *cp == '=')) { + ++cp; + } + + if (*cp == '"') { + ++cp; + for (end = cp; + (*end && *end != '\n' && *end != '\r' && *end != '\"'); + end++); + } + else { + for (end = cp; (*end && !apr_isspace(*end)); end++); + } + if (*end) { + *end = '\0'; /* strip ending quote or return */ + } + ap_str_tolower(cp); + + if (parm[0] == 'q' + && (parm[1] == '\0' || (parm[1] == 's' && parm[2] == '\0'))) { + result->quality = atoq(cp); + } + else if (parm[0] == 'l' && !strcmp(&parm[1], "evel")) { + result->level = (float)atoi(cp); + } + else if (!strcmp(parm, "charset")) { + result->charset = cp; + } + } + + if (*accept_line == ',') { + ++accept_line; + } + + return accept_line; +} + + +/***************************************************************** + * Dealing with Accept... header lines ... + * Accept, Accept-Charset, Accept-Language and Accept-Encoding + * are handled by do_header_line() - they all have the same + * basic structure of a list of items of the format + * name; q=N; charset=TEXT + * + * where charset is only valid in Accept. 
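// A worked illustration of the decomposition described above (the values below are
// examples only; they follow from atoq() and get_accept_entry() as written in this file):
//
//   Accept: text/html;q=0.9, text/*;level=1;q=0.5, */*;q=0.1
//
// is expected to yield accept_rec entries roughly like
//   { name = "text/html", quality = 0.9, level = 2.0, charset = "" }   (level 2.0 via the HTML kludge)
//   { name = "text/*",    quality = 0.5, level = 1.0, charset = "" }
// while the trailing "*/*" wildcard entry is discarded again by parse_accept_line() further below.
// atoq("0.9") evaluates to 900/1000 = 0.9 without relying on the locale-dependent atof().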
+ * + * - Taken from mod_negotation.c + */ +static apr_array_header_t *parse_accept_line(apr_pool_t *p, + const char *accept_line) +{ + apr_array_header_t *accept_recs; + + if (!accept_line) { + return NULL; + } + + accept_recs = apr_array_make(p, 40, sizeof(accept_rec)); + if (accept_recs == NULL) + { + return NULL; // Nothing to allocate + } + while (*accept_line) { + accept_rec *new = (accept_rec *) apr_array_push(accept_recs); + accept_line = get_accept_entry(p, new, accept_line); + if (!strcmp(new->name, "*/*")) + { + apr_array_pop(accept_recs); // Discard this entry + } + } + + return accept_recs; +} + + +static int match_accept_type_vs_mime_type(const char *mime_type, const char *accept_type) +{ + while (*mime_type && *accept_type && *mime_type == *accept_type) + { + mime_type++; + accept_type++; + } + return (*mime_type == 0 && *accept_type == 0) || (*accept_type == '*'); +} + + +// TODO: Refine. Current logic is simplistic. It only checks the mime-type part of the content-type +// header of the cached page (e.g. it ignores the charset) and furthermore, it ignores +// the 'quality'/'level' indicates in the accept header. The function returns true +// if the mime-type of the cached page matches at least one of the content-types indicated +// in the accept header +// Note that the foundation for more fine-grained logic has been laid. The accept-header +// is parsed and broken down in all the constituting elements, using code copied from +// module mod-negotation +static int fsp_accept_matches_content_type(similar_page_cache_t *sp_cache, + request_rec *r, RMM_OFF_T(char) content_type) +{ + apr_array_header_t *accepts = parse_accept_line(r->pool, apr_table_get(r->headers_in, ACCEPT_HEADER)); + const char *content_type_line = APR_RMM_ADDR_GET(char, sp_cache->rmm, content_type); + + if (accepts == NULL) + { + return 0; // Can't validate content type versus accept header + } + ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server, + "Comparing content type line %s versus accept line %s", + content_type_line, apr_table_get(r->headers_in, ACCEPT_HEADER)); + + // Only look at the mime-type (e.g. text/html) of the content-type line. + // Discard any other parameters like the charset + char *mime_type = ap_get_token(r->pool, &content_type_line, 0); + ap_str_tolower(mime_type); + + accept_rec *accept_elts = (accept_rec *)accepts->elts; + int cnt; + for (cnt = 0; cnt != accepts->nelts; cnt++) + { + const char *accept_type = accept_elts[cnt].name; + ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server, + "Comparing mime type %s versus accept type %s", mime_type, accept_type); + if (match_accept_type_vs_mime_type(mime_type, accept_type)) + { + return 1; // A good-enough match found. Use this page. + } + } + return 0; // No match found.Skip this page +} + +static void clear_rmm_field(apr_rmm_t *rmm, apr_rmm_off_t *offset_ptr) +{ + if (*offset_ptr != RMM_OFF_NULL) { + apr_rmm_free(rmm, *offset_ptr); + *offset_ptr = RMM_OFF_NULL; + } +} + +/* + * Free all memory used by a cached_files_info_t structure + * Be aware that this function might get called while the structure is not yet complete. E.g. 
+ * it gets called when an out-of-memory condition occurs during the construction + */ +static void free_cached_files_info(apr_rmm_t *rmm, sp_per_content_type_t *sp_per_ct_physical, RMM_OFF_T(cached_files_info_t) cached_file_info) +{ + cached_files_info_t *cfi_physical = APR_RMM_ADDR_GET(cached_files_info_t, rmm, cached_file_info); + + // Delete the entry from the hash table + if (sp_per_ct_physical->cached_files_info_by_path != RMM_OFF_NULL && cfi_physical->basepath != RMM_OFF_NULL) { + rmm_hash_set(rmm, sp_per_ct_physical->cached_files_info_by_path, cfi_physical->basepath, APR_HASH_KEY_STRING, RMM_OFF_NULL); + } + + // Update the tail entry if this was the tail entry + if (cached_file_info == sp_per_ct_physical->tail_file_info) { + sp_per_ct_physical->tail_file_info = cfi_physical->prev; + } + + // Remove the entry from the (double-linked) list + if (cfi_physical->next != RMM_OFF_NULL) { + APR_RMM_ADDR_GET(cached_files_info_t, rmm, cfi_physical->next)->prev = cfi_physical->prev; + } + if (cfi_physical->prev != RMM_OFF_NULL) { + APR_RMM_ADDR_GET(cached_files_info_t, rmm, cfi_physical->prev)->next = cfi_physical->next; + } + else { + sp_per_ct_physical->cached_files_info = cfi_physical->next; + } + + clear_rmm_field(rmm, &cfi_physical->basepath); + clear_rmm_field(rmm, &cfi_physical->uri); + apr_rmm_free(rmm, cached_file_info); +} + +/** + * Verify if the cached file contains a vary header. If yes, then match the headers in the request with + * the corresponding headers in the cached page. + * Returns true if there is no vary header or if the vary headers match correctly + * TODO: refine the logic to match the header values. According to the RFC, the comparison may + * ignore white-space characters in the header values (accordingly to the BNF/syntax of that specific header...). + * At the moment, the header values are compared literally, so in theory, this comparison is too restrictive. + */ +static int match_vary_headers(similar_page_cache_t *sp_cache, request_rec *r, RMM_OFF_T(vary_headers_t)vary_headers) +{ + if (vary_headers == RMM_OFF_NULL) { + return 1; // The cached page did not specify vary header, so the new request matches by definition + } + apr_rmm_t *rmm = sp_cache->rmm; + while (vary_headers != RMM_OFF_NULL) { + vary_headers_t *vary_headers_physical = APR_RMM_ADDR_GET(vary_headers_t, rmm, vary_headers); + const char *headername = APR_RMM_ADDR_GET(char, rmm, vary_headers_physical->name); + ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server, "Comparing vary header %s", headername); + if (strcmp(headername, "*") == 0) { + // The special 'header name' * signifies that the server always varies stuff in an undisclosed manner. + // The similar page matching will probably yield bad results. Ignore this page. + return 0; + } + const char *cached_headervalue = (vary_headers_physical->value == RMM_OFF_NULL) ? 
+ NULL : APR_RMM_ADDR_GET(char, rmm, vary_headers_physical->value); + const char *req_headervalue = apr_table_get(r->headers_in, headername); + ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server, "Cached value: %s, request value: %s", + cached_headervalue, req_headervalue); + if (req_headervalue == NULL && cached_headervalue != NULL) { + return 0; // Expecting a value but did not get one + } + if (req_headervalue != NULL && cached_headervalue == NULL) { + return 0; // Expecting empty header but got a value + } + if (req_headervalue != NULL && strcmp(req_headervalue, cached_headervalue) != 0) { + return 0; // The new and old header value differ + } + vary_headers = vary_headers_physical->next; + } + return 1; // All vary headers are the same +} + +/** + * Try to open the file indicated in cfi_physical structure + * Returns APR_SUCCESS if the file was successfully opened, in which case the dobj structure + * will have been properly updated. + * Returns other error codes in case of problems. + * WARNING: When the file no longer exists, the structure cfi_physical will be deleted from memory and + * from the linked-list. It means that the caller should evaluate cfi_physical->next *before* invoking + * this function. + */ +static apr_status_t open_cached_file(disk_cache_object_t *dobj, request_rec *r, + similar_page_cache_t *sp_cache, sp_per_content_type_t *sp_per_ct_physical, + RMM_OFF_T(cached_files_info_t) cached_file_info) +{ + apr_rmm_t *rmm = sp_cache->rmm; + cached_files_info_t *cfi_physical = APR_RMM_ADDR_GET(cached_files_info_t, sp_cache->rmm, cached_file_info); + const char *fullpath = apr_pstrcat(r->pool, sp_cache->cache_root, "/", + APR_RMM_ADDR_GET(char, rmm, cfi_physical->basepath), CACHE_DATA_SUFFIX, NULL); + int flags = APR_READ|APR_BINARY; +#ifdef APR_SENDFILE_ENABLED + flags |= APR_SENDFILE_ENABLED; +#endif + apr_status_t rc = apr_file_open(&dobj->fd, fullpath, flags, 0, r->pool); + if (rc == APR_SUCCESS) + { + // Successfully opened the file. Try to obtain the file-size and return the completed dobj + // to the caller + apr_finfo_t finfo; + rc = apr_file_info_get(&finfo, APR_FINFO_SIZE, dobj->fd); + if (rc == APR_SUCCESS) { + dobj->file_size = finfo.size; + ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server, + "Basing CRCSYNC/delta-http for requested URL on cached page for URL %s of size %" APR_SIZE_T_FMT, + APR_RMM_ADDR_GET(char, sp_cache->rmm, cfi_physical->uri), dobj->file_size); + return APR_SUCCESS; + } + // Could not obtain file info for a mysterious reason. Skip this file. + apr_file_close(dobj->fd); + } + else + { + // Apparently the cached file is no longer there. Maybe it got cleaned by htcacheclean? + if (is_lock_available(sp_cache)) { + // Remove the entry. But only if this process could obtain the semaphore... + free_cached_files_info(rmm, sp_per_ct_physical, cached_file_info); + } + } + return rc; // Could not open file or obtain file-info for whatever reason. +} + +/** + * Critical section of the code to find similar pages. While this code is in progress, no updates to the data + * structures may happen by other threads/processes, like by function 'update_or_add_similar_page(...), which is invoked + * when a new file has been saved to the disk cache. + * + * Please note that this function itself can update the 'free-pages' list if the code discovers that the data + * structure is referencing a file that no longer exists. Apart from that update-block, the code is fully re-entrant. 
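// A simplified outline of the locking scheme described here, mirroring what
// find_similar_page() further below actually does (sketch only, not additional patch code):
//
//   if (is_lock_available(sp_cache)) {
//       if (apr_global_mutex_lock(sp_cache->fs_cache_lock) != APR_SUCCESS)
//           return lock_error;                                  // hypothetical error path
//       ... walk and, where needed, update the shared rmm structures ...
//       apr_global_mutex_unlock(sp_cache->fs_cache_lock);
//   } else {
//       ... read-only walk; the shared structures are never modified ...
//   }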
+ * In other words: multiple requests can enter this code concurrently, as long as they don't update the 'free-pages'
+ * list and as long as this does not happen concurrently with the 'update_or_add_similar_page(...)' function.
+ *
+ * At the moment, the code block that updates the 'free-pages' list first checks whether the lock could be obtained
+ * and only performs the update in that case; without the lock it leaves the list untouched.
+ *
+ * The locking is currently rather coarse-grained: when the lock is available, the global mutex makes access
+ * to this function and to the 'update_or_add_similar_page(...)' function mutually exclusive. On the other hand, when the
+ * global mutex could not be initialized and is therefore not available, the 'update_or_add_similar_page(...)' function
+ * is disabled and only the 'find similar page' lookup works, on the data that was loaded during server startup.
+ *
+ * To increase scalability, more fine-grained locking could be implemented by carefully assessing which
+ * parts of the 'update_or_add_similar_page(...)' function conflict with the data structures used by this 'find_similar_page'
+ * function and then adding the appropriate locks where required.
+ */
+static apr_status_t find_similar_page_cs(disk_cache_object_t *dobj, request_rec *r, similar_page_cache_t *sp_cache, const char *host)
+{
+	apr_rmm_t *rmm = sp_cache->rmm;
+	RMM_OFF_T(sp_per_regex_t) sp_per_regex = rmm_hash_get(rmm, sp_cache->similar_pages_per_host, host, APR_HASH_KEY_STRING);
+	while (sp_per_regex != RMM_OFF_NULL)
+	{
+		sp_per_regex_t *sp_per_regex_physical = APR_RMM_ADDR_GET(sp_per_regex_t, rmm, sp_per_regex);
+		if (fsp_regex_match(r, APR_RMM_ADDR_GET(char, rmm, sp_per_regex_physical->regex), r->unparsed_uri, sp_cache))
+		{
+			// Found the longest matching regex. Find a group of pages with an appropriate content type
+			RMM_OFF_T(sp_per_content_type_t) sp_per_ct = sp_per_regex_physical->similar_pages_per_content_type;
+			while (sp_per_ct != RMM_OFF_NULL)
+			{
+				sp_per_content_type_t *sp_per_ct_physical = APR_RMM_ADDR_GET(sp_per_content_type_t, rmm, sp_per_ct);
+				if (fsp_accept_matches_content_type(sp_cache, r, sp_per_ct_physical->content_type))
+				{
+					// Found the list of pages with an appropriate content type for the matching regex.
+					// Now try to open a page associated with this regex and content type
+					RMM_OFF_T(cached_files_info_t) cached_file_info = sp_per_ct_physical->cached_files_info;
+					while (cached_file_info != RMM_OFF_NULL)
+					{
+						cached_files_info_t *cfi_physical = APR_RMM_ADDR_GET(cached_files_info_t, sp_cache->rmm, cached_file_info);
+						RMM_OFF_T(cached_files_info_t) next_cfi = cfi_physical->next;
+						if (match_vary_headers(sp_cache, r, cfi_physical->vary_headers)) {
+							if (open_cached_file(dobj, r, sp_cache, sp_per_ct_physical, cached_file_info) == APR_SUCCESS) {
+								return APR_SUCCESS; // File successfully opened. Done.
+							}
+						}
+						cached_file_info = next_cfi;
+					} // while (cached_file_info != RMM_OFF_NULL)
+				} // if (fsp_accept_matches_content_type(sp_cache, r, sp_per_ct_physical->content_type))
+				sp_per_ct = sp_per_ct_physical->next;
+			} // while (sp_per_ct != RMM_OFF_NULL)
+		} // if (fsp_regex_match(r, APR_RMM_ADDR_GET(char, rmm, sp_per_regex_physical->regex), r->unparsed_uri, sp_cache))
+		sp_per_regex = sp_per_regex_physical->next;
+	} // while (sp_per_regex != RMM_OFF_NULL)
+	ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server, "Could not find a similar page for the requested URL");
+	return DECLINED;
+}
+
+/**
+ * Find a page in the cache for a URL that is similar to the requested URL and that can
+ * fulfill at least one of the expected mime-types indicated in the "Accept" header.
+ * This page can then be used by the CRCCache as the basis for the CRCSYNC/Delta-http encoding.
+ */
+apr_status_t find_similar_page(disk_cache_object_t *dobj, request_rec *r, similar_page_cache_t *sp_cache)
+{
+	if (!sp_cache->similar_pages_cache_initialized)
+	{
+		ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server, "Similar page cache is not initialized");
+		return DECLINED;
+	}
+	const char *host = apr_table_get(r->headers_in, HOST_HEADER);
+	if (!host) {
+		ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server, "Can't find host header in the request");
+		return DECLINED;
+	}
+
+	apr_status_t findrslt;
+	if (is_lock_available(sp_cache)) {
+		apr_status_t lockrslt = apr_global_mutex_lock(sp_cache->fs_cache_lock);
+		if (lockrslt != APR_SUCCESS)
+		{
+			ap_log_error(APLOG_MARK, APLOG_WARNING, lockrslt, r->server, "Can't obtain the lock");
+			return lockrslt;
+		}
+		findrslt = find_similar_page_cs(dobj, r, sp_cache, host);
+		lockrslt = apr_global_mutex_unlock(sp_cache->fs_cache_lock);
+		if (lockrslt != APR_SUCCESS)
+		{
+			ap_log_error(APLOG_MARK, APLOG_WARNING, lockrslt, r->server, "Can't release the lock");
+		}
+	}
+	else {
+		findrslt = find_similar_page_cs(dobj, r, sp_cache, host);
+	}
+	return findrslt;
+}
+
+/**
+ * Create info about a cached file
+ * Returns RMM_OFF_NULL when a memory allocation error has occurred.
+ */ +static RMM_OFF_T(cached_files_info_t) create_cached_files_info(apr_rmm_t *rmm, + const char *basepath, const char *uri, RMM_OFF_T(vary_headers_t) vary_headers) +{ + RMM_OFF_T(cached_files_info_t) cached_files_info = apr_rmm_calloc(rmm, sizeof(cached_files_info_t)); + if (cached_files_info == RMM_OFF_NULL) + { + return cached_files_info; + } + cached_files_info_t *cfi_physical = APR_RMM_ADDR_GET(cached_files_info_t, rmm, cached_files_info); + cfi_physical->basepath = rmm_strdup(rmm, basepath); + cfi_physical->uri = rmm_strdup(rmm, uri); + if (cfi_physical->basepath == RMM_OFF_NULL || cfi_physical->uri == RMM_OFF_NULL) + { + clear_rmm_field(rmm, &cfi_physical->basepath); + clear_rmm_field(rmm, &cfi_physical->uri); + apr_rmm_free(rmm, cached_files_info); + return RMM_OFF_NULL; + } + cfi_physical->prev = RMM_OFF_NULL; + cfi_physical->next = RMM_OFF_NULL; + cfi_physical->vary_headers = vary_headers; + + return cached_files_info; +} + +/* + * Create a 'similar pages per content type' structure for the current basepath, uri and content_type + * Returns NULL when a memory allocation error has occured + */ +static RMM_OFF_T(sp_per_content_type_t) create_sp_per_content_type(apr_rmm_t *rmm, + const char *basepath, const char *uri, const char *content_type, RMM_OFF_T(vary_headers_t)vary_headers) +{ + RMM_OFF_T(sp_per_content_type_t) sp_per_ct = apr_rmm_calloc(rmm, sizeof(sp_per_content_type_t)); + if (sp_per_ct == RMM_OFF_NULL) + { + return RMM_OFF_NULL; // Memory allocation failure! + } + sp_per_content_type_t *sp_per_ct_physical = APR_RMM_ADDR_GET(sp_per_content_type_t, rmm, sp_per_ct); + sp_per_ct_physical->next = RMM_OFF_NULL; + sp_per_ct_physical->content_type = rmm_strdup(rmm, content_type); + if (sp_per_ct_physical->content_type == RMM_OFF_NULL) + { + apr_rmm_free(rmm, sp_per_ct); + return RMM_OFF_NULL; + } + + sp_per_ct_physical->cached_files_info = create_cached_files_info(rmm, basepath, uri, vary_headers); + if (sp_per_ct_physical->cached_files_info == RMM_OFF_NULL) + { + apr_rmm_free(rmm, sp_per_ct_physical->content_type); + apr_rmm_free(rmm, sp_per_ct); + return RMM_OFF_NULL; + } + sp_per_ct_physical->tail_file_info = sp_per_ct_physical->cached_files_info; + + sp_per_ct_physical->cached_files_info_by_path = rmm_hash_make(rmm); + if (sp_per_ct_physical->cached_files_info_by_path == RMM_OFF_NULL) + { + free_cached_files_info(rmm, sp_per_ct_physical, sp_per_ct_physical->cached_files_info); + apr_rmm_free(rmm, sp_per_ct_physical->content_type); + apr_rmm_free(rmm, sp_per_ct); + return RMM_OFF_NULL; + } + // FIXME: rmm_hash_set should be able to return an out-of-memory condition when appropriate so that *this* function can properly handle + // the error condition... + rmm_hash_set(rmm, sp_per_ct_physical->cached_files_info_by_path, + APR_RMM_ADDR_GET(cached_files_info_t, rmm, sp_per_ct_physical->cached_files_info)->basepath, APR_HASH_KEY_STRING, + sp_per_ct_physical->cached_files_info); + + return sp_per_ct; +} + + +/* + * Create a 'similar pages per regex' structure for the current regex, basepath, uri and content_type + * Returns NULL when a memory allocation error has occured + */ +static RMM_OFF_T(sp_per_regex_t) create_sp_per_regex(apr_rmm_t *rmm, + const char *regex, const char *basepath, const char *uri, const char *content_type, RMM_OFF_T(vary_headers_t)vary_headers) +{ + RMM_OFF_T(sp_per_regex_t) sp_per_regex = apr_rmm_calloc(rmm, sizeof(sp_per_regex_t)); + if (sp_per_regex == RMM_OFF_NULL) + { + return RMM_OFF_NULL; // Memory allocation failure! 
+ } + sp_per_regex_t *sp_per_regex_physical = APR_RMM_ADDR_GET(sp_per_regex_t, rmm, sp_per_regex); + sp_per_regex_physical->next = RMM_OFF_NULL; + sp_per_regex_physical->regex_len = strlen(regex); + sp_per_regex_physical->regex = rmm_strdup(rmm, regex); + if (sp_per_regex_physical->regex == RMM_OFF_NULL) + { + apr_rmm_free(rmm, sp_per_regex); + return RMM_OFF_NULL; + } + sp_per_regex_physical->similar_pages_per_content_type = create_sp_per_content_type(rmm, basepath, uri, content_type, vary_headers); + if (sp_per_regex_physical->similar_pages_per_content_type == RMM_OFF_NULL) + { + apr_rmm_free(rmm, sp_per_regex_physical->regex); + apr_rmm_free(rmm, sp_per_regex); + return RMM_OFF_NULL; + } + return sp_per_regex; +} + +/** + * Add a new cached file to the list of cached files for the current content type or update the entry if it + * is already present + * Returns: 1 on memory allocation error + * 0 when all fine + */ +static int add_cached_file_to_content_type(similar_page_cache_t *sp_cache, sp_per_content_type_t *sp_per_ct_physical, + const char *basepath, const char *uri, RMM_OFF_T(vary_headers_t) vary_headers) +{ + apr_rmm_t *rmm = sp_cache->rmm; + RMM_OFF_T(cached_files_info_t) cached_file_info; + cached_files_info_t *cfi_physical; + + // Make the cached_file_info record + cached_file_info = create_cached_files_info(rmm, basepath, uri, vary_headers); + if (cached_file_info == RMM_OFF_NULL) { + return 1; // Could not allocate memory. Can't store the info. + } + cfi_physical = APR_RMM_ADDR_GET(cached_files_info_t, rmm, cached_file_info); + + // Insert the new entry at the head of the list + cfi_physical->next = sp_per_ct_physical->cached_files_info; + if (cfi_physical->next != RMM_OFF_NULL) { + // There was already something in the list. Make the old head entry point back to + // this new head entry + APR_RMM_ADDR_GET(cached_files_info_t, rmm, cfi_physical->next)->prev = cached_file_info; + } + else { + // The list was empty. This new entry is now by definition a tail entry + sp_per_ct_physical->tail_file_info = cached_file_info; + } + sp_per_ct_physical->cached_files_info = cached_file_info; + + // Remove old version of the page (if it exists) from the list + RMM_OFF_T(cached_files_info_t) old_cached_file = rmm_hash_get(rmm, + sp_per_ct_physical->cached_files_info_by_path, + basepath, APR_HASH_KEY_STRING); + if (old_cached_file != RMM_OFF_NULL) { + free_cached_files_info(rmm, sp_per_ct_physical, old_cached_file); + } + + // Add the new version to the reverse index + // FIXME: deal with failure of rmm_hash_set (once rmm_hash_set has been fixed to return an out-of-memory condition + // when appropriate + rmm_hash_set(rmm, sp_per_ct_physical->cached_files_info_by_path, cfi_physical->basepath, APR_HASH_KEY_STRING, cached_file_info); + + if (rmm_hash_count(rmm, sp_per_ct_physical->cached_files_info_by_path) > 40 /* TODO: make this threshold configurable */) + { + // Only maintain info about the (40) most recently cached pages per host per regex per content-type + // The chance that all of them point to meanwhile deleted/obsolete files is very small, considering + // the fact that each freshly cached file gets inserted at the head of the list, so it does not make + // much sense to fill-up the memory with a longer list. 
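// Put differently, each (host, regex, content-type) bucket behaves like a small
// most-recently-cached list (schematic view only, not additional patch code):
//
//   cached_files_info -> newest entry -> ... -> oldest entry <- tail_file_info
//                        ^ new pages inserted at the head       ^ evicted below once the count exceeds the threshold
//
// with cached_files_info_by_path acting as a reverse index (basepath -> entry), so a
// re-cached page replaces its previous entry instead of accumulating duplicates.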
+ free_cached_files_info(rmm, sp_per_ct_physical, sp_per_ct_physical->tail_file_info); + } + + return 0; // Cached file info successfully added +} + +/** + * Add a new cached file to the list of cached files for the current regular expression or update the page if it + * is already present + * Returns: 1 on memory allocation error + * 0 when all fine + */ +static int add_cached_file_to_regex(similar_page_cache_t *sp_cache, sp_per_regex_t *sp_per_regex_physical, + const char *basepath, const char *uri, const char *content_type, RMM_OFF_T(vary_headers_t)vary_headers) +{ + RMM_OFF_T(sp_per_content_type_t) sp_per_ct; + apr_rmm_t *rmm = sp_cache->rmm; + sp_per_ct = sp_per_regex_physical->similar_pages_per_content_type; + while (sp_per_ct != RMM_OFF_NULL) { + sp_per_content_type_t *sp_per_ct_physical = APR_RMM_ADDR_GET(sp_per_content_type_t, rmm, sp_per_ct); + if (!strcmp(content_type, APR_RMM_ADDR_GET(char, rmm, sp_per_ct_physical->content_type))) { + // Found the correct entry. Add or update the page here + return add_cached_file_to_content_type(sp_cache, sp_per_ct_physical, basepath, uri, vary_headers); + } + sp_per_ct = sp_per_ct_physical->next; + } + // There is nothing yet for this content type. Add it to the list + sp_per_ct = create_sp_per_content_type(rmm, basepath, uri, content_type, vary_headers); + if (sp_per_ct == RMM_OFF_NULL) { + return 1; + } + // Add it to the head of the list + APR_RMM_ADDR_GET(sp_per_content_type_t, rmm, sp_per_ct)->next = sp_per_regex_physical->similar_pages_per_content_type; + sp_per_regex_physical->similar_pages_per_content_type = sp_per_ct; + return 0; +} + + +/** + * Add a new page to the list of similar pages for current host or update an existing page + * Returns: 1 on memory allocation error + * 0 when all fine + */ +static int add_similar_pages_info(similar_page_cache_t *sp_cache, RMM_OFF_T(sp_per_regex_t) *sp_per_regex_p, + const char *regex, const char *basepath, const char *uri, const char *content_type, RMM_OFF_T(vary_headers_t)vary_headers) +{ + apr_rmm_t *rmm = sp_cache->rmm; + size_t regex_len = strlen(regex); + while (1) + { + RMM_OFF_T(sp_per_regex_t) curr_sp_per_regex = *sp_per_regex_p; + sp_per_regex_t *sp_per_regex_physical = APR_RMM_ADDR_GET(sp_per_regex_t, rmm, curr_sp_per_regex); + if (regex_len == sp_per_regex_physical->regex_len && strcmp(regex, APR_RMM_ADDR_GET(char, rmm, sp_per_regex_physical->regex))==0) + { + // Found a perfect match. Add or update the page to the head of the current pages list + return add_cached_file_to_regex(sp_cache, sp_per_regex_physical, basepath, uri, content_type, vary_headers); + } + else + { + if (regex_len > sp_per_regex_physical->regex_len ) + { + // No matching regex found that is longer then the current regex. + // Insert the new entry here in the list, so that the list remains sorted in descending order on regex_len + RMM_OFF_T(sp_per_regex_t) new_sp_per_regex = create_sp_per_regex(rmm, regex, basepath, uri, content_type, vary_headers); + if (new_sp_per_regex == RMM_OFF_NULL) + { + return 1; // Out of memory condition occurred + } + APR_RMM_ADDR_GET(sp_per_regex_t, rmm, new_sp_per_regex)->next = curr_sp_per_regex; + *sp_per_regex_p = new_sp_per_regex; + return 0; // New page succesfully inserted + } + else + { + if (sp_per_regex_physical->next == RMM_OFF_NULL) + { + // Reached tail of the list. 
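// For reference, this loop in add_similar_pages_info() keeps the list sorted on
// regex_len in descending order, so the scan in find_similar_page_cs() stops at the
// longest matching pattern. Outline of the three cases it handles (sketch only):
//
//   same length and identical pattern      -> add/update the page under the existing regex
//   new regex longer than the current one  -> insert the new entry in front of it
//   tail of the list reached               -> append the new entry at the end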
The new regex is shorter then any of the existing ones + // Insert new entry to the end of the list + RMM_OFF_T(sp_per_regex_t) new_sp_per_regex = create_sp_per_regex(rmm, regex, basepath, uri, content_type, vary_headers); + if (new_sp_per_regex == RMM_OFF_NULL) + { + return 1; // Out of memory condition occurred + } + sp_per_regex_physical->next = new_sp_per_regex; + return 0; // New page succesfully inserted + } + // Evaluate the next entry + sp_per_regex_p = &sp_per_regex_physical->next; + } + } + } + return 0; +} + +/** + * Add (or update) a cached page to the 'similar pages' cache + * Returns: 1 on memory allocation error + * 0 when all fine + * The invoking function may want to log a warning in case of memory + * allocation error so that the system administrator can tune the cache + * parameters if this happens too often + */ +static int add_cached_page(similar_page_cache_t *sp_cache, const char *regex, const char *host, + const char *basepath, const char *uri, const char *content_type, RMM_OFF_T(vary_headers_t)vary_headers) +{ + apr_rmm_t *rmm = sp_cache->rmm; + RMM_OFF_T(sp_per_regex_t) sp_per_regex = rmm_hash_get(rmm, sp_cache->similar_pages_per_host, host, APR_HASH_KEY_STRING); + if (sp_per_regex == RMM_OFF_NULL) + { + // There is no info yet for the current host. Make the first entry. + RMM_OFF_T(char) host_offset = rmm_strdup(rmm, host); + if (host_offset == RMM_OFF_NULL) { + return 1; // Could not allocate memory + } + sp_per_regex = create_sp_per_regex(rmm, regex, basepath, uri, content_type, vary_headers); + if (sp_per_regex == RMM_OFF_NULL) { + apr_rmm_free(rmm, host_offset); + return 1; // Could not allocate memory! + } + rmm_hash_set(rmm, sp_cache->similar_pages_per_host, host_offset, APR_HASH_KEY_STRING, sp_per_regex); + return 0; // All fine + } + else + { + // The current entry already contains similar pages info. Add new or updated page to the list + int rslt = add_similar_pages_info(sp_cache, &sp_per_regex, regex, basepath, uri, content_type, vary_headers); + return rslt; + } +} + +/** + * Allocate and initialze an empty similar page cache + */ +static apr_status_t similar_page_cache_init(apr_pool_t *pool, server_rec *s, similar_page_cache_t *sp_cache) +{ +#if APR_HAS_SHARED_MEMORY + ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, s, "APR supports shared memory"); + apr_status_t result; + apr_size_t requested_size; + apr_size_t retrieved_size; + + if (sp_cache->cache_file) { + /* Remove any existing shm segment with this name. */ + apr_shm_remove(sp_cache->cache_file, pool); + } + + requested_size = APR_ALIGN_DEFAULT(sp_cache->cache_bytes); + result = apr_shm_create(&sp_cache->shm, requested_size, sp_cache->cache_file, pool); + if (result != APR_SUCCESS) { + ap_log_error(APLOG_MARK, APLOG_ERR, result, s, + "Unable to obtain %" APR_SIZE_T_FMT " bytes shared memory", requested_size); + return result; + } + + /* Determine the usable size of the shm segment. 
*/ + retrieved_size = apr_shm_size_get(sp_cache->shm); + ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, s, + "Requested %" APR_SIZE_T_FMT " bytes shared memory, retrieved %" APR_SIZE_T_FMT " bytes", + requested_size, retrieved_size); + + /* This will create a rmm "handler" to get into the shared memory area */ + result = apr_rmm_init(&sp_cache->rmm, NULL, + apr_shm_baseaddr_get(sp_cache->shm), retrieved_size, + pool); + if (result != APR_SUCCESS) { + ap_log_error(APLOG_MARK, APLOG_ERR, result, s, "Unable to initialize rmm handler for (shared) memory"); + return result; + } +#else + void *local_memory = apr_palloc(pool, sp_cache->cache_bytes); + if (local_memory == NULL) + { + ap_log_error(APLOG_MARK, APLOG_ERR, result, s, + "Unable to obtain %" APR_SIZE_T_FMT " bytes of memory", requested_size); + } + + /* This will create a rmm "handler" to get into the memory area */ + result = apr_rmm_init(&sp_cache->rmm, NULL, + local_memory, sp_cache->cache_bytes, + pool); + if (result != APR_SUCCESS) { + ap_log_error(APLOG_MARK, APLOG_ERR, result, s, "Unable to initialize rmm handler for (shared) memory"); + return result; + } + +#endif + + apr_pool_cleanup_register(pool, sp_cache, similar_page_cache_kill, apr_pool_cleanup_null); + + sp_cache->similar_pages_per_host = rmm_hash_make(sp_cache->rmm); + if (sp_cache->similar_pages_per_host == RMM_OFF_NULL) { + ap_log_error(APLOG_MARK, APLOG_ERR, APR_EGENERAL, s, "Unable to allocate memory for similar pages info cache"); + return APR_EGENERAL; + } + + sp_cache->lock_is_available = apr_rmm_calloc(sp_cache->rmm, sizeof(int)); + if (sp_cache->lock_is_available == RMM_OFF_NULL) { + ap_log_error(APLOG_MARK, APLOG_ERR, APR_EGENERAL, s, "Unable to allocate memory for similar pages info cache"); + return APR_EGENERAL; + } + + sp_cache->vary_headers_cache = rmm_hash_make(sp_cache->rmm); + if (sp_cache->vary_headers_cache == RMM_OFF_NULL) { + ap_log_error(APLOG_MARK, APLOG_ERR, APR_EGENERAL, s, "Unable to allocate memory for similar pages info cache"); + return APR_EGENERAL; + } + + return APR_SUCCESS; +} + +static apr_status_t make_vary_headers(apr_pool_t *p, server_rec *s, similar_page_cache_t *sp_cache, + apr_table_t *req_hdrs, apr_table_t *resp_hdrs, RMM_OFF_T(vary_headers_t) *vary_headers_p) +{ + *vary_headers_p = RMM_OFF_NULL; + apr_rmm_t *rmm = sp_cache->rmm; + const char *vary = apr_table_get(resp_hdrs, VARY_HEADER); + if (vary != NULL) + { + char *headername; + char *vary_cache_key = ""; + char *separator=""; + while ((headername = ap_get_token(p, &vary, 1)) != NULL && strlen(headername) != 0) + { + // Ignore 'Accept-Encoding' vary header; we transform anything anyway to identity coding before storing it in the cache + // so it does not matter what the server has done with respect to the content-encoding. + if (strcmp(headername, ACCEPT_ENCODING_HEADER) != 0) { + vary_cache_key = apr_pstrcat(p, vary_cache_key, separator, headername, "=", apr_table_get(req_hdrs, headername), NULL); + separator=", "; + } + } + if (*vary_cache_key == 0) { + // Apparently the content only varies based on the 'Accept-Encoding', which we ignore. + return APR_SUCCESS; + } + RMM_OFF_T(vary_headers_t) vary_headers = rmm_hash_get(sp_cache->rmm, sp_cache->vary_headers_cache, vary_cache_key, APR_HASH_KEY_STRING); + ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, s, "Vary cache key: %s, found in cache?: %s", + vary_cache_key, (vary_headers == RMM_OFF_NULL) ? "No" : "Yes"); + if (vary_headers == RMM_OFF_NULL) { + // This vary headers combination is not yet cached. 
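// Example of the cache key built above (header values are illustrative): for a cached
// response carrying "Vary: Accept-Language, Accept-Encoding" and a request that sent
// "Accept-Language: en-us", the key becomes
//
//   "Accept-Language=en-us"
//
// because Accept-Encoding is deliberately ignored. If a response varies only on
// Accept-Encoding, the key stays empty and the page is treated as if it had no Vary
// constraints at all.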
Make the structure and cache it + vary = apr_table_get(resp_hdrs, VARY_HEADER); // Get again the vary header + while ((headername = ap_get_token(p, &vary, 1)) != NULL && strlen(headername) != 0) + { + // Ignore 'Accept-Encoding' vary header; we transform anything anyway to identity coding before storing it in the cache + // so it does not matter what the server has done with respect to the content-encoding. + if (strcmp(headername, ACCEPT_ENCODING_HEADER) != 0) { + // Allocate the new entry + RMM_OFF_T(vary_headers_t) new_vary_header = apr_rmm_malloc(rmm, sizeof(vary_headers_t)); + if (new_vary_header == RMM_OFF_NULL) { + return 1; // Could not allocate memory + } + vary_headers_t *new_vh_physical = APR_RMM_ADDR_GET(vary_headers_t, rmm, new_vary_header); + + // Put the new vary header at the head of the list of entries + new_vh_physical->next = vary_headers; + vary_headers = new_vary_header; + + if ((new_vh_physical->name = rmm_strdup(rmm, headername)) == RMM_OFF_NULL) { + return 1; + } + + new_vh_physical->value = RMM_OFF_NULL; + const char *value = apr_table_get(req_hdrs, headername); + if (value != NULL) + { + if ((new_vh_physical->value = rmm_strdup(rmm, value)) == RMM_OFF_NULL) { + return 1; + } + } + } + } + rmm_hash_set(sp_cache->rmm, sp_cache->vary_headers_cache, rmm_strdup(rmm, vary_cache_key), APR_HASH_KEY_STRING, vary_headers); + + } + *vary_headers_p = vary_headers; + } + return APR_SUCCESS; +} + +/** + * Load the info from the file-cache into the 'find similar page' cache + */ +static apr_status_t similar_page_cache_load(apr_pool_t *ptemp, server_rec *s, const char *abs_dirname, const char *rel_dirname, similar_page_cache_t *sp_cache) +{ + apr_status_t result; + apr_dir_t *dirinfo; // structure for referencing directories + apr_finfo_t fileinfo; // file information structure + + // ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, s, "Opening directory %s", abs_dirname); + result = apr_dir_open(&dirinfo, abs_dirname, ptemp); + // ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, s, "Result: %d", result); + if (result != APR_SUCCESS) + { + ap_log_error(APLOG_MARK, APLOG_WARNING, result, s, "Unable to open directory %s", abs_dirname); + return result; + } + while (apr_dir_read(&fileinfo, 0, dirinfo) == APR_SUCCESS) + { + if (!strcmp(fileinfo.name, ".") || !strcmp(fileinfo.name, "..")) + { + // Do not recursively go into current or parent directory! + continue; + } + if (fileinfo.filetype == APR_DIR) + { + const char *sub_abs_dirname = apr_pstrcat(ptemp, abs_dirname, "/", fileinfo.name, NULL); + const char *sub_rel_dirname = (*rel_dirname == 0) ? apr_pstrdup(ptemp, fileinfo.name) : + apr_pstrcat(ptemp, rel_dirname, "/", fileinfo.name, NULL); + if (similar_page_cache_load(ptemp, s, sub_abs_dirname, sub_rel_dirname, sp_cache) != APR_SUCCESS) + { + continue; // skip this sub directory and process the next one + } + } + else if (fileinfo.filetype == APR_REG) + { + // ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, s, "About to open file: %s", fileinfo.name); + if (strstr(fileinfo.name, CACHE_HEADER_SUFFIX) != NULL) + { + // ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, s, "Its a header file"); + // Build the key (basepath) for the cache. 
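// Illustration of the on-disk layout this walk expects, assuming the usual
// mod_disk_cache-style header/data suffixes (the paths are made-up examples):
//
//   <cache_root>/aB3/xYz/QWERTY12345.header   -> metadata + headers, parsed below
//   <cache_root>/aB3/xYz/QWERTY12345.data     -> response body, opened later by open_cached_file()
//
// so a header file found at relative path "aB3/xYz/QWERTY12345.header" becomes the
// cache key (basepath) "aB3/xYz/QWERTY12345".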
+ // It consists of the relative path name exluding the .header extension + char *basepath = apr_pstrdup(ptemp, fileinfo.name); + *strstr(basepath, CACHE_HEADER_SUFFIX)=0; + basepath = apr_pstrcat(ptemp, rel_dirname, "/", basepath, NULL); + // ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, s, "Basepath: %s", basepath); + + char *full_filepath = apr_pstrcat(ptemp, abs_dirname, "/", fileinfo.name, NULL); + // ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, s, "Full_filepath: %s", full_filepath); + + apr_file_t *fd; + result = apr_file_open(&fd, full_filepath, APR_READ|APR_BINARY|APR_BUFFERED, 0, ptemp); + if (result != APR_SUCCESS) + { + ap_log_error(APLOG_MARK, APLOG_WARNING, result, s, "Failed to open file %s", full_filepath); + continue; // Skip this file + } + + apr_uint32_t format; + apr_size_t len; + + /* Read and evaluate the format from the cache file */ + len = sizeof(format); + apr_file_read_full(fd, &format, len, &len); + if (format == VARY_FORMAT_VERSION) { + // TODO: Smartly handle "vary" header files. But skip them for the time being. + ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, s, "Skipping vary header file %s", full_filepath); + apr_file_close(fd); + continue; // Skip this file + } + if (format != DISK_FORMAT_VERSION) { + ap_log_error(APLOG_MARK, APLOG_WARNING, APR_SUCCESS, s, + "File %s has a version mismatch. File had version %d, but expected version is %d", + full_filepath, format, DISK_FORMAT_VERSION); + apr_file_close(fd); + continue; // Skip this file + } + // Format OK, rewind to file begin + apr_off_t offset = 0; + apr_file_seek(fd, APR_SET, &offset); + + // Read metadata from file + cache_object_t *obj = apr_pcalloc(ptemp, sizeof(cache_object_t));; + disk_cache_object_t *dobj = apr_pcalloc(ptemp, sizeof(disk_cache_object_t));; + cache_info_t *cache_info = &(obj->info); + result = file_cache_recall_mydata(ptemp, fd, cache_info, dobj, 0); + if (result != APR_SUCCESS) + { + ap_log_error(APLOG_MARK, APLOG_WARNING, result, s, + "Problem encountered reading meta data from %s", full_filepath); + apr_file_close(fd); + continue; // Skip this file + } + + // Read request and response headers + apr_table_t *req_hdrs = apr_table_make(ptemp, 20); + apr_table_t *resp_hdrs = apr_table_make(ptemp, 20); + result = read_table(s, resp_hdrs, fd); + if (result != APR_SUCCESS) + { + ap_log_error(APLOG_MARK, APLOG_WARNING, result, s, "Failed to read response headers from file %s", full_filepath); + apr_file_close(fd); + continue; // Skip this file + } + result = read_table(s, req_hdrs, fd); + apr_file_close(fd); + if (result != APR_SUCCESS) + { + ap_log_error(APLOG_MARK, APLOG_WARNING, result, s, "Failed to read request headers from file %s", full_filepath); + continue; // Skip this file + } + + // Add file to 'similar pages' cache if host, crcsync_similar and content_type headers are present + const char *hostname = apr_table_get(req_hdrs, HOST_HEADER); + const char *crcsync_similar = apr_table_get(resp_hdrs, CRCSYNC_SIMILAR_HEADER); + const char *content_type = apr_table_get(resp_hdrs, CONTENT_TYPE_HEADER); + if (hostname != NULL && crcsync_similar != NULL && content_type != NULL) + { + RMM_OFF_T(vary_headers_t) vary_headers; + result = make_vary_headers(ptemp, s, sp_cache, req_hdrs, resp_hdrs, &vary_headers); + if (result != 0) { + ap_log_error(APLOG_MARK, APLOG_WARNING, APR_SUCCESS, s, + "Could not allocate memory to cache vary headers"); + continue; // Skip this file + } + result = add_cached_page(sp_cache, crcsync_similar, hostname, basepath, cache_info->uri, 
content_type, vary_headers); + if (result == 0) + { + ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, s, + "Successfully added file %s to 'find similar page' cache (host: %s, content-type: %s, regex: %s, uri: %s)", + basepath, hostname, content_type, crcsync_similar, cache_info->uri); + } + else + { + ap_log_error(APLOG_MARK, APLOG_WARNING, APR_SUCCESS, s, + "Failed to add file %s with regex %s for host %s, content-type %s, uri %s to 'find similar page' cache, result: %d", + basepath, crcsync_similar, hostname, content_type, cache_info->uri, result); + } + } + } + } + else + { + ap_log_error(APLOG_MARK, APLOG_WARNING, APR_SUCCESS, s, "Unknown file type %d for file %s/%s", + fileinfo.filetype, abs_dirname, fileinfo.name); + } + } + + apr_dir_close(dirinfo); + return APR_SUCCESS; +} + +const char *crccache_client_fsp_set_cache_bytes(cmd_parms *parms, void *in_struct_ptr, + const char *arg, similar_page_cache_t *sp_cache) +{ + apr_size_t val = atol(arg); + if (val < 0) + return "CRCClientSharedCacheSize value must be an integer greater than or equal to 0"; + sp_cache->cache_bytes = val; + return NULL; + +} + +similar_page_cache_t *create_similar_page_cache(apr_pool_t *p) +{ + similar_page_cache_t *sp_cache = apr_pcalloc(p, sizeof(similar_page_cache_t)); + if (sp_cache != NULL) { + sp_cache->cache_bytes = 10*1024*1024; // Default to 10 MB + } + return sp_cache; +} + +static void create_global_mutex(similar_page_cache_t *sp_cache, apr_pool_t *p, apr_pool_t *ptemp, server_rec *s) +{ + apr_status_t result; + result = apr_global_mutex_create(&sp_cache->fs_cache_lock, + sp_cache->lock_file, APR_LOCK_DEFAULT, + p); + if (result != APR_SUCCESS) { + ap_log_error(APLOG_MARK, APLOG_WARNING, result, s, + "Failed to allocate mutex on vhost %s. Similar page cache will only be loaded on start-up but not maintained for new pages cached while the server is running", + format_hostinfo(ptemp, s)); + sp_cache->fs_cache_lock = NULL; + return; + } + +#ifdef AP_NEED_SET_MUTEX_PERMS + result = unixd_set_global_mutex_perms(sp_cache->fs_cache_lock); + if (result != APR_SUCCESS) { + ap_log_error(APLOG_MARK, APLOG_WARNING, result, s, + "Failed to set mutex permissions on vhost %s. Similar page cache will only be loaded on start-up but not maintained for new pages cached while the server is running", + format_hostinfo(ptemp, s)); + apr_global_mutex_destroy(sp_cache->fs_cache_lock); + sp_cache->fs_cache_lock = NULL; + return; + } +#endif + + // Lock is available for all threads/subprocesses + *APR_RMM_ADDR_GET(int, sp_cache->rmm, sp_cache->lock_is_available)=1; +} + +int crccache_client_fsp_post_config_per_virtual_host(apr_pool_t *p, apr_pool_t *plog, + apr_pool_t *ptemp, server_rec *s, similar_page_cache_t *sp_cache, const char *cache_root) +{ + apr_status_t result; + + /* + * Set-up the shared memory block and the mutex for the 'find similar page' memory cache + */ + + // Need to know the CacheRootClient value in order to make the SHM + // cache backing file and the mutex lock backing file + + const char *cache_file_tmp = apr_pstrcat(ptemp, cache_root, "/crccache_client_shm", NULL); + const char *lock_file_tmp = apr_pstrcat(ptemp, cache_file_tmp, ".lck", NULL); + void *data; + const char *userdata_key = apr_pstrcat(p, "crccache_client_init:", cache_root, NULL); + + /* util_crccache_client_post_config() will be called twice. 
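// This is the usual httpd idiom for coping with the fact that post_config runs twice
// at startup: a flag is stored in the process pool on the first pass, and the expensive
// initialisation only happens on the second pass (outline only; the real code follows):
//
//   void *data = NULL;
//   apr_pool_userdata_get(&data, userdata_key, s->process->pool);
//   if (data == NULL) {
//       apr_pool_userdata_set((const void *)1, userdata_key,
//                             apr_pool_cleanup_null, s->process->pool);
//       return OK;   // first pass: skip shm/mutex setup, it would be thrown away
//   }
//   // second pass: create the shm segment, the mutex and load the cache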
Don't bother + * going through all of the initialization on the first call + * because it will just be thrown away.*/ + apr_pool_userdata_get(&data, userdata_key, s->process->pool); + ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, s,"vhost %s, data=%s", + format_hostinfo(ptemp, s), + data == NULL ? "null" : "not null"); + if (!data) { + // This code-block is only executed on first invocation of post_config + apr_pool_userdata_set((const void *)1, userdata_key, + apr_pool_cleanup_null, s->process->pool); +#if APR_HAS_SHARED_MEMORY + /* If the lock file already exists then delete it. Otherwise we are + * going to run into problems creating the shared memory mutex. */ + if (lock_file_tmp) { + apr_file_remove(lock_file_tmp, ptemp); + } +#endif + return OK; + + } + + // Below code-block is only executed on second invocation of post_config + sp_cache->cache_root = cache_root; + sp_cache->cache_root_len = strlen(cache_root); + sp_cache->cache_file = apr_pstrdup(p, cache_file_tmp); + sp_cache->lock_file = apr_pstrdup(p, lock_file_tmp); + +#if APR_HAS_SHARED_MEMORY + /* initializing cache if we don't have shm address + */ + if (!sp_cache->shm) { +#endif + /* initializing cache if shared memory size or entries is not zero + */ + if (sp_cache->cache_bytes > 0) { + result = similar_page_cache_init(p, s, sp_cache); + if (result != APR_SUCCESS) { + ap_log_error(APLOG_MARK, APLOG_ERR, result, s, + "Could not initialize in-memory cache to efficiently find similar pages on vhost %s. Find similar page functionality is disabled", + format_hostinfo(ptemp, s)); + return DONE; + } + + create_global_mutex(sp_cache, p, ptemp, s); + + result = similar_page_cache_load(ptemp, s, sp_cache->cache_root, "", sp_cache); + if (result != APR_SUCCESS) { + ap_log_error(APLOG_MARK, APLOG_ERR, result, s, + "Failed to load data into in-memory cache to efficiently find similar pages on vhost %s. Find similar page functionality is disabled", + format_hostinfo(ptemp, s)); + return result; + } + + sp_cache->similar_pages_regexs = apr_hash_make(p); // Set-up cache for compiled regular expressions for similar page lookup + sp_cache->similar_pages_cache_initialized = 1; // Similar page cache has finally been successfully set-up and is ready to be used + + ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, s, + "Successfully initialized shared memory cache for this context (%s)", + format_hostinfo(ptemp, s)); + } + else { + ap_log_error(APLOG_MARK, APLOG_INFO, APR_SUCCESS, s, + "CRCCacheClientSharedCacheSize is zero on vhost %s. Find similar page functionality is disabled", + format_hostinfo(ptemp, s)); + } +#if APR_HAS_SHARED_MEMORY + } + else + { + ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, s, + "vhost (%s): Weird. Shared memory cache is already initialized for this context", + format_hostinfo(ptemp, s)); + } +#endif + return OK; +} + +void crccache_client_fsp_child_init_per_virtual_host(apr_pool_t *p, server_rec *s, similar_page_cache_t *sp_cache) +{ + apr_status_t sts; + + ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, s, + "mod_crccache_client.child_init_per_vhost (%s): cache_lock: %s", + format_hostinfo(p, s), + sp_cache->fs_cache_lock ? "defined" : "empty"); + + if (sp_cache->fs_cache_lock) + { + sts = apr_global_mutex_child_init(&sp_cache->fs_cache_lock, + sp_cache->lock_file, p); + if (sts != APR_SUCCESS) { + ap_log_error(APLOG_MARK, APLOG_WARNING, sts, s, + "Failed to initialise global mutex %s in child process %" APR_PID_T_FMT ". 
The similar page cache will not be maintained for newly cached pages", + sp_cache->lock_file, getpid()); + sp_cache->fs_cache_lock = NULL; // Disable the global mutex in this child process + *APR_RMM_ADDR_GET(int, sp_cache->rmm, sp_cache->lock_is_available) = 0; // Disable global mutex in all child processes + } + else + { + ap_log_error(APLOG_MARK, APLOG_DEBUG, sts, s, + "Successfully initialized global mutex %s in child process %" APR_PID_T_FMT ".", + sp_cache->lock_file, getpid()); + } + } +} + +void update_or_add_similar_page(disk_cache_object_t *dobj, request_rec *r, similar_page_cache_t *sp_cache) +{ + if (!is_lock_available(sp_cache)) { + return; // Lock is not available. Can't start doing updates + } + + if (strlen(dobj->hdrsfile)+1 < sp_cache->cache_root_len || + memcmp(dobj->hdrsfile, sp_cache->cache_root, sp_cache->cache_root_len) || + dobj->hdrsfile[sp_cache->cache_root_len] != '/') { + ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_EGENERAL, r->server, + "FIXME: Header file name %s does not start with cache root path %s while it should", + dobj->hdrsfile, sp_cache->cache_root); + return; + } + char *basepath = apr_pstrdup(r->pool, dobj->hdrsfile+sp_cache->cache_root_len+1); + apr_size_t suffix_len=strlen(CACHE_HEADER_SUFFIX); + apr_size_t basepath_len = strlen(basepath); + if (basepath_len < suffix_len || memcmp(basepath+(basepath_len-suffix_len), CACHE_HEADER_SUFFIX, suffix_len)) { + ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_EGENERAL, r->server, + "FIXME: Header file name %s does not end on %s suffix", + dobj->hdrsfile, CACHE_HEADER_SUFFIX); + return; + + } + *(basepath+(basepath_len-suffix_len)) = 0; // Terminate the suffix location + + const char *hostname = apr_table_get(r->headers_in, HOST_HEADER); + const char *crcsync_similar = apr_table_get(r->headers_out, CRCSYNC_SIMILAR_HEADER); + const char *content_type = apr_table_get(r->headers_out, CONTENT_TYPE_HEADER); + if (hostname != NULL && crcsync_similar != NULL && content_type != NULL) + { + apr_status_t lockrslt = apr_global_mutex_lock(sp_cache->fs_cache_lock); + if (lockrslt != APR_SUCCESS) + { + ap_log_error(APLOG_MARK, APLOG_WARNING, lockrslt, r->server, "Can't obtain the lock"); + return; + } + RMM_OFF_T(vary_headers_t) vary_headers; + int addrslt = make_vary_headers(r->pool, r->server, sp_cache, r->headers_in, r->headers_out, &vary_headers); + if (addrslt != 0) + { + ap_log_error(APLOG_MARK, APLOG_WARNING, APR_SUCCESS, r->server, + "Could not allocate memory to cache vary headers"); + } + else + { + addrslt = add_cached_page(sp_cache, crcsync_similar, hostname, basepath, dobj->name, content_type, vary_headers); + if (addrslt == 0) + { + ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server, + "Successfully added file %s to 'find similar page' cache (host: %s, content-type: %s, regex: %s, uri: %s)", + basepath, hostname, content_type, crcsync_similar, dobj->name); + } + else + { + ap_log_error(APLOG_MARK, APLOG_WARNING, APR_SUCCESS, r->server, + "Failed to add file %s with regex %s for host %s, content-type %s, uri %s to 'find similar page' cache, result: %d", + basepath, crcsync_similar, hostname, content_type, dobj->name, addrslt); + } + } + lockrslt = apr_global_mutex_unlock(sp_cache->fs_cache_lock); + if (lockrslt != APR_SUCCESS) + { + ap_log_error(APLOG_MARK, APLOG_WARNING, lockrslt, r->server, "Can't release the lock"); + } + } +} diff --git a/crccache/mod_crccache_client_find_similar.h b/crccache/mod_crccache_client_find_similar.h new file mode 100644 index 000000000..244869363 --- /dev/null +++ 
b/crccache/mod_crccache_client_find_similar.h @@ -0,0 +1,34 @@ +/* + * mod_crccache_client.h + * + * Created on: 15/03/2009 + * Author: awulms + */ +#ifndef MOD_CRCCACHE_CLIENT_FIND_SIMILAR_H +#define MOD_CRCCACHE_CLIENT_FIND_SIMILAR_H + +#include "cache/cache.h" +#include +#include +#include +#include +#include +#include + +typedef struct similar_page_cache_s similar_page_cache_t; + +similar_page_cache_t *create_similar_page_cache(apr_pool_t *p); + +const char *crccache_client_fsp_set_cache_bytes(cmd_parms *parms, void *in_struct_ptr, + const char *arg, similar_page_cache_t *conf); + +int crccache_client_fsp_post_config_per_virtual_host(apr_pool_t *p, apr_pool_t *plog, + apr_pool_t *ptemp, server_rec *s, similar_page_cache_t *conf, const char *cache_root); + +void crccache_client_fsp_child_init_per_virtual_host(apr_pool_t *p, server_rec *s, similar_page_cache_t *conf); + +apr_status_t find_similar_page(disk_cache_object_t *dobj, request_rec *r, similar_page_cache_t *sp_cache); + +void update_or_add_similar_page(disk_cache_object_t *dobj, request_rec *r, similar_page_cache_t *sp_cache); + +#endif /* MOD_CRCCACHE_CLIENT_FIND_SIMILAR_H */ \ No newline at end of file diff --git a/crccache/mod_crccache_server.c b/crccache/mod_crccache_server.c index 91085f192..b3c40a51c 100644 --- a/crccache/mod_crccache_server.c +++ b/crccache/mod_crccache_server.c @@ -40,7 +40,7 @@ #include "util_charset.h" #include -#include "ap_wrapper.h" +#include "ap_log_helper.h" #include "crccache.h" #include "mod_crccache_server.h" @@ -58,6 +58,38 @@ typedef enum { COMPRESSION_ENDED } compression_state_t; +// Private structures +typedef struct encodings_s encodings_t; +struct encodings_s { + encodings_t *next; + const char *encoding; +}; + +typedef struct decoder_modules_s decoder_modules_t; +struct decoder_modules_s { + decoder_modules_t *next; + const char *name; + encodings_t *encodings; +}; + +typedef struct regexs_s regexs_t; +struct regexs_s { + regexs_t *next; + ap_regex_t *preg; + const char *regex; + encodings_t *mime_types; +}; + +/* Static information about the crccache server */ +typedef struct { + int enabled; + decoder_modules_t *decoder_modules; + unsigned decoder_modules_cnt; + regexs_t *regexs; + regexs_t *regexs_tail; +} crccache_server_conf; + + static void *crccache_server_create_config(apr_pool_t *p, server_rec *s) { crccache_server_conf *conf = apr_pcalloc(p, sizeof(crccache_server_conf)); conf->enabled = 0; @@ -123,20 +155,80 @@ static const char *add_crccache_similar_page_regex (cmd_parms *parms, void *in_s regexs->preg = apr_palloc(parms->pool, sizeof(ap_regex_t)); if (regexs == NULL) { - return "Out of memory exception while allocating regexs->preg structure"; + return "Out of memory exception while allocating regexs->preg field"; } - regexs->regex = apr_pstrdup(parms->pool, arg); - if (regexs->regex == NULL) + char *parsed_arg = apr_pstrdup(parms->pool, arg); + if (parsed_arg == NULL) + { + return "Out of memory exception while allocating parsed_args field"; + } + + char *separator = strstr(parsed_arg, ";"); // Find separator between mime-types and regex + if (separator == NULL) { - return "Out of memory exception while allocating regexs->regex parameter"; + return "Can't find ; separator between mime-types and regular expression"; } + *separator++ = 0; // Null-terminate the mime-type(s) part of the string + while (*separator == ' ') + { + separator++; // skip any whitespace before the regex + } + if (*separator == 0) + { + return "Found an empty regular expression after the ; separator"; + 
} + regexs->regex = separator; // Regex starts here // Compile the regular expression - int rslt = ap_regcomp(regexs->preg, arg, 0); + int rslt = ap_regcomp(regexs->preg, regexs->regex, 0); + if (rslt != 0) + { + ap_log_error(APLOG_MARK, APLOG_ERR, APR_SUCCESS, parms->server, + "CRCCACHE-ENCODE ap_regcomp return code: %d for regex %s", rslt, regexs->regex); + return "Failure to compile regular expression. See error log for further details"; + } + + // Now start splitting the mime-types themselves into separate tokens + ap_regex_t *validate_mime_type_regex = apr_palloc(parms->pool, sizeof(ap_regex_t)); + if (validate_mime_type_regex == NULL) + { + return "Out of memory exception while allocationg validate_mime_type_regex structure"; + } + rslt = ap_regcomp(validate_mime_type_regex, + "^((\\*/\\*)|([^]()<>@,;:\\\"/[?.=* ]+/(\\*|[^]()<>@,;:\\\"/[?.=* ]+)))$", 0); if (rslt != 0) { - ap_log_error(APLOG_MARK, APLOG_ERR, 0, NULL, "CRCCACHE-ENCODE ap_regcomp return code: %d", rslt); - return "Failure to compile regular expression"; + ap_log_error(APLOG_MARK, APLOG_ERR, APR_SUCCESS, parms->server, + "CRCCACHE-ENCODE ap_regcomp return code: %d for validate_mime_type_regex", rslt); + return "Failure to compile regular expression. See error log for further details"; } + regexs->mime_types = NULL; + char *last; + char *token = apr_strtok(parsed_arg, ", ", &last); + while (token != NULL) + { + if (ap_regexec(validate_mime_type_regex, token, 0, NULL, AP_REG_ICASE) != 0) + { + ap_log_error(APLOG_MARK, APLOG_ERR, APR_SUCCESS, parms->server, + "CRCCACHE-ENCODE ap_regexec returned mismatch for mime-type %s", token); + return "Invalid mime-type format specified. See error log for further details"; + } + encodings_t *mime_type = apr_palloc(parms->pool, sizeof(*mime_type)); + if (mime_type == NULL) + { + return "Out of memory exception while allocationg mime_type structure"; + } + mime_type->next = regexs->mime_types; + regexs->mime_types = mime_type; + // Store the wild-card mime-type (*/*) as a NULL pointer so that it can be quickly recognized + mime_type->encoding = (strcmp(token, "*/*") == 0) ? NULL : token; + token = apr_strtok(NULL, ", ", &last); + } + ap_regfree(validate_mime_type_regex); // Free the memory used by the mime type validation regex. + if (regexs->mime_types == NULL) + { + return "Could not find any mime-types before the ; separator"; + } + // Add regular expression to the tail of the regular expressions list regexs->next = NULL; if (conf->regexs_tail == NULL) @@ -222,7 +314,7 @@ static const command_rec crccache_server_cmds[] = { AP_INIT_FLAG("CRCcacheServer", set_crccache_server, NULL, RSRC_CONF, "Enable the CRCCache server in this virtual server"), AP_INIT_TAKE1("DecoderModule", set_crccache_decoder_module, NULL, RSRC_CONF, "DecoderModules to decode content-types (e.g. 
INFLATE:gzip,x-gzip)"), - AP_INIT_ITERATE("AddSimilarPageRegEx", add_crccache_similar_page_regex, NULL, RSRC_CONF, "Regular expression to indicate which pages are similar to each other"), + AP_INIT_ITERATE("AddSimilarPageRegEx", add_crccache_similar_page_regex, NULL, RSRC_CONF, "Regular expression to indicate which pages are similar to each other, per mime-type"), { NULL } }; @@ -241,7 +333,7 @@ int decode_if_block_header(request_rec *r, const char * header, int * version, s int rslt = 0; size_t headerlen = strlen(header); - ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server, "CRCCACHE-ENCODE headerlen: %zd", headerlen); + ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server, "CRCCACHE-ENCODE headerlen: %" APR_SIZE_T_FMT, headerlen); extracted_hashes = malloc(headerlen); if (extracted_hashes == NULL) @@ -628,8 +720,8 @@ static apr_status_t process_block(ap_filter_t *f) ctx->buffer+ctx->buffer_digest_getpos, ctx->buffer_putpos-ctx->buffer_digest_getpos ); - ap_log_error_wrapper(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server, - "CRCCACHE-ENCODE crc_read_block ndigested: %zu, result %ld", ndigested, rd_block_rslt); + ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server, + "CRCCACHE-ENCODE crc_read_block ndigested: %" APR_SIZE_T_FMT ", result %ld", ndigested, rd_block_rslt); // rd_block_rslt = 0: do nothing (it is a 'literal' block of exactly 'tail_blocksize' bytes at the end of the buffer, @@ -766,8 +858,8 @@ static apr_status_t process_eos(ap_filter_t *f) unsigned char sha1_value[APR_SHA1_DIGESTSIZE]; apr_sha1_final(sha1_value, &ctx->sha1_ctx); write_hash(f, sha1_value, APR_SHA1_DIGESTSIZE); - ap_log_error_wrapper(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, f->r->server, - "CRCCACHE-ENCODE complete size %f%% (encoded-uncompressed=%zu encoded=%zu original=%zu) for uri %s",100.0*((float)ctx->tx_length/(float)ctx->orig_length),ctx->tx_uncompressed_length, ctx->tx_length, ctx->orig_length, f->r->unparsed_uri); + ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, f->r->server, + "CRCCACHE-ENCODE complete size %f%% (encoded-uncompressed=%" APR_SIZE_T_FMT " encoded=%" APR_SIZE_T_FMT " original=%" APR_SIZE_T_FMT ") for uri %s",100.0*((float)ctx->tx_length/(float)ctx->orig_length),ctx->tx_uncompressed_length, ctx->tx_length, ctx->orig_length, f->r->unparsed_uri); return APR_SUCCESS; } @@ -809,8 +901,8 @@ static apr_status_t process_data_bucket(ap_filter_t *f, apr_bucket *e) if (ctx->buffer_putpos == ctx->buffer_size) { // Buffer is filled to the end. 
Flush as much as possible - ap_log_error_wrapper(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server, - "CRCCACHE-ENCODE Buffer is filled to end, read_getpos: %zu, digest_getpos: %zu, putpos: %zu, putpos-digest_getpos: %zu (tail_block_size: %zu)", + ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server, + "CRCCACHE-ENCODE Buffer is filled to end, read_getpos: %" APR_SIZE_T_FMT ", digest_getpos: %" APR_SIZE_T_FMT ", putpos: %" APR_SIZE_T_FMT ", putpos-digest_getpos: %" APR_SIZE_T_FMT " (tail_block_size: %" APR_SIZE_T_FMT ")", ctx->buffer_read_getpos, ctx->buffer_digest_getpos, ctx->buffer_putpos, ctx->buffer_putpos-ctx->buffer_digest_getpos, ctx->tail_block_size); while (ctx->buffer_putpos - ctx->buffer_digest_getpos > ctx->tail_block_size) { @@ -820,8 +912,8 @@ static apr_status_t process_data_bucket(ap_filter_t *f, apr_bucket *e) { return rslt; } - ap_log_error_wrapper(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server, - "CRCCACHE-ENCODE Processed a block, read_getpos: %zu, digest_getpos: %zu, putpos: %zu, putpos-digest_getpos: %zu (tail_block_size: %zu)", + ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server, + "CRCCACHE-ENCODE Processed a block, read_getpos: %" APR_SIZE_T_FMT ", digest_getpos: %" APR_SIZE_T_FMT ", putpos: %" APR_SIZE_T_FMT ", putpos-digest_getpos: %" APR_SIZE_T_FMT " (tail_block_size: %" APR_SIZE_T_FMT ")", ctx->buffer_read_getpos, ctx->buffer_digest_getpos, ctx->buffer_putpos, ctx->buffer_putpos-ctx->buffer_digest_getpos, ctx->tail_block_size); } @@ -829,8 +921,8 @@ static apr_status_t process_data_bucket(ap_filter_t *f, apr_bucket *e) { // Copy the remaining part of the buffer to the start of the buffer, // so that it can be filled again as new data arrive - ap_log_error_wrapper(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server, - "CRCCACHE-ENCODE Moving %zu bytes to begin of buffer", + ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server, + "CRCCACHE-ENCODE Moving %" APR_SIZE_T_FMT " bytes to begin of buffer", ctx->buffer_putpos - ctx->buffer_read_getpos); memcpy(ctx->buffer, ctx->buffer + ctx->buffer_read_getpos, ctx->buffer_putpos - ctx->buffer_read_getpos); } @@ -842,8 +934,8 @@ static apr_status_t process_data_bucket(ap_filter_t *f, apr_bucket *e) while (ctx->crc_read_block_result < 0 && ctx->buffer_putpos - ctx->buffer_digest_getpos > ctx->tail_block_size) { // Previous block matched exactly. Let's hope the next block as well - ap_log_error_wrapper(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server, - "CRCCACHE-ENCODE Previous block matched, read_getpos: %zu, digest_getpos: %zu, putpos: %zu, putpos-digest_getpos: %zu (tail_block_size: %zu)", + ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server, + "CRCCACHE-ENCODE Previous block matched, read_getpos: %" APR_SIZE_T_FMT ", digest_getpos: %" APR_SIZE_T_FMT ", putpos: %" APR_SIZE_T_FMT ", putpos-digest_getpos: %" APR_SIZE_T_FMT " (tail_block_size: %" APR_SIZE_T_FMT ")", ctx->buffer_read_getpos, ctx->buffer_digest_getpos, ctx->buffer_putpos, ctx->buffer_putpos-ctx->buffer_digest_getpos, ctx->tail_block_size); rslt = process_block(f); if (rslt != APR_SUCCESS) @@ -855,6 +947,48 @@ static apr_status_t process_data_bucket(ap_filter_t *f, apr_bucket *e) return APR_SUCCESS; // Yahoo, all went well } +static int match_content_type(request_rec *r, encodings_t *allowed_mime_types, const char *resp_content_type) +{ + // Response content type consists of mime-type, optionally followed by a ; and parameters + // E.g. 
text/html; charset=ISO-8859-15 + // An allowed mime-type consists of one of: + // -The NULL pointer to allow any mime-type (*/* in the config file) + // -The string of format type/subtype, which must match exactly with the mime-type + // -The string of format type/*, which means the subtype can be anything + ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"resp_content_type: %s", resp_content_type); + while (allowed_mime_types != NULL) + { + const char *allowed_pos = allowed_mime_types->encoding; + ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"allowed: %s", allowed_pos); + if (allowed_pos == NULL) + { + // User specified wild-card. This matches per definition. + ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"matched wildcard"); + return 1; + } + const char *resp_pos = resp_content_type; + char ch_r; + char ch_a; + while ((ch_r = *resp_pos) != 0 && ch_r != ';' && (ch_a = *allowed_pos) != 0 && ch_r == ch_a) { + resp_pos++; + allowed_pos++; + } + ch_a = *allowed_pos; + if (((ch_r == 0 || ch_r == ';') && ch_a == 0) || (ch_r != 0 && ch_a == '*')) + { + // It's OK if the mime-type part of the response content type matches exactly + // with the allowed mime type + // It is also OK if the (mime-type part of the) content-type has still characters + // remaining but the allowed mime-type is at the * wild-card + ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"matched specific"); + return 1; + } + allowed_mime_types = allowed_mime_types->next; + } + ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"nothing matched"); + return 0; +} + /* * CACHE_OUT filter * ---------------- @@ -907,16 +1041,20 @@ static apr_status_t crccache_out_filter(ap_filter_t *f, apr_bucket_brigade *bb) crccache_server_conf *conf = ap_get_module_config(r->server->module_config, &crccache_server_module); regexs_t *regexs = conf->regexs; - // ap_regmatch_t regmatch; - while (regexs != NULL) + const char *content_type = apr_table_get(r->headers_out, CONTENT_TYPE_HEADER); + if (content_type != NULL) { - if (ap_regexec(regexs->preg, r->unparsed_uri, 0, NULL, AP_REG_ICASE) == 0) + while (regexs != NULL) { - // Found a regex to which this page is similar. Store it in the header - apr_table_set(r->headers_out, CRCSYNC_SIMILAR_HEADER, apr_pstrdup(r->pool, regexs->regex)); - break; + if (match_content_type(r, regexs->mime_types, content_type) && + ap_regexec(regexs->preg, r->unparsed_uri, 0, NULL, AP_REG_ICASE) == 0) + { + // Found a regex to which this page is similar. Store it in the header + apr_table_set(r->headers_out, CRCSYNC_SIMILAR_HEADER, apr_pstrdup(r->pool, regexs->regex)); + break; + } + regexs=regexs->next; } - regexs=regexs->next; } if (ctx->global_state == GS_INIT) @@ -1024,7 +1162,7 @@ static apr_status_t crccache_out_filter(ap_filter_t *f, apr_bucket_brigade *bb) // TODO: should I pass some apr_palloc based function to prevent memory leaks //in case of unexpected errors? 
- ap_log_error_wrapper(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"CRCCACHE-ENCODE size of compression stream: %zd",sizeof(*(ctx->compression_stream))); + ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"CRCCACHE-ENCODE size of compression stream: %" APR_SIZE_T_FMT, sizeof(*(ctx->compression_stream))); ctx->compression_stream = apr_palloc(r->pool, sizeof(*(ctx->compression_stream))); ctx->compression_stream->zalloc = Z_NULL; ctx->compression_stream->zfree = Z_NULL; @@ -1126,11 +1264,11 @@ static apr_status_t crccache_out_filter(ap_filter_t *f, apr_bucket_brigade *bb) */ apr_bucket_read(e, &data, &len, APR_BLOCK_READ); if (len > 2) - ap_log_error_wrapper(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server, - "CRCCACHE-ENCODE Metadata, read %zu, %d %d %d",len,data[0],data[1],data[2]); + ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server, + "CRCCACHE-ENCODE Metadata, read %" APR_SIZE_T_FMT ", %d %d %d",len,data[0],data[1],data[2]); else - ap_log_error_wrapper(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server, - "CRCCACHE-ENCODE Metadata, read %zu",len); + ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server, + "CRCCACHE-ENCODE Metadata, read %" APR_SIZE_T_FMT,len); APR_BUCKET_REMOVE(e); APR_BRIGADE_INSERT_TAIL(ctx->bb, e); continue; diff --git a/crccache/mod_crccache_server.h b/crccache/mod_crccache_server.h dissimilarity index 65% index ff78d9f29..1e975b34b 100644 --- a/crccache/mod_crccache_server.h +++ b/crccache/mod_crccache_server.h @@ -1,43 +1,17 @@ -/* - * mod_crccache_server.h - * - * Created on: 15/03/2009 - * Author: awulms - */ - -#ifndef MOD_CRCCACHE_SERVER_H -#define MOD_CRCCACHE_SERVER_H - -#include -#include - -extern module AP_MODULE_DECLARE_DATA crccache_server_module; - -typedef struct encodings_s { - struct encodings_s *next; - const char *encoding; -} encodings_t; - -typedef struct decoder_modules_s { - struct decoder_modules_s *next; - const char *name; - encodings_t *encodings; -} decoder_modules_t; - -typedef struct regexs_s { - struct regexs_s *next; - ap_regex_t *preg; - const char *regex; -} regexs_t; - -/* Static information about the crccache server */ -typedef struct { - int enabled; - decoder_modules_t *decoder_modules; - unsigned decoder_modules_cnt; - regexs_t *regexs; - regexs_t *regexs_tail; -} crccache_server_conf; - -#endif /*MOD_CRCCACHE_SERVER_H*/ - +/* + * mod_crccache_server.h + * + * Created on: 15/03/2009 + * Author: awulms + */ + +#ifndef MOD_CRCCACHE_SERVER_H +#define MOD_CRCCACHE_SERVER_H + +#include +#include + +extern module AP_MODULE_DECLARE_DATA crccache_server_module; + +#endif /*MOD_CRCCACHE_SERVER_H*/ + diff --git a/crccache/rmm_hash.c b/crccache/rmm_hash.c new file mode 100644 index 000000000..c80f224e4 --- /dev/null +++ b/crccache/rmm_hash.c @@ -0,0 +1,531 @@ +/* Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * A hash table allocated/maintained in an RMM memory structure, so it can be used
+ * in shared memory
+ *
+ * Based on apr_hash.c
+ *
+ * Created on: 02/08/2010
+ * Author: Alex Wulms
+ */
+
+
+#if APR_HAVE_STDLIB_H
+#include <stdlib.h>
+#endif
+#if APR_HAVE_STRING_H
+#include <string.h>
+#endif
+
+#if APR_POOL_DEBUG && APR_HAVE_STDIO_H
+#include <stdio.h>
+#endif
+
+#include "rmm_hash.h"
+/*
+ * The internal form of a hash table.
+ *
+ * The table is an array indexed by the hash of the key; collisions
+ * are resolved by hanging a linked list of hash entries off each
+ * element of the array. Although this is a really simple design it
+ * isn't too bad given that RMM has a low allocation overhead.
+ */
+
+typedef struct rmm_hash_entry_t rmm_hash_entry_t;
+
+RMM_OFF_T_DECLARE(rmm_hash_entry_t);
+
+struct rmm_hash_entry_t {
+    RMM_OFF_T(rmm_hash_entry_t) next;
+    unsigned int hash;
+    apr_rmm_off_t key;
+    apr_ssize_t klen;
+    apr_rmm_off_t val;
+};
+
+/*
+ * Data structure for iterating through a hash table.
+ *
+ * We keep a pointer to the next hash entry here to allow the current
+ * hash entry to be freed or otherwise mangled between calls to
+ * rmm_hash_next().
+ */
+typedef struct rmm_hash_index_t rmm_hash_index_t;
+struct rmm_hash_index_t {
+    RMM_OFF_T(rmm_hash_t) ht;
+    RMM_OFF_T(rmm_hash_entry_t) this, next;
+    unsigned int index;
+};
+
+/*
+ * The size of the array is always a power of two. We use the maximum
+ * index rather than the size so that we can use bitwise-AND for
+ * modular arithmetic.
+ * The count of hash entries may be greater depending on the chosen
+ * collision rate.
+ */
+RMM_OFF_T_DECLARE(rmm_hash_entry_t_ptr);
+typedef struct rmm_hash_t rmm_hash_t;
+struct rmm_hash_t {
+    RMM_OFF_T(rmm_hash_entry_t_ptr) array;
+    rmm_hash_index_t iterator; /* For rmm_hash_first(0, ...) */
+    unsigned int count, max;
+    apr_hashfunc_t hash_func;
+    RMM_OFF_T(rmm_hash_entry_t) free; /* List of recycled entries */
+};
+
+#define INITIAL_MAX 15 /* tunable == 2^n - 1 */
+
+
+/*
+ * Hash creation functions.
+ */
+
+static RMM_OFF_T(rmm_hash_entry_t_ptr) alloc_array(apr_rmm_t *rmm, unsigned int max)
+{
+    return apr_rmm_malloc(rmm, sizeof(apr_rmm_off_t) * (max + 1));
+}
+
+
+APR_DECLARE(RMM_OFF_T(rmm_hash_t)) rmm_hash_make(apr_rmm_t *rmm)
+{
+    rmm_hash_t *ht_physical;
+    RMM_OFF_T(rmm_hash_t) ht_offset;
+    ht_offset = apr_rmm_malloc(rmm, sizeof(rmm_hash_t));
+    if (ht_offset == RMM_OFF_NULL) {
+        return RMM_OFF_NULL;
+    }
+    ht_physical = APR_RMM_ADDR_GET(rmm_hash_t, rmm, ht_offset);
+    ht_physical->free = RMM_OFF_NULL;
+    ht_physical->count = 0;
+    ht_physical->max = INITIAL_MAX;
+    ht_physical->array = alloc_array(rmm, ht_physical->max);
+    if (ht_physical->array == RMM_OFF_NULL)
+    {
+        apr_rmm_free(rmm, ht_offset);
+        return RMM_OFF_NULL;
+    }
+    ht_physical->hash_func = apr_hashfunc_default;
+    return ht_offset;
+}
+
+APR_DECLARE(RMM_OFF_T(rmm_hash_t)) rmm_hash_make_custom(apr_rmm_t *rmm,
+                                                        apr_hashfunc_t hash_func)
+{
+    RMM_OFF_T(rmm_hash_t) ht_offset = rmm_hash_make(rmm);
+    if (ht_offset == RMM_OFF_NULL) {
+        return RMM_OFF_NULL;
+    }
+    APR_RMM_ADDR_GET(rmm_hash_t, rmm, ht_offset)->hash_func = hash_func;
+    return ht_offset;
+}
+
+
+/*
+ * Hash iteration functions.
+ */ +APR_DECLARE(RMM_OFF_T(rmm_hash_index_t)) rmm_hash_next(apr_rmm_t *rmm, RMM_OFF_T(rmm_hash_index_t)hi) +{ + rmm_hash_index_t *hi_physical = APR_RMM_ADDR_GET(rmm_hash_index_t, rmm, hi); + rmm_hash_t *ht_physical = APR_RMM_ADDR_GET(rmm_hash_t, rmm, hi_physical->ht); + RMM_OFF_T(rmm_hash_entry_t) *array_physical = APR_RMM_ADDR_GET(RMM_OFF_T(rmm_hash_entry_t), rmm, ht_physical->array); + hi_physical->this = hi_physical->next; + + while (hi_physical->this == RMM_OFF_NULL) { + if (hi_physical->index > ht_physical->max) + return RMM_OFF_NULL; + + hi_physical->this = array_physical[hi_physical->index++]; + } + hi_physical->next = APR_RMM_ADDR_GET(rmm_hash_entry_t, rmm, hi_physical->this)->next; + return hi; +} + +APR_DECLARE(RMM_OFF_T(rmm_hash_index_t)) rmm_hash_first(int allocate_hi, apr_rmm_t *rmm, RMM_OFF_T(rmm_hash_t) ht) +{ + RMM_OFF_T(rmm_hash_index_t) hi; + rmm_hash_index_t *hi_physical; + if (allocate_hi) { + hi = apr_rmm_calloc(rmm, sizeof(rmm_hash_index_t)); + hi_physical = APR_RMM_ADDR_GET(rmm_hash_index_t, rmm, hi); + } + else { + hi_physical = &(APR_RMM_ADDR_GET(rmm_hash_t, rmm, ht)->iterator); + hi = apr_rmm_offset_get(rmm, hi_physical); + } + hi_physical->ht = ht; + hi_physical->index = 0; + hi_physical->this = RMM_OFF_NULL; + hi_physical->next = RMM_OFF_NULL; + return rmm_hash_next(rmm, hi); +} + +APR_DECLARE(void) rmm_hash_this(apr_rmm_t *rmm, RMM_OFF_T(rmm_hash_index_t) hi, + apr_rmm_off_t *key, + apr_ssize_t *klen, + apr_rmm_off_t *val) +{ + rmm_hash_index_t *hi_physical = APR_RMM_ADDR_GET(rmm_hash_index_t, rmm, hi); + rmm_hash_entry_t *this_physical = APR_RMM_ADDR_GET(rmm_hash_entry_t, rmm, hi_physical->this); + if (key) *key = this_physical->key; + if (klen) *klen = this_physical->klen; + if (val) *val = this_physical->val; +} + +/* + * Expanding a hash table + */ +static void expand_array(apr_rmm_t *rmm, RMM_OFF_T(rmm_hash_t) ht_offset) +{ + RMM_OFF_T(rmm_hash_index_t) hi_offset; + rmm_hash_index_t *hi_physical; + RMM_OFF_T(rmm_hash_entry_t_ptr) new_array_offset; + RMM_OFF_T(rmm_hash_entry_t) *new_array_physical; + unsigned int new_max; + rmm_hash_t *ht_physical = APR_RMM_ADDR_GET(rmm_hash_t, rmm, ht_offset); + new_max = ht_physical->max * 2 + 1; + new_array_offset = alloc_array(rmm, new_max); + if (new_array_offset == RMM_OFF_NULL) { + return; // Can't allocate memory to expand the array, keep using the old one + } + new_array_physical = APR_RMM_ADDR_GET(RMM_OFF_T(rmm_hash_entry_t), rmm, new_array_offset); + for (hi_offset = rmm_hash_first(0, rmm, ht_offset); hi_offset; hi_offset = rmm_hash_next(rmm, hi_offset)) { + hi_physical = APR_RMM_ADDR_GET(rmm_hash_index_t, rmm, hi_offset); + rmm_hash_entry_t *this_physical = APR_RMM_ADDR_GET(rmm_hash_entry_t, rmm, hi_physical->this); + unsigned int i = this_physical->hash & new_max; + this_physical->next = new_array_physical[i]; + new_array_physical[i] = hi_physical->this; + } + apr_rmm_free(rmm, ht_physical->array); + ht_physical->array = new_array_offset; + ht_physical->max = new_max; +} + + +/* + * This is where we keep the details of the hash function and control + * the maximum collision rate. + * + * If val is non-NULL it creates and initializes a new hash entry if + * there isn't already one there; it returns an updatable pointer so + * that hash entries can be removed. 
+ */ +static RMM_OFF_T(rmm_hash_entry_t) *find_entry(apr_rmm_t *rmm, RMM_OFF_T(rmm_hash_t) ht_offset, + const void *key_physical, + apr_ssize_t klen, + apr_rmm_off_t val_offset) +{ + RMM_OFF_T(rmm_hash_entry_t) *he_offset_p, he_offset; + rmm_hash_entry_t *he_physical; + unsigned int hash; + + rmm_hash_t *ht_physical = APR_RMM_ADDR_GET(rmm_hash_t, rmm, ht_offset); + hash = ht_physical->hash_func(key_physical, &klen); + RMM_OFF_T(rmm_hash_entry_t) *array_physical= APR_RMM_ADDR_GET(RMM_OFF_T(rmm_hash_entry_t), rmm, ht_physical->array); + + /* scan linked list */ + for (he_offset_p = &array_physical[hash & ht_physical->max], he_offset = *he_offset_p; + he_offset != RMM_OFF_NULL; + he_offset_p = &he_physical->next, he_offset = *he_offset_p) { + he_physical = APR_RMM_ADDR_GET(rmm_hash_entry_t, rmm, he_offset); + if (he_physical->hash == hash + && he_physical->klen == klen + && memcmp(APR_RMM_ADDR_GET(void, rmm, he_physical->key), key_physical, klen) == 0) + break; + } + if (he_offset || val_offset == RMM_OFF_NULL) + return he_offset_p; + + /* add a new entry for non-NULL values */ + if ((he_offset = ht_physical->free) != RMM_OFF_NULL) + ht_physical->free = APR_RMM_ADDR_GET(rmm_hash_entry_t, rmm, he_offset)->next; + else + he_offset = apr_rmm_calloc(rmm, sizeof(*he_physical)); + he_physical = APR_RMM_ADDR_GET(rmm_hash_entry_t, rmm, he_offset); + he_physical->next = RMM_OFF_NULL; + he_physical->hash = hash; + he_physical->key = apr_rmm_offset_get(rmm, (void *)key_physical); + he_physical->klen = klen; + he_physical->val = val_offset; + *he_offset_p = he_offset; + ht_physical->count++; + return he_offset_p; +} + +/** + * TODO: Migrate from apr_hash (pool-based) structure to rmm_hash (rmm-based) structure + */ +#if 0 +APR_DECLARE(apr_hash_t *) apr_hash_copy(apr_pool_t *pool, + const apr_hash_t *orig) +{ + apr_hash_t *ht; + apr_hash_entry_t *new_vals; + unsigned int i, j; + + ht = apr_palloc(pool, sizeof(apr_hash_t) + + sizeof(*ht->array) * (orig->max + 1) + + sizeof(apr_hash_entry_t) * orig->count); + ht->pool = pool; + ht->free = NULL; + ht->count = orig->count; + ht->max = orig->max; + ht->hash_func = orig->hash_func; + ht->array = (apr_hash_entry_t **)((char *)ht + sizeof(apr_hash_t)); + + new_vals = (apr_hash_entry_t *)((char *)(ht) + sizeof(apr_hash_t) + + sizeof(*ht->array) * (orig->max + 1)); + j = 0; + for (i = 0; i <= ht->max; i++) { + apr_hash_entry_t **new_entry = &(ht->array[i]); + apr_hash_entry_t *orig_entry = orig->array[i]; + while (orig_entry) { + *new_entry = &new_vals[j++]; + (*new_entry)->hash = orig_entry->hash; + (*new_entry)->key = orig_entry->key; + (*new_entry)->klen = orig_entry->klen; + (*new_entry)->val = orig_entry->val; + new_entry = &((*new_entry)->next); + orig_entry = orig_entry->next; + } + *new_entry = NULL; + } + return ht; +} +#endif + +APR_DECLARE(apr_rmm_off_t) rmm_hash_get(apr_rmm_t *rmm, RMM_OFF_T(rmm_hash_t) ht, + const void *key, + apr_ssize_t klen) +{ + RMM_OFF_T(rmm_hash_entry_t) he_offset; + he_offset = *find_entry(rmm, ht, key, klen, RMM_OFF_NULL); + if (he_offset) + return APR_RMM_ADDR_GET(rmm_hash_entry_t, rmm, he_offset)->val; + else + return RMM_OFF_NULL; +} + +APR_DECLARE(apr_rmm_off_t) rmm_hash_set(apr_rmm_t *rmm, RMM_OFF_T(rmm_hash_t) ht, + apr_rmm_off_t key, + apr_ssize_t klen, + apr_rmm_off_t val) +{ + RMM_OFF_T(rmm_hash_entry_t) *he_offset_p; + apr_rmm_off_t oldval = RMM_OFF_NULL; + he_offset_p = find_entry(rmm, ht, apr_rmm_addr_get(rmm, key), klen, val); + if (*he_offset_p) { + RMM_OFF_T(rmm_hash_entry_t) old = *he_offset_p; + 
rmm_hash_entry_t *old_physical = APR_RMM_ADDR_GET(rmm_hash_entry_t, rmm, old); + oldval = old_physical->val; + rmm_hash_t *ht_physical = APR_RMM_ADDR_GET(rmm_hash_t, rmm, ht); + if (!val) { + /* delete entry */ + *he_offset_p = old_physical->next; + old_physical->next = ht_physical->free; + ht_physical->free = old; + --ht_physical->count; + } + else { + /* replace entry */ + old_physical->val = val; + /* check that the collision rate doesn't become too high */ + if (ht_physical->count > ht_physical->max) { + // It's too high. Try to re-hash into a larger array + expand_array(rmm, ht); + } + } + } + /* else key not present and val==NULL */ + return oldval; +} + +APR_DECLARE(unsigned int) rmm_hash_count(apr_rmm_t *rmm, RMM_OFF_T(rmm_hash_t) ht) +{ + return APR_RMM_ADDR_GET(rmm_hash_t, rmm, ht)->count; +} + +APR_DECLARE(void) rmm_hash_clear(apr_rmm_t *rmm, int free_keys, int free_values, RMM_OFF_T(rmm_hash_t) ht) +{ + RMM_OFF_T(rmm_hash_index_t) hi_offset; + for (hi_offset = rmm_hash_first(0, rmm, ht); hi_offset; hi_offset = rmm_hash_next(rmm, hi_offset)) { + rmm_hash_entry_t *this_physical = APR_RMM_ADDR_GET(rmm_hash_entry_t, rmm, APR_RMM_ADDR_GET(rmm_hash_index_t, rmm, hi_offset)->this); + apr_rmm_off_t key = this_physical->key; + apr_rmm_off_t val = this_physical->val; + rmm_hash_set(rmm, ht, this_physical->key, this_physical->klen, RMM_OFF_NULL); + if (free_keys) { + apr_rmm_free(rmm, key); + } + if (free_values) { + apr_rmm_free(rmm, val); + } + } +} + +/** + * TODO: Migrate from apr_hash (pool-based) structure to rmm_hash (rmm-based) structure + */ +#if 0 +APR_DECLARE(apr_hash_t*) apr_hash_overlay(apr_pool_t *p, + const apr_hash_t *overlay, + const apr_hash_t *base) +{ + return apr_hash_merge(p, overlay, base, NULL, NULL); +} +#endif + +/** + * TODO: Migrate from apr_hash (pool-based) structure to rmm_hash (rmm-based) structure + */ +#if 0 +APR_DECLARE(apr_hash_t *) apr_hash_merge(apr_pool_t *p, + const apr_hash_t *overlay, + const apr_hash_t *base, + void * (*merger)(apr_pool_t *p, + const void *key, + apr_ssize_t klen, + const void *h1_val, + const void *h2_val, + const void *data), + const void *data) +{ + apr_hash_t *res; + apr_hash_entry_t *new_vals = NULL; + apr_hash_entry_t *iter; + apr_hash_entry_t *ent; + unsigned int i,j,k; + +#if APR_POOL_DEBUG + /* we don't copy keys and values, so it's necessary that + * overlay->a.pool and base->a.pool have a life span at least + * as long as p + */ + if (!apr_pool_is_ancestor(overlay->pool, p)) { + fprintf(stderr, + "apr_hash_merge: overlay's pool is not an ancestor of p\n"); + abort(); + } + if (!apr_pool_is_ancestor(base->pool, p)) { + fprintf(stderr, + "apr_hash_merge: base's pool is not an ancestor of p\n"); + abort(); + } +#endif + + res = apr_palloc(p, sizeof(apr_hash_t)); + res->pool = p; + res->free = NULL; + res->hash_func = base->hash_func; + res->count = base->count; + res->max = (overlay->max > base->max) ? 
overlay->max : base->max; + if (base->count + overlay->count > res->max) { + res->max = res->max * 2 + 1; + } + res->array = alloc_array(res, res->max); + if (base->count + overlay->count) { + new_vals = apr_palloc(p, sizeof(apr_hash_entry_t) * + (base->count + overlay->count)); + } + j = 0; + for (k = 0; k <= base->max; k++) { + for (iter = base->array[k]; iter; iter = iter->next) { + i = iter->hash & res->max; + new_vals[j].klen = iter->klen; + new_vals[j].key = iter->key; + new_vals[j].val = iter->val; + new_vals[j].hash = iter->hash; + new_vals[j].next = res->array[i]; + res->array[i] = &new_vals[j]; + j++; + } + } + + for (k = 0; k <= overlay->max; k++) { + for (iter = overlay->array[k]; iter; iter = iter->next) { + i = iter->hash & res->max; + for (ent = res->array[i]; ent; ent = ent->next) { + if ((ent->klen == iter->klen) && + (memcmp(ent->key, iter->key, iter->klen) == 0)) { + if (merger) { + ent->val = (*merger)(p, iter->key, iter->klen, + iter->val, ent->val, data); + } + else { + ent->val = iter->val; + } + break; + } + } + if (!ent) { + new_vals[j].klen = iter->klen; + new_vals[j].key = iter->key; + new_vals[j].val = iter->val; + new_vals[j].hash = iter->hash; + new_vals[j].next = res->array[i]; + res->array[i] = &new_vals[j]; + res->count++; + j++; + } + } + } + return res; +} +#endif + +/* This is basically the following... + * for every element in hash table { + * comp elemeny.key, element.value + * } + * + * Like with apr_table_do, the comp callback is called for each and every + * element of the hash table. + */ +/** + * TODO: Migrate from apr_hash (pool-based) structure to rmm_hash (rmm-based) structure + */ +#if 0 +APR_DECLARE(int) apr_hash_do(apr_hash_do_callback_fn_t *comp, + void *rec, const apr_hash_t *ht) +{ + apr_hash_index_t hix; + apr_hash_index_t *hi; + int rv, dorv = 1; + + hix.ht = (apr_hash_t *)ht; + hix.index = 0; + hix.this = NULL; + hix.next = NULL; + + if ((hi = apr_hash_next(&hix))) { + /* Scan the entire table */ + do { + rv = (*comp)(rec, hi->this->key, hi->this->klen, hi->this->val); + } while (rv && (hi = apr_hash_next(hi))); + + if (rv == 0) { + dorv = 0; + } + } + return dorv; +} +#endif + +/** + * TODO: Migrate from apr_hash (pool-based) structure to rmm_hash (rmm-based) structure + */ +#if 0 +APR_POOL_IMPLEMENT_ACCESSOR(hash) +#endif diff --git a/crccache/rmm_hash.h b/crccache/rmm_hash.h new file mode 100644 index 000000000..b906a2432 --- /dev/null +++ b/crccache/rmm_hash.h @@ -0,0 +1,169 @@ +/* Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ *
+ * A hash table allocated/maintained in an RMM memory structure, so it can be used
+ * in shared memory
+ *
+ * Based on apr_hash.h
+ *
+ * Created on: 02/08/2010
+ * Author: Alex Wulms
+ */
+
+#ifndef RMM_HASH_H_
+#define RMM_HASH_H_
+
+#include
+#include
+#include
+
+#define RMM_OFF_NULL 0
+
+#define RMM_OFF_T_DECLARE(type) typedef apr_rmm_off_t rmm_##type##_rmm_off_t
+#define RMM_OFF_T(type) rmm_##type##_rmm_off_t
+
+RMM_OFF_T_DECLARE(rmm_hash_index_t);
+RMM_OFF_T_DECLARE(rmm_hash_t);
+
+#define APR_RMM_ADDR_GET(type, rmm, offset) ((type *)apr_rmm_addr_get(rmm, offset))
+
+/**
+ * Create a hash table
+ * @param rmm: the relocatable memory (structure) in which the hash table must be created
+ * @return: the offset of the table in the rmm or RMM_OFF_NULL if the table cannot be allocated
+ */
+APR_DECLARE(RMM_OFF_T(rmm_hash_t)) rmm_hash_make(apr_rmm_t *rmm);
+
+/**
+ * Create a hash table with a custom hash function
+ * @param rmm: the relocatable memory (structure) in which the hash table must be created
+ * @param hash_func: the custom hash function
+ * @return: the offset of the table in the rmm or RMM_OFF_NULL if the table cannot be allocated
+ */
+APR_DECLARE(RMM_OFF_T(rmm_hash_t)) rmm_hash_make_custom(apr_rmm_t *rmm,
+                                                        apr_hashfunc_t hash_func);
+
+/**
+ * Associate a value with a key in a hash table
+ * @param rmm The relocatable memory structure in which the hash table exists
+ * @param ht The (offset of the) hash table in the rmm
+ * @param key The (offset of the) key in the rmm. The key must exist in the same rmm as the hash table.
+ *            The key will not be copied into the hash table. Instead, a reference to the key will
+ *            be stored.
+ * @param klen Length of the key. Can be APR_HASH_KEY_STRING to use the string length. See apr_hash.h for
+ *             further details (rmm_hash uses the same hash function as apr_hash)
+ * @param val The (offset of the) value in the rmm. Like the key, the value must exist in the same rmm as
+ *            the hash table.
+ * @return RMM_OFF_NULL if the key is new. Otherwise the (RMM offset of the) old value. It is the responsibility of the
+ *         invoking application to free the old value from the RMM memory if it is no longer required
+ * @remark When value RMM_OFF_NULL is passed, the hash entry itself is deleted but not the key nor the value
+ */
+APR_DECLARE(apr_rmm_off_t) rmm_hash_set(apr_rmm_t *rmm, RMM_OFF_T(rmm_hash_t) ht,
+                                        apr_rmm_off_t key,
+                                        apr_ssize_t klen,
+                                        apr_rmm_off_t val);
+
+
+/**
+ * Look up the value associated with a key in a hash table.
+ * @param rmm The relocatable memory structure in which the hash table exists
+ * @param ht The (offset of the) hash table in the rmm
+ * @param key A pointer to the key in the (local) address space of the caller.
+ * @param klen Length of the key. Can be APR_HASH_KEY_STRING to use the string length. See apr_hash.h for
+ *             further details (rmm_hash uses the same hash function as apr_hash)
+ * @return Returns RMM_OFF_NULL if the key is not present.
+ */
+
+APR_DECLARE(apr_rmm_off_t) rmm_hash_get(apr_rmm_t *rmm, RMM_OFF_T(rmm_hash_t) ht,
+                                        const void *key,
+                                        apr_ssize_t klen);
+
+/**
+ * Start iterating over the entries in a hash table.
+ * @param allocate_hi If false, an internal non-thread-safe iterator is used.
+ *                    Otherwise, a new iterator is allocated from the rmm
+ * @param rmm The relocatable memory structure in which the hash table exists
+ *            and/or from which to allocate the RMM_OFF_T(rmm_hash_index_t) iterator.
+ * @param ht The (offset of the) hash table in the rmm + * @return The (offset of the) hash index/iterator + * @remark There is no restriction on adding or deleting hash entries during + * an iteration (although the results may be unpredictable unless all you do + * is delete the current entry) and multiple iterations can be in + * progress at the same time. + */ +/** + * @example + * + *
+ *
+ * int sum_values(apr_rmm_t *rmm, RMM_OFF_T(rmm_hash_t) ht)
+ * {
+ *     RMM_OFF_T(rmm_hash_index_t) hi;
+ *     apr_rmm_off_t val;
+ *     int sum = 0;
+ *     for (hi = rmm_hash_first(0, rmm, ht); hi; hi = rmm_hash_next(rmm, hi)) {
+ *         rmm_hash_this(rmm, hi, NULL, NULL, &val);
+ *         sum += *(APR_RMM_ADDR_GET(int, rmm, val));
+ *     }
+ *     return sum;
+ * }
+ *
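+ * A hypothetical companion sketch (the helper name and sample data are only
+ * illustrative) showing how such a table might be populated before it is
+ * iterated; key and value must live in the same rmm as the table, and error
+ * checks are omitted:
+ *
+ * RMM_OFF_T(rmm_hash_t) make_example_table(apr_rmm_t *rmm)
+ * {
+ *     RMM_OFF_T(rmm_hash_t) ht = rmm_hash_make(rmm);
+ *     // key and value are allocated from the same rmm as the table itself
+ *     apr_rmm_off_t key = apr_rmm_malloc(rmm, sizeof("answer"));
+ *     apr_rmm_off_t val = apr_rmm_malloc(rmm, sizeof(int));
+ *     strcpy(APR_RMM_ADDR_GET(char, rmm, key), "answer");
+ *     *APR_RMM_ADDR_GET(int, rmm, val) = 42;
+ *     rmm_hash_set(rmm, ht, key, APR_HASH_KEY_STRING, val);
+ *     return ht;
+ * }
+ *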
+ */ +APR_DECLARE(RMM_OFF_T(rmm_hash_index_t)) rmm_hash_first(int allocate_hi, apr_rmm_t *rmm, RMM_OFF_T(rmm_hash_t) ht); + +/** + * Continue iterating over the entries in a hash table. + * @param rmm The relocatable memory structure in which the hash table exists + * @param hi The iteration state + * @return the RMM offset of the updated iteration state. RMM_OFF_NULL if there are no more + * entries. + */ +APR_DECLARE(RMM_OFF_T(rmm_hash_index_t)) rmm_hash_next(apr_rmm_t *rmm, RMM_OFF_T(rmm_hash_index_t)hi); + + +/** + * Get the current entry's details from the iteration state. + * @param rmm The relocatable memory structure in which the hash table exists + * @param hi The iteration state + * @param key Return pointer for the RMM offset of the key. + * @param klen Return pointer for the key length. + * @param val Return pointer for the RMM offset of the associated value. + * @remark The return pointers should point to a variable that will be set to the + * corresponding data, or they may be NULL if the data isn't interesting. + */ +APR_DECLARE(void) rmm_hash_this(apr_rmm_t *rmm, RMM_OFF_T(rmm_hash_index_t) hi, + apr_rmm_off_t *key, + apr_ssize_t *klen, + apr_rmm_off_t *val); + +/** + * Get the number of key/value pairs in the hash table. + * @param rmm The relocatable memory structure in which the hash table exists + * @param ht The hash table + * @return The number of key/value pairs in the hash table. + */ +APR_DECLARE(unsigned int) rmm_hash_count(apr_rmm_t *rmm, RMM_OFF_T(rmm_hash_t) ht); + +/** + * Clear any key/value pairs in the hash table. + * @param rmm The relocatable memory structure in which the hash table exists + * @param free_keys If true, free the memory in the RMM used by the keys + * @param free_values If true, free the memory in the RMM used by the values + * @param ht The hash table + */ + +APR_DECLARE(void) rmm_hash_clear(apr_rmm_t *rmm, int free_keys, int free_values, RMM_OFF_T(rmm_hash_t) ht); + +#endif /* RMM_HASH_H_ */ diff --git a/crccache/test_hash_small_invokations.c b/crccache/test_hash_small_invokations.c index 2d4a9453e..5cbbd1d63 100644 --- a/crccache/test_hash_small_invokations.c +++ b/crccache/test_hash_small_invokations.c @@ -28,10 +28,10 @@ int main(int argc, char *argv[]) uint64_t hashes[nblocks]; size_t datalen = strlen(tstdata); size_t block_size = datalen/nblocks; - crc_of_blocks(tstdata, datalen, block_size, 64, true, hashes); // set-up hashes + size_t tail_size = block_size + datalen%nblocks; + crc_of_blocks(tstdata, datalen, block_size, tail_size, 64, hashes); // set-up hashes - size_t tail_size = block_size + datalen%nblocks; printf("block_size: %zd, tail_size: %zd, nblocks: %zd\n", block_size, tail_size, nblocks); struct crc_context *crcctx = crc_context_new(block_size, 64, hashes, nblocks, tail_size); printf("context initialized\n"); -- 2.11.4.GIT
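An illustrative configuration sketch (hypothetical paths and mime-types) for the extended AddSimilarPageRegEx directive: each quoted argument lists one or more comma-separated mime-types, a ';' separator, and the regular expression that groups similar pages. Because the directive is registered with AP_INIT_ITERATE, quoting keeps each mime-type/regex specification together as a single argument.

<IfModule mod_crccache_server.c>
    CRCcacheServer On
    # (X)HTML pages under /wiki/ are considered similar to each other
    AddSimilarPageRegEx "text/html, application/xhtml+xml; ^/wiki/"
    # */* accepts any response content-type
    AddSimilarPageRegEx "*/*; ^/blog/[0-9]+/"
</IfModule>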