[mod_accesslog] %{ratio}n logs compression ratio (fixes #2133)
[lighttpd.git] / src / mod_deflate.c
blob6eecdb124358753e96329ed629a19a23d8fcb7f5
1 /* mod_deflate
4 * bug fix on Robert Jakabosky from alphatrade.com's lighttp 1.4.10 mod_deflate patch
6 * Bug fix and new features:
7 * 1) fix loop bug when content-length is bigger than work-block-size*k
9 * -------
11 * lighttpd-1.4.26.mod_deflate.patch from
12 * https://redmine.lighttpd.net/projects/1/wiki/Docs_ModDeflate
14 * -------
16 * Patch further modified in this incarnation.
18 * Note: this patch only handles completed responses (con->file_finished);
19 * this patch does not currently handle streaming dynamic responses,
20 * and therefore also does not worry about Transfer-Encoding: chunked
21 * (or having separate con->output_queue for chunked-encoded output)
22 * (or using separate buffers per connection instead of p->tmp_buf)
23 * (or handling interactions with block buffering and write timeouts)
25 * Bug fix:
26 * - fixed major bug with compressing chunks with offset > 0
27 * x-ref:
28 * "Response breaking in mod_deflate"
29 * https://redmine.lighttpd.net/issues/986
30 * - fix broken (in some cases) chunk accounting in deflate_compress_response()
31 * - fix broken bzip2
32 * x-ref:
33 * "mod_deflate's bzip2 broken by default"
34 * https://redmine.lighttpd.net/issues/2035
35 * - fix mismatch with current chunk interfaces
36 * x-ref:
37 * "Weird things in chunk.c (functions only handling specific cases, unexpected behaviour)"
38 * https://redmine.lighttpd.net/issues/1510
40 * Behavior changes from prior patch:
41 * - deflate.mimetypes must now be configured to enable compression
42 * deflate.mimetypes = ( ) # compress nothing (disabled; default)
43 * deflate.mimetypes = ( "" ) # compress all mimetypes
44 * deflate.mimetypes = ( "text/" ) # compress text/... mimetypes
45 * x-ref:
46 * "mod_deflate enabled by default"
47 * https://redmine.lighttpd.net/issues/1394
48 * - deflate.enabled directive removed (see new behavior of deflate.mimetypes)
49 * - deflate.debug removed (was developer debug trace, not end-user debug)
50 * - deflate.bzip2 replaced with deflate.allowed-encodings (like mod_compress)
51 * x-ref:
52 * "mod_deflate should allow limiting of compression algorithm from the configuration file"
53 * https://redmine.lighttpd.net/issues/996
54 * "mod_compress disabling methods"
55 * https://redmine.lighttpd.net/issues/1773
56 * - deflate.nocompress-url removed since disabling compression for a URL
57 * can now easily be done by setting to a blank list either directive
58 * deflate.allowed-encodings = () or deflate.mimetypes = () in a conditional
59 * block, e.g. $HTTP["url"] =~ "....." { deflate.mimetypes = ( ) }
60 * - deflate.sync-flush removed; controlled by con->conf.stream_response_body
61 * (though streaming compression not currently implemented in mod_deflate)
62 * - inactive directives in this patch (since con->file_finished required)
63 * deflate.work-block-size
64 * deflate.output-buffer-size
65 * - remove weak file size check; SIGBUS is trapped, files that shrink will error
66 * x-ref:
67 * "mod_deflate: filesize check is too weak"
68 * https://redmine.lighttpd.net/issues/1512
69 * - change default deflate.min-compress-size from 0 to now be 256
70 * http://webmasters.stackexchange.com/questions/31750/what-is-recommended-minimum-object-size-for-gzip-performance-benefits
71 * Apache 2.4 mod_deflate minimum is 68 bytes
72 * Akamai recommends minimum 860 bytes
73 * Google recommends minimum be somewhere in range between 150 and 1024 bytes
74 * - deflate.max-compress-size new directive (in kb like compress.max_filesize)
75 * - deflate.mem-level removed (too many knobs for little benefit)
76 * - deflate.window-size removed (too many knobs for little benefit)
78 * Future:
79 * - config directives may be changed, renamed, or removed
80 * e.g. A set of reasonable defaults might be chosen
81 * instead of making them configurable.
82 * deflate.min-compress-size
83 * - might add deflate.mimetypes-exclude = ( ... ) for list of mimetypes
84 * to avoid compressing, even if a broader deflate.mimetypes matched,
85 * e.g. to compress all "text/" except "text/special".
86 * - mod_compress and mod_deflate might merge overlapping feature sets
87 * (mod_compress.cache-dir does not yet have an equivalent in mod_deflate)
89 * Implementation notes:
90 * - http_chunk_append_mem() used instead of http_chunk_append_buffer()
91 * so that p->tmp_buf can be large and re-used. This results in an extra copy
92 * of compressed data before data is sent to network, though if the compressed
93 * size is larger than 64k, it ends up being sent to a temporary file on
94 * disk without suffering an extra copy in memory, and without extra chunk
95 * create and destroy. If this is ever changed to give away buffers, then use
96 * a unique hctx->output buffer per hctx; do not reuse p->tmp_buf across
97 * multiple requests being handled in parallel.
99 #include "first.h"
101 #include <sys/types.h>
102 #include <sys/stat.h>
103 #include "sys-mmap.h"
105 #include <fcntl.h>
106 #include <stdlib.h>
107 #include <string.h>
108 #include <errno.h>
109 #include <time.h>
111 #include "base.h"
112 #include "log.h"
113 #include "buffer.h"
114 #include "etag.h"
115 #include "http_chunk.h"
116 #include "response.h"
118 #include "plugin.h"
120 #if defined HAVE_ZLIB_H && defined HAVE_LIBZ
121 # define USE_ZLIB
122 # include <zlib.h>
123 #endif
124 #ifndef Z_DEFAULT_COMPRESSION
125 #define Z_DEFAULT_COMPRESSION -1
126 #endif
127 #ifndef MAX_WBITS
128 #define MAX_WBITS 15
129 #endif
131 #if defined HAVE_BZLIB_H && defined HAVE_LIBBZ2
132 # define USE_BZ2LIB
133 /* we don't need stdio interface */
134 # define BZ_NO_STDIO
135 # include <bzlib.h>
136 #endif
138 #if defined HAVE_SYS_MMAN_H && defined HAVE_MMAP && defined ENABLE_MMAP
139 #define USE_MMAP
141 #include "sys-mmap.h"
142 #include <setjmp.h>
143 #include <signal.h>
145 static volatile int sigbus_jmp_valid;
146 static sigjmp_buf sigbus_jmp;
148 static void sigbus_handler(int sig) {
149 UNUSED(sig);
150 if (sigbus_jmp_valid) siglongjmp(sigbus_jmp, 1);
151 log_failed_assert(__FILE__, __LINE__, "SIGBUS");
153 #endif
155 /* request: accept-encoding */
156 #define HTTP_ACCEPT_ENCODING_IDENTITY BV(0)
157 #define HTTP_ACCEPT_ENCODING_GZIP BV(1)
158 #define HTTP_ACCEPT_ENCODING_DEFLATE BV(2)
159 #define HTTP_ACCEPT_ENCODING_COMPRESS BV(3)
160 #define HTTP_ACCEPT_ENCODING_BZIP2 BV(4)
161 #define HTTP_ACCEPT_ENCODING_X_GZIP BV(5)
162 #define HTTP_ACCEPT_ENCODING_X_BZIP2 BV(6)
164 #define KByte * 1024
165 #define MByte * 1024 KByte
166 #define GByte * 1024 MByte
168 typedef struct {
169 array *mimetypes;
170 int allowed_encodings;
171 unsigned int max_compress_size;
172 unsigned short min_compress_size;
173 unsigned short output_buffer_size;
174 unsigned short work_block_size;
175 unsigned short sync_flush;
176 short compression_level;
177 } plugin_config;
179 typedef struct {
180 PLUGIN_DATA;
181 buffer *tmp_buf;
182 array *encodings;
184 plugin_config **config_storage;
185 plugin_config conf;
186 } plugin_data;
188 typedef struct {
189 union {
190 #ifdef USE_ZLIB
191 z_stream z;
192 #endif
193 #ifdef USE_BZ2LIB
194 bz_stream bz;
195 #endif
196 int dummy;
197 } u;
198 off_t bytes_in;
199 off_t bytes_out;
200 chunkqueue *in_queue;
201 buffer *output;
202 plugin_data *plugin_data;
203 int compression_type;
204 } handler_ctx;
206 static handler_ctx *handler_ctx_init() {
207 handler_ctx *hctx;
209 hctx = calloc(1, sizeof(*hctx));
210 hctx->in_queue = chunkqueue_init();
212 return hctx;
215 static void handler_ctx_free(handler_ctx *hctx) {
216 #if 0
217 if (hctx->output != p->tmp_buf) {
218 buffer_free(hctx->output);
220 #endif
221 chunkqueue_free(hctx->in_queue);
222 free(hctx);
225 INIT_FUNC(mod_deflate_init) {
226 plugin_data *p;
228 p = calloc(1, sizeof(*p));
230 p->encodings = array_init();
231 p->tmp_buf = buffer_init();
232 buffer_string_prepare_copy(p->tmp_buf, 64 KByte);
234 return p;
237 FREE_FUNC(mod_deflate_free) {
238 plugin_data *p = p_d;
240 UNUSED(srv);
242 if (!p) return HANDLER_GO_ON;
244 if (p->config_storage) {
245 size_t i;
246 for (i = 0; i < srv->config_context->used; i++) {
247 plugin_config *s = p->config_storage[i];
249 if (!s) continue;
251 array_free(s->mimetypes);
252 free(s);
254 free(p->config_storage);
257 buffer_free(p->tmp_buf);
258 array_free(p->encodings);
260 free(p);
262 return HANDLER_GO_ON;
265 SETDEFAULTS_FUNC(mod_deflate_setdefaults) {
266 plugin_data *p = p_d;
267 size_t i = 0;
269 config_values_t cv[] = {
270 { "deflate.mimetypes", NULL, T_CONFIG_ARRAY, T_CONFIG_SCOPE_CONNECTION },
271 { "deflate.allowed-encodings", NULL, T_CONFIG_ARRAY, T_CONFIG_SCOPE_CONNECTION },
272 { "deflate.max-compress-size", NULL, T_CONFIG_INT, T_CONFIG_SCOPE_CONNECTION },
273 { "deflate.min-compress-size", NULL, T_CONFIG_SHORT, T_CONFIG_SCOPE_CONNECTION },
274 { "deflate.compression-level", NULL, T_CONFIG_SHORT, T_CONFIG_SCOPE_CONNECTION },
275 { "deflate.output-buffer-size", NULL, T_CONFIG_SHORT, T_CONFIG_SCOPE_CONNECTION },
276 { "deflate.work-block-size", NULL, T_CONFIG_SHORT, T_CONFIG_SCOPE_CONNECTION },
277 { NULL, NULL, T_CONFIG_UNSET, T_CONFIG_SCOPE_UNSET }
280 p->config_storage = calloc(1, srv->config_context->used * sizeof(plugin_config *));
282 for (i = 0; i < srv->config_context->used; i++) {
283 plugin_config *s;
285 s = calloc(1, sizeof(plugin_config));
286 s->mimetypes = array_init();
287 s->allowed_encodings = 0;
288 s->max_compress_size = 128*1024; /*(128 MB measured as num KB)*/
289 s->min_compress_size = 256;
290 s->output_buffer_size = 0;
291 s->work_block_size = 2048;
292 s->sync_flush = 0;
293 s->compression_level = -1;
295 array_reset(p->encodings); /* temp array for allowed encodings list */
297 cv[0].destination = s->mimetypes;
298 cv[1].destination = p->encodings;
299 cv[2].destination = &(s->max_compress_size);
300 cv[3].destination = &(s->min_compress_size);
301 cv[4].destination = &(s->compression_level);
302 cv[5].destination = &(s->output_buffer_size);
303 cv[6].destination = &(s->work_block_size);
305 p->config_storage[i] = s;
307 if (0 != config_insert_values_global(srv, ((data_config *)srv->config_context->data[i])->value, cv, i == 0 ? T_CONFIG_SCOPE_SERVER : T_CONFIG_SCOPE_CONNECTION)) {
308 return HANDLER_ERROR;
311 if ((s->compression_level < 1 || s->compression_level > 9) &&
312 s->compression_level != -1) {
313 log_error_write(srv, __FILE__, __LINE__, "sd",
314 "compression-level must be between 1 and 9:", s->compression_level);
315 return HANDLER_ERROR;
318 if (p->encodings->used) {
319 size_t j = 0;
320 for (j = 0; j < p->encodings->used; j++) {
321 #if defined(USE_ZLIB) || defined(USE_BZ2LIB)
322 data_string *ds = (data_string *)p->encodings->data[j];
323 #endif
324 #ifdef USE_ZLIB
325 if (NULL != strstr(ds->value->ptr, "gzip"))
326 s->allowed_encodings |= HTTP_ACCEPT_ENCODING_GZIP | HTTP_ACCEPT_ENCODING_X_GZIP;
327 if (NULL != strstr(ds->value->ptr, "x-gzip"))
328 s->allowed_encodings |= HTTP_ACCEPT_ENCODING_X_GZIP;
329 if (NULL != strstr(ds->value->ptr, "deflate"))
330 s->allowed_encodings |= HTTP_ACCEPT_ENCODING_DEFLATE;
332 if (NULL != strstr(ds->value->ptr, "compress"))
333 s->allowed_encodings |= HTTP_ACCEPT_ENCODING_COMPRESS;
335 #endif
336 #ifdef USE_BZ2LIB
337 if (NULL != strstr(ds->value->ptr, "bzip2"))
338 s->allowed_encodings |= HTTP_ACCEPT_ENCODING_BZIP2 | HTTP_ACCEPT_ENCODING_X_BZIP2;
339 if (NULL != strstr(ds->value->ptr, "x-bzip2"))
340 s->allowed_encodings |= HTTP_ACCEPT_ENCODING_X_BZIP2;
341 #endif
343 } else {
344 /* default encodings */
345 #ifdef USE_ZLIB
346 s->allowed_encodings |= HTTP_ACCEPT_ENCODING_GZIP
347 | HTTP_ACCEPT_ENCODING_X_GZIP
348 | HTTP_ACCEPT_ENCODING_DEFLATE;
349 #endif
350 #ifdef USE_BZ2LIB
351 s->allowed_encodings |= HTTP_ACCEPT_ENCODING_BZIP2
352 | HTTP_ACCEPT_ENCODING_X_BZIP2;
353 #endif
356 /* mod_deflate matches mimetype as prefix of Content-Type
357 * so ignore '*' at end of mimetype for end-user flexibility
358 * in specifying trailing wildcard to grouping of mimetypes */
359 for (size_t m = 0; m < s->mimetypes->used; ++m) {
360 buffer *mimetype = ((data_string *)s->mimetypes->data[m])->value;
361 size_t len = buffer_string_length(mimetype);
362 if (len > 2 && mimetype->ptr[len-1] == '*') {
363 buffer_string_set_length(mimetype, len-1);
368 return HANDLER_GO_ON;
373 #if defined(USE_ZLIB) || defined(USE_BZ2LIB)
374 static int stream_http_chunk_append_mem(server *srv, connection *con, handler_ctx *hctx, size_t len) {
375 /* future: might also write stream to hctx temporary file in compressed file cache */
376 return http_chunk_append_mem(srv, con, hctx->output->ptr, len);
378 #endif
381 #ifdef USE_ZLIB
383 static int stream_deflate_init(handler_ctx *hctx) {
384 z_stream * const z = &hctx->u.z;
385 const plugin_data * const p = hctx->plugin_data;
386 z->zalloc = Z_NULL;
387 z->zfree = Z_NULL;
388 z->opaque = Z_NULL;
389 z->total_in = 0;
390 z->total_out = 0;
391 z->next_out = (unsigned char *)hctx->output->ptr;
392 z->avail_out = hctx->output->size;
394 if (Z_OK != deflateInit2(z,
395 p->conf.compression_level > 0
396 ? p->conf.compression_level
397 : Z_DEFAULT_COMPRESSION,
398 Z_DEFLATED,
399 (hctx->compression_type == HTTP_ACCEPT_ENCODING_GZIP)
400 ? (MAX_WBITS | 16) /*(0x10 flags gzip header, trailer)*/
401 : -MAX_WBITS, /*(negate to suppress zlib header)*/
402 8, /* default memLevel */
403 Z_DEFAULT_STRATEGY)) {
404 return -1;
407 return 0;
410 static int stream_deflate_compress(server *srv, connection *con, handler_ctx *hctx, unsigned char *start, off_t st_size) {
411 z_stream * const z = &(hctx->u.z);
412 size_t len;
414 z->next_in = start;
415 z->avail_in = st_size;
416 hctx->bytes_in += st_size;
418 /* compress data */
419 do {
420 if (Z_OK != deflate(z, Z_NO_FLUSH)) return -1;
422 if (z->avail_out == 0 || z->avail_in > 0) {
423 len = hctx->output->size - z->avail_out;
424 hctx->bytes_out += len;
425 stream_http_chunk_append_mem(srv, con, hctx, len);
426 z->next_out = (unsigned char *)hctx->output->ptr;
427 z->avail_out = hctx->output->size;
429 } while (z->avail_in > 0);
431 return 0;
434 static int stream_deflate_flush(server *srv, connection *con, handler_ctx *hctx, int end) {
435 z_stream * const z = &(hctx->u.z);
436 const plugin_data *p = hctx->plugin_data;
437 size_t len;
438 int rc = 0;
439 int done;
441 /* compress data */
442 do {
443 done = 1;
444 if (end) {
445 rc = deflate(z, Z_FINISH);
446 if (rc == Z_OK) {
447 done = 0;
448 } else if (rc != Z_STREAM_END) {
449 return -1;
451 } else {
452 if (p->conf.sync_flush) {
453 rc = deflate(z, Z_SYNC_FLUSH);
454 if (rc != Z_OK) return -1;
455 } else if (z->avail_in > 0) {
456 rc = deflate(z, Z_NO_FLUSH);
457 if (rc != Z_OK) return -1;
461 len = hctx->output->size - z->avail_out;
462 if (z->avail_out == 0 || (len > 0 && (end || p->conf.sync_flush))) {
463 hctx->bytes_out += len;
464 stream_http_chunk_append_mem(srv, con, hctx, len);
465 z->next_out = (unsigned char *)hctx->output->ptr;
466 z->avail_out = hctx->output->size;
468 } while (z->avail_in != 0 || !done);
470 return 0;
473 static int stream_deflate_end(server *srv, handler_ctx *hctx) {
474 z_stream * const z = &(hctx->u.z);
475 int rc = deflateEnd(z);
476 if (Z_OK == rc || Z_DATA_ERROR == rc) return 0;
478 if (z->msg != NULL) {
479 log_error_write(srv, __FILE__, __LINE__, "sdss",
480 "deflateEnd error ret=", rc, ", msg=", z->msg);
481 } else {
482 log_error_write(srv, __FILE__, __LINE__, "sd",
483 "deflateEnd error ret=", rc);
485 return -1;
488 #endif
491 #ifdef USE_BZ2LIB
493 static int stream_bzip2_init(handler_ctx *hctx) {
494 bz_stream * const bz = &hctx->u.bz;
495 const plugin_data * const p = hctx->plugin_data;
496 bz->bzalloc = NULL;
497 bz->bzfree = NULL;
498 bz->opaque = NULL;
499 bz->total_in_lo32 = 0;
500 bz->total_in_hi32 = 0;
501 bz->total_out_lo32 = 0;
502 bz->total_out_hi32 = 0;
503 bz->next_out = hctx->output->ptr;
504 bz->avail_out = hctx->output->size;
506 if (BZ_OK != BZ2_bzCompressInit(bz,
507 p->conf.compression_level > 0
508 ? p->conf.compression_level
509 : 9, /* blocksize = 900k */
510 0, /* verbosity */
511 0)) { /* workFactor: default */
512 return -1;
515 return 0;
518 static int stream_bzip2_compress(server *srv, connection *con, handler_ctx *hctx, unsigned char *start, off_t st_size) {
519 bz_stream * const bz = &(hctx->u.bz);
520 size_t len;
522 bz->next_in = (char *)start;
523 bz->avail_in = st_size;
524 hctx->bytes_in += st_size;
526 /* compress data */
527 do {
528 if (BZ_RUN_OK != BZ2_bzCompress(bz, BZ_RUN)) return -1;
530 if (bz->avail_out == 0 || bz->avail_in > 0) {
531 len = hctx->output->size - bz->avail_out;
532 hctx->bytes_out += len;
533 stream_http_chunk_append_mem(srv, con, hctx, len);
534 bz->next_out = hctx->output->ptr;
535 bz->avail_out = hctx->output->size;
537 } while (bz->avail_in > 0);
539 return 0;
542 static int stream_bzip2_flush(server *srv, connection *con, handler_ctx *hctx, int end) {
543 bz_stream * const bz = &(hctx->u.bz);
544 const plugin_data *p = hctx->plugin_data;
545 size_t len;
546 int rc;
547 int done;
549 /* compress data */
550 do {
551 done = 1;
552 if (end) {
553 rc = BZ2_bzCompress(bz, BZ_FINISH);
554 if (rc == BZ_FINISH_OK) {
555 done = 0;
556 } else if (rc != BZ_STREAM_END) {
557 return -1;
559 } else if (bz->avail_in > 0) {
560 /* p->conf.sync_flush not implemented here,
561 * which would loop on BZ_FLUSH while BZ_FLUSH_OK
562 * until BZ_RUN_OK returned */
563 rc = BZ2_bzCompress(bz, BZ_RUN);
564 if (rc != BZ_RUN_OK) {
565 return -1;
569 len = hctx->output->size - bz->avail_out;
570 if (bz->avail_out == 0 || (len > 0 && (end || p->conf.sync_flush))) {
571 hctx->bytes_out += len;
572 stream_http_chunk_append_mem(srv, con, hctx, len);
573 bz->next_out = hctx->output->ptr;
574 bz->avail_out = hctx->output->size;
576 } while (bz->avail_in != 0 || !done);
578 return 0;
581 static int stream_bzip2_end(server *srv, handler_ctx *hctx) {
582 bz_stream * const bz = &(hctx->u.bz);
583 int rc = BZ2_bzCompressEnd(bz);
584 if (BZ_OK == rc || BZ_DATA_ERROR == rc) return 0;
586 log_error_write(srv, __FILE__, __LINE__, "sd",
587 "BZ2_bzCompressEnd error ret=", rc);
588 return -1;
591 #endif
594 static int mod_deflate_stream_init(handler_ctx *hctx) {
595 switch(hctx->compression_type) {
596 #ifdef USE_ZLIB
597 case HTTP_ACCEPT_ENCODING_GZIP:
598 case HTTP_ACCEPT_ENCODING_DEFLATE:
599 return stream_deflate_init(hctx);
600 #endif
601 #ifdef USE_BZ2LIB
602 case HTTP_ACCEPT_ENCODING_BZIP2:
603 return stream_bzip2_init(hctx);
604 #endif
605 default:
606 return -1;
610 static int mod_deflate_compress(server *srv, connection *con, handler_ctx *hctx, unsigned char *start, off_t st_size) {
611 if (0 == st_size) return 0;
612 switch(hctx->compression_type) {
613 #ifdef USE_ZLIB
614 case HTTP_ACCEPT_ENCODING_GZIP:
615 case HTTP_ACCEPT_ENCODING_DEFLATE:
616 return stream_deflate_compress(srv, con, hctx, start, st_size);
617 #endif
618 #ifdef USE_BZ2LIB
619 case HTTP_ACCEPT_ENCODING_BZIP2:
620 return stream_bzip2_compress(srv, con, hctx, start, st_size);
621 #endif
622 default:
623 UNUSED(srv);
624 UNUSED(con);
625 UNUSED(start);
626 return -1;
630 static int mod_deflate_stream_flush(server *srv, connection *con, handler_ctx *hctx, int end) {
631 if (0 == hctx->bytes_in) return 0;
632 switch(hctx->compression_type) {
633 #ifdef USE_ZLIB
634 case HTTP_ACCEPT_ENCODING_GZIP:
635 case HTTP_ACCEPT_ENCODING_DEFLATE:
636 return stream_deflate_flush(srv, con, hctx, end);
637 #endif
638 #ifdef USE_BZ2LIB
639 case HTTP_ACCEPT_ENCODING_BZIP2:
640 return stream_bzip2_flush(srv, con, hctx, end);
641 #endif
642 default:
643 UNUSED(srv);
644 UNUSED(con);
645 UNUSED(end);
646 return -1;
650 static void mod_deflate_note_ratio(server *srv, connection *con, handler_ctx *hctx) {
651 /* store compression ratio in con->environment
652 * for possible logging by mod_accesslog
653 * (late in response handling, so not seen by most other modules) */
654 /*(should be called only at end of successful response compression)*/
655 char ratio[LI_ITOSTRING_LENGTH];
656 if (0 == hctx->bytes_in) return;
657 li_itostrn(ratio, sizeof(ratio), hctx->bytes_out * 100 / hctx->bytes_in);
658 array_set_key_value(con->environment,
659 CONST_STR_LEN("ratio"),
660 ratio, strlen(ratio));
661 UNUSED(srv);
664 static int mod_deflate_stream_end(server *srv, handler_ctx *hctx) {
665 switch(hctx->compression_type) {
666 #ifdef USE_ZLIB
667 case HTTP_ACCEPT_ENCODING_GZIP:
668 case HTTP_ACCEPT_ENCODING_DEFLATE:
669 return stream_deflate_end(srv, hctx);
670 #endif
671 #ifdef USE_BZ2LIB
672 case HTTP_ACCEPT_ENCODING_BZIP2:
673 return stream_bzip2_end(srv, hctx);
674 #endif
675 default:
676 UNUSED(srv);
677 return -1;
681 static void deflate_compress_cleanup(server *srv, connection *con, handler_ctx *hctx) {
682 const plugin_data *p = hctx->plugin_data;
683 con->plugin_ctx[p->id] = NULL;
685 if (0 != mod_deflate_stream_end(srv, hctx)) {
686 log_error_write(srv, __FILE__, __LINE__, "s", "error closing stream");
689 #if 1 /* unnecessary if deflate.min-compress-size is set to a reasonable value */
690 if (hctx->bytes_in < hctx->bytes_out) {
691 log_error_write(srv, __FILE__, __LINE__, "sbsdsd",
692 "uri ", con->uri.path_raw, " in=", hctx->bytes_in, " smaller than out=", hctx->bytes_out);
694 #endif
696 handler_ctx_free(hctx);
700 static int mod_deflate_file_chunk(server *srv, connection *con, handler_ctx *hctx, chunk *c, off_t st_size) {
701 off_t abs_offset;
702 off_t toSend = -1;
703 char *start;
704 #ifdef USE_MMAP
705 off_t we_want_to_mmap = 2 MByte;
706 off_t we_want_to_send = st_size;
707 volatile int mapped = 0;/* quiet warning: might be clobbered by 'longjmp' */
708 #else
709 start = NULL;
710 #endif
712 if (-1 == c->file.fd) { /* open the file if not already open */
713 if (-1 == (c->file.fd = fdevent_open_cloexec(c->file.name->ptr, O_RDONLY, 0))) {
714 log_error_write(srv, __FILE__, __LINE__, "sbs", "open failed for:", c->file.name, strerror(errno));
716 return -1;
720 abs_offset = c->file.start + c->offset;
722 #ifdef USE_MMAP
723 /* mmap the buffer
724 * - first mmap
725 * - new mmap as the we are at the end of the last one */
726 if (c->file.mmap.start == MAP_FAILED ||
727 abs_offset == (off_t)(c->file.mmap.offset + c->file.mmap.length)) {
729 /* Optimizations for the future:
731 * adaptive mem-mapping
732 * the problem:
733 * we mmap() the whole file. If someone has alot large files and 32bit
734 * machine the virtual address area will be unrun and we will have a failing
735 * mmap() call.
736 * solution:
737 * only mmap 16M in one chunk and move the window as soon as we have finished
738 * the first 8M
740 * read-ahead buffering
741 * the problem:
742 * sending out several large files in parallel trashes the read-ahead of the
743 * kernel leading to long wait-for-seek times.
744 * solutions: (increasing complexity)
745 * 1. use madvise
746 * 2. use a internal read-ahead buffer in the chunk-structure
747 * 3. use non-blocking IO for file-transfers
748 * */
750 /* all mmap()ed areas are 512kb expect the last which might be smaller */
751 off_t to_mmap;
753 /* this is a remap, move the mmap-offset */
754 if (c->file.mmap.start != MAP_FAILED) {
755 munmap(c->file.mmap.start, c->file.mmap.length);
756 c->file.mmap.offset += we_want_to_mmap;
757 } else {
758 /* in case the range-offset is after the first mmap()ed area we skip the area */
759 c->file.mmap.offset = 0;
761 while (c->file.mmap.offset + we_want_to_mmap < c->file.start) {
762 c->file.mmap.offset += we_want_to_mmap;
766 /* length is rel, c->offset too, assume there is no limit at the mmap-boundaries */
767 to_mmap = (c->file.start + c->file.length) - c->file.mmap.offset;
768 if (to_mmap > we_want_to_mmap) to_mmap = we_want_to_mmap;
769 /* we have more to send than we can mmap() at once */
770 if (we_want_to_send > to_mmap) we_want_to_send = to_mmap;
772 if (MAP_FAILED == (c->file.mmap.start = mmap(0, (size_t)to_mmap, PROT_READ, MAP_SHARED, c->file.fd, c->file.mmap.offset))) {
773 /* close it here, otherwise we'd have to set FD_CLOEXEC */
775 log_error_write(srv, __FILE__, __LINE__, "ssbd", "mmap failed:",
776 strerror(errno), c->file.name, c->file.fd);
778 return -1;
781 c->file.mmap.length = to_mmap;
782 #ifdef HAVE_MADVISE
783 /* don't advise files < 64Kb */
784 if (c->file.mmap.length > (64 KByte) &&
785 0 != madvise(c->file.mmap.start, c->file.mmap.length, MADV_WILLNEED)) {
786 log_error_write(srv, __FILE__, __LINE__, "ssbd", "madvise failed:",
787 strerror(errno), c->file.name, c->file.fd);
789 #endif
791 /* chunk_reset() or chunk_free() will cleanup for us */
794 /* to_send = abs_mmap_end - abs_offset */
795 toSend = (c->file.mmap.offset + c->file.mmap.length) - abs_offset;
796 if (toSend > we_want_to_send) toSend = we_want_to_send;
798 if (toSend < 0) {
799 log_error_write(srv, __FILE__, __LINE__, "soooo",
800 "toSend is negative:",
801 toSend,
802 c->file.mmap.length,
803 abs_offset,
804 c->file.mmap.offset);
805 force_assert(toSend < 0);
808 start = c->file.mmap.start;
809 mapped = 1;
810 #endif
812 if (MAP_FAILED == c->file.mmap.start) {
813 toSend = st_size;
814 if (toSend > 2 MByte) toSend = 2 MByte;
815 if (NULL == (start = malloc((size_t)toSend)) || -1 == lseek(c->file.fd, abs_offset, SEEK_SET) || toSend != read(c->file.fd, start, (size_t)toSend)) {
816 log_error_write(srv, __FILE__, __LINE__, "sbss", "reading", c->file.name, "failed:", strerror(errno));
818 free(start);
819 return -1;
823 #ifdef USE_MMAP
824 if (mapped) {
825 signal(SIGBUS, sigbus_handler);
826 sigbus_jmp_valid = 1;
827 if (0 != sigsetjmp(sigbus_jmp, 1)) {
828 sigbus_jmp_valid = 0;
830 log_error_write(srv, __FILE__, __LINE__, "sbd", "SIGBUS in mmap:",
831 c->file.name, c->file.fd);
832 return -1;
835 #endif
837 if (mod_deflate_compress(srv, con, hctx,
838 (unsigned char *)start + (abs_offset - c->file.mmap.offset), toSend) < 0) {
839 log_error_write(srv, __FILE__, __LINE__, "s",
840 "compress failed.");
841 toSend = -1;
844 #ifdef USE_MMAP
845 if (mapped)
846 sigbus_jmp_valid = 0;
847 else
848 #endif
849 free(start);
851 return toSend;
855 static handler_t deflate_compress_response(server *srv, connection *con, handler_ctx *hctx) {
856 off_t len, max;
857 int close_stream;
859 /* move all chunk from write_queue into our in_queue, then adjust
860 * counters since con->write_queue is reused for compressed output */
861 len = chunkqueue_length(con->write_queue);
862 chunkqueue_remove_finished_chunks(con->write_queue);
863 chunkqueue_append_chunkqueue(hctx->in_queue, con->write_queue);
864 con->write_queue->bytes_in -= len;
865 con->write_queue->bytes_out -= len;
867 max = chunkqueue_length(hctx->in_queue);
868 #if 0
869 /* calculate max bytes to compress for this call */
870 if (p->conf.sync_flush && max > (len = p->conf.work_block_size << 10)) {
871 max = len;
873 #endif
875 /* Compress chunks from in_queue into chunks for write_queue */
876 while (max) {
877 chunk *c = hctx->in_queue->first;
879 switch(c->type) {
880 case MEM_CHUNK:
881 len = buffer_string_length(c->mem) - c->offset;
882 if (len > max) len = max;
883 if (mod_deflate_compress(srv, con, hctx, (unsigned char *)c->mem->ptr+c->offset, len) < 0) {
884 log_error_write(srv, __FILE__, __LINE__, "s",
885 "compress failed.");
886 return HANDLER_ERROR;
888 break;
889 case FILE_CHUNK:
890 len = c->file.length - c->offset;
891 if (len > max) len = max;
892 if ((len = mod_deflate_file_chunk(srv, con, hctx, c, len)) < 0) {
893 log_error_write(srv, __FILE__, __LINE__, "s",
894 "compress file chunk failed.");
895 return HANDLER_ERROR;
897 break;
898 default:
899 log_error_write(srv, __FILE__, __LINE__, "ds", c, "type not known");
900 return HANDLER_ERROR;
903 max -= len;
904 chunkqueue_mark_written(hctx->in_queue, len);
907 /*(currently should always be true)*/
908 /*(current implementation requires response be complete)*/
909 close_stream = (con->file_finished && chunkqueue_is_empty(hctx->in_queue));
910 if (mod_deflate_stream_flush(srv, con, hctx, close_stream) < 0) {
911 log_error_write(srv, __FILE__, __LINE__, "s", "flush error");
912 return HANDLER_ERROR;
915 return close_stream ? HANDLER_FINISHED : HANDLER_GO_ON;
919 #define PATCH(x) \
920 p->conf.x = s->x;
921 static int mod_deflate_patch_connection(server *srv, connection *con, plugin_data *p) {
922 size_t i, j;
923 plugin_config *s = p->config_storage[0];
925 PATCH(mimetypes);
926 PATCH(allowed_encodings);
927 PATCH(max_compress_size);
928 PATCH(min_compress_size);
929 PATCH(compression_level);
930 PATCH(output_buffer_size);
931 PATCH(work_block_size);
933 /* skip the first, the global context */
934 for (i = 1; i < srv->config_context->used; i++) {
935 data_config *dc = (data_config *)srv->config_context->data[i];
936 s = p->config_storage[i];
938 /* condition didn't match */
939 if (!config_check_cond(srv, con, dc)) continue;
941 /* merge config */
942 for (j = 0; j < dc->value->used; j++) {
943 data_unset *du = dc->value->data[j];
945 if (buffer_is_equal_string(du->key, CONST_STR_LEN("deflate.mimetypes"))) {
946 PATCH(mimetypes);
947 } else if (buffer_is_equal_string(du->key, CONST_STR_LEN("deflate.allowed-encodings"))) {
948 PATCH(allowed_encodings);
949 } else if (buffer_is_equal_string(du->key, CONST_STR_LEN("deflate.max-compress-size"))) {
950 PATCH(max_compress_size);
951 } else if (buffer_is_equal_string(du->key, CONST_STR_LEN("deflate.min-compress-size"))) {
952 PATCH(min_compress_size);
953 } else if (buffer_is_equal_string(du->key, CONST_STR_LEN("deflate.compression-level"))) {
954 PATCH(compression_level);
955 } else if (buffer_is_equal_string(du->key, CONST_STR_LEN("deflate.output-buffer-size"))) {
956 PATCH(output_buffer_size);
957 } else if (buffer_is_equal_string(du->key, CONST_STR_LEN("deflate.work-block-size"))) {
958 PATCH(work_block_size);
963 return 0;
965 #undef PATCH
967 static int mod_deflate_choose_encoding (const char *value, plugin_data *p, const char **label) {
968 /* get client side support encodings */
969 int accept_encoding = 0;
970 #if !defined(USE_ZLIB) && !defined(USE_BZ2LIB)
971 UNUSED(value);
972 #endif
973 #ifdef USE_ZLIB
974 if (NULL != strstr(value, "gzip")) accept_encoding |= HTTP_ACCEPT_ENCODING_GZIP;
975 else if (NULL != strstr(value, "x-gzip")) accept_encoding |= HTTP_ACCEPT_ENCODING_X_GZIP;
976 if (NULL != strstr(value, "deflate")) accept_encoding |= HTTP_ACCEPT_ENCODING_DEFLATE;
977 #endif
978 /* if (NULL != strstr(value, "compress")) accept_encoding |= HTTP_ACCEPT_ENCODING_COMPRESS; */
979 #ifdef USE_BZ2LIB
980 if (p->conf.allowed_encodings & (HTTP_ACCEPT_ENCODING_BZIP2 | HTTP_ACCEPT_ENCODING_X_BZIP2)) {
981 if (NULL != strstr(value, "bzip2")) accept_encoding |= HTTP_ACCEPT_ENCODING_BZIP2;
982 else if (NULL != strstr(value, "x-bzip2")) accept_encoding |= HTTP_ACCEPT_ENCODING_X_BZIP2;
984 #endif
985 /* if (NULL != strstr(value, "identity")) accept_encoding |= HTTP_ACCEPT_ENCODING_IDENTITY; */
987 /* mask to limit to allowed_encodings */
988 accept_encoding &= p->conf.allowed_encodings;
990 /* select best matching encoding */
991 #ifdef USE_BZ2LIB
992 if (accept_encoding & HTTP_ACCEPT_ENCODING_BZIP2) {
993 *label = "bzip2";
994 return HTTP_ACCEPT_ENCODING_BZIP2;
995 } else if (accept_encoding & HTTP_ACCEPT_ENCODING_X_BZIP2) {
996 *label = "x-bzip2";
997 return HTTP_ACCEPT_ENCODING_BZIP2;
998 } else
999 #endif
1000 if (accept_encoding & HTTP_ACCEPT_ENCODING_GZIP) {
1001 *label = "gzip";
1002 return HTTP_ACCEPT_ENCODING_GZIP;
1003 } else if (accept_encoding & HTTP_ACCEPT_ENCODING_X_GZIP) {
1004 *label = "x-gzip";
1005 return HTTP_ACCEPT_ENCODING_GZIP;
1006 } else if (accept_encoding & HTTP_ACCEPT_ENCODING_DEFLATE) {
1007 *label = "deflate";
1008 return HTTP_ACCEPT_ENCODING_DEFLATE;
1009 } else {
1010 return 0;
1014 CONNECTION_FUNC(mod_deflate_handle_response_start) {
1015 plugin_data *p = p_d;
1016 data_string *ds;
1017 handler_ctx *hctx;
1018 const char *label;
1019 off_t len;
1020 size_t etaglen = 0;
1021 int compression_type;
1022 handler_t rc;
1024 /*(current implementation requires response be complete)*/
1025 if (!con->file_finished) return HANDLER_GO_ON;
1026 if (con->request.http_method == HTTP_METHOD_HEAD) return HANDLER_GO_ON;
1027 if (con->parsed_response & HTTP_TRANSFER_ENCODING_CHUNKED) return HANDLER_GO_ON;
1029 /* disable compression for some http status types. */
1030 switch(con->http_status) {
1031 case 100:
1032 case 101:
1033 case 204:
1034 case 205:
1035 case 304:
1036 /* disable compression as we have no response entity */
1037 return HANDLER_GO_ON;
1038 default:
1039 break;
1042 mod_deflate_patch_connection(srv, con, p);
1044 /* check if deflate configured for any mimetypes */
1045 if (!p->conf.mimetypes->used) return HANDLER_GO_ON;
1047 /* check if size of response is below min-compress-size or exceeds max*/
1048 /* (con->file_finished checked at top of routine) */
1049 len = chunkqueue_length(con->write_queue);
1050 if (len <= (off_t)p->conf.min_compress_size) return HANDLER_GO_ON;
1051 if (p->conf.max_compress_size /*(max_compress_size in KB)*/
1052 && len > ((off_t)p->conf.max_compress_size << 10)) {
1053 return HANDLER_GO_ON;
1056 /* Check if response has a Content-Encoding. */
1057 ds = (data_string *)array_get_element(con->response.headers, "Content-Encoding");
1058 if (NULL != ds) return HANDLER_GO_ON;
1060 /* Check Accept-Encoding for supported encoding. */
1061 ds = (data_string *)array_get_element(con->request.headers, "Accept-Encoding");
1062 if (NULL == ds) return HANDLER_GO_ON;
1064 /* find matching encodings */
1065 compression_type = mod_deflate_choose_encoding(ds->value->ptr, p, &label);
1066 if (!compression_type) return HANDLER_GO_ON;
1068 /* Check mimetype in response header "Content-Type" */
1069 if (NULL != (ds = (data_string *)array_get_element(con->response.headers, "Content-Type"))) {
1070 int found = 0;
1071 size_t m;
1072 for (m = 0; m < p->conf.mimetypes->used; ++m) {
1073 data_string *mimetype = (data_string *)p->conf.mimetypes->data[m];
1074 if (0 == strncmp(mimetype->value->ptr, ds->value->ptr, buffer_string_length(mimetype->value))) {
1075 /* mimetype found */
1076 found = 1;
1077 break;
1080 if (!found) return HANDLER_GO_ON;
1082 #if 0
1083 if (0 == strncasecmp(ds->value->ptr, "application/x-javascript", 24)) {
1084 /*reset compress type to deflate for javascript
1085 * prevent buggy IE6 SP1 doesn't work for js in IFrame
1087 compression_type = HTTP_ACCEPT_ENCODING_DEFLATE;
1089 #endif
1090 } else {
1091 /* If no Content-Type set, compress only if first p->conf.mimetypes value is "" */
1092 data_string *mimetype = (data_string *)p->conf.mimetypes->data[0];
1093 if (!buffer_string_is_empty(mimetype->value)) return HANDLER_GO_ON;
1096 /* Vary: Accept-Encoding (response might change according to request Accept-Encoding) */
1097 if (NULL != (ds = (data_string *)array_get_element(con->response.headers, "Vary"))) {
1098 if (NULL == strstr(ds->value->ptr, "Accept-Encoding")) {
1099 buffer_append_string_len(ds->value, CONST_STR_LEN(",Accept-Encoding"));
1101 } else {
1102 response_header_insert(srv, con, CONST_STR_LEN("Vary"),
1103 CONST_STR_LEN("Accept-Encoding"));
1106 /* check ETag as is done in http_response_handle_cachable()
1107 * (slightly imperfect (close enough?) match of ETag "000000" to "000000-gzip") */
1108 ds = (data_string *)array_get_element(con->response.headers, "ETag");
1109 if (NULL != ds) {
1110 etaglen = buffer_string_length(ds->value);
1111 if (etaglen
1112 && con->http_status < 300 /*(want 2xx only)*/
1113 && con->request.http_if_none_match
1114 && 0 == strncmp(con->request.http_if_none_match, ds->value->ptr, etaglen-1)
1115 && con->request.http_if_none_match[etaglen-1] == '-'
1116 && 0 == strncmp(con->request.http_if_none_match+etaglen, label, strlen(label))) {
1118 if ( HTTP_METHOD_GET == con->request.http_method
1119 || HTTP_METHOD_HEAD == con->request.http_method) {
1120 /* modify ETag response header in-place to remove '"' and append '-label"' */
1121 ds->value->ptr[etaglen-1] = '-'; /*(overwrite end '"')*/
1122 buffer_append_string(ds->value, label);
1123 buffer_append_string_len(ds->value, CONST_STR_LEN("\""));
1124 /*buffer_copy_buffer(con->physical.etag, ds->value);*//*(keep in sync?)*/
1125 con->http_status = 304;
1126 } else {
1127 con->http_status = 412;
1130 /* response_start hook occurs after error docs have been handled.
1131 * For now, send back empty response body.
1132 * In the future, might extract the error doc code so that it
1133 * might be run again if response_start hooks return with
1134 * changed http_status and con->mode = DIRECT */
1135 con->response.transfer_encoding &= ~HTTP_TRANSFER_ENCODING_CHUNKED;
1136 con->parsed_response &= ~HTTP_CONTENT_LENGTH;
1137 chunkqueue_reset(con->write_queue);
1138 con->file_finished = 1;
1140 con->mode = DIRECT;
1141 return HANDLER_GO_ON;
1145 /* update ETag, if ETag response header is set */
1146 if (etaglen) {
1147 /* modify ETag response header in-place to remove '"' and append '-label"' */
1148 ds->value->ptr[etaglen-1] = '-'; /*(overwrite end '"')*/
1149 buffer_append_string(ds->value, label);
1150 buffer_append_string_len(ds->value, CONST_STR_LEN("\""));
1151 /*buffer_copy_buffer(con->physical.etag, ds->value);*//*(keep in sync?)*/
1154 /* set Content-Encoding to show selected compression type */
1155 response_header_overwrite(srv, con, CONST_STR_LEN("Content-Encoding"), label, strlen(label));
1157 /* clear Content-Length and con->write_queue if HTTP HEAD request
1158 * (alternatively, could return original Content-Length with HEAD
1159 * request if ETag not modified and Content-Encoding not added) */
1160 if (HTTP_METHOD_HEAD == con->request.http_method) {
1161 /* ensure that uncompressed Content-Length is not sent in HEAD response */
1162 chunkqueue_reset(con->write_queue);
1163 if (con->parsed_response & HTTP_CONTENT_LENGTH) {
1164 con->parsed_response &= ~HTTP_CONTENT_LENGTH;
1165 if (NULL != (ds = (data_string*) array_get_element(con->response.headers, "Content-Length"))) {
1166 buffer_reset(ds->value); /* headers with empty values are ignored for output */
1169 return HANDLER_GO_ON;
1172 /* future: might use ETag to check if compressed content is in compressed file cache */
1173 /*if (etaglen) { ... } *//* return if in file cache after updating con->write_queue */
1175 /* enable compression */
1176 p->conf.sync_flush =
1177 (con->conf.stream_response_body && 0 == p->conf.output_buffer_size);
1178 hctx = handler_ctx_init();
1179 hctx->plugin_data = p;
1180 hctx->compression_type = compression_type;
1181 /* setup output buffer */
1182 buffer_string_set_length(p->tmp_buf, 0);
1183 hctx->output = p->tmp_buf;
1184 if (0 != mod_deflate_stream_init(hctx)) {
1185 /*(should not happen unless ENOMEM)*/
1186 handler_ctx_free(hctx);
1187 log_error_write(srv, __FILE__, __LINE__, "ss",
1188 "Failed to initialize compression", label);
1189 /* restore prior Etag and unset Content-Encoding */
1190 if (etaglen) {
1191 ds->value->ptr[etaglen-1] = '"'; /*(overwrite '-')*/
1192 buffer_string_set_length(ds->value, etaglen);
1194 ds = (data_string *)array_get_element(con->response.headers, "Content-Encoding");
1195 if (ds) buffer_reset(ds->value); /* headers with empty values are ignored for output */
1196 return HANDLER_GO_ON;
1199 con->parsed_response &= ~HTTP_CONTENT_LENGTH;
1200 con->plugin_ctx[p->id] = hctx;
1202 rc = deflate_compress_response(srv, con, hctx);
1203 if (HANDLER_GO_ON != rc) {
1204 if (HANDLER_FINISHED == rc) {
1205 mod_deflate_note_ratio(srv, con, hctx);
1207 deflate_compress_cleanup(srv, con, hctx);
1208 if (HANDLER_ERROR == rc) return HANDLER_ERROR;
1211 return HANDLER_GO_ON;
1214 static handler_t mod_deflate_cleanup(server *srv, connection *con, void *p_d) {
1215 plugin_data *p = p_d;
1216 handler_ctx *hctx = con->plugin_ctx[p->id];
1218 if (NULL != hctx) deflate_compress_cleanup(srv, con, hctx);
1220 return HANDLER_GO_ON;
1223 int mod_deflate_plugin_init(plugin *p);
1224 int mod_deflate_plugin_init(plugin *p) {
1225 p->version = LIGHTTPD_VERSION_ID;
1226 p->name = buffer_init_string("deflate");
1228 p->init = mod_deflate_init;
1229 p->cleanup = mod_deflate_free;
1230 p->set_defaults = mod_deflate_setdefaults;
1231 p->connection_reset = mod_deflate_cleanup;
1232 p->handle_connection_close = mod_deflate_cleanup;
1233 p->handle_response_start = mod_deflate_handle_response_start;
1235 p->data = NULL;
1237 return 0;