TOR: update to v0.2.5.12
[tomato.git] / release / src / router / tor / src / common / torgzip.c
blob15451ee30d0ca34e6a6441d4987a9f1af8b14623
/* Copyright (c) 2004, Roger Dingledine.
 * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson.
 * Copyright (c) 2007-2013, The Tor Project, Inc. */
/* See LICENSE for licensing information */

/**
 * \file torgzip.c
 * \brief A simple in-memory gzip implementation.
 **/
11 #include "orconfig.h"
13 #include <stdlib.h>
14 #include <stdio.h>
15 #include <assert.h>
16 #include <string.h>
17 #include "torint.h"
19 #ifdef HAVE_NETINET_IN_H
20 #include <netinet/in.h>
21 #endif
23 #include "util.h"
24 #include "torlog.h"
25 #include "torgzip.h"
27 /* zlib 1.2.4 and 1.2.5 do some "clever" things with macros. Instead of
28 saying "(defined(FOO) ? FOO : 0)" they like to say "FOO-0", on the theory
29 that nobody will care if the compile outputs a no-such-identifier warning.
31 Sorry, but we like -Werror over here, so I guess we need to define these.
32 I hope that zlib 1.2.6 doesn't break these too.
34 #ifndef _LARGEFILE64_SOURCE
35 #define _LARGEFILE64_SOURCE 0
36 #endif
37 #ifndef _LFS64_LARGEFILE
38 #define _LFS64_LARGEFILE 0
39 #endif
40 #ifndef _FILE_OFFSET_BITS
41 #define _FILE_OFFSET_BITS 0
42 #endif
43 #ifndef off64_t
44 #define off64_t int64_t
45 #endif
47 #include <zlib.h>
49 /** Set to 1 if zlib is a version that supports gzip; set to 0 if it doesn't;
50 * set to -1 if we haven't checked yet. */
51 static int gzip_is_supported = -1;
53 /** Return true iff we support gzip-based compression. Otherwise, we need to
54 * use zlib. */
55 int
56 is_gzip_supported(void)
58 if (gzip_is_supported >= 0)
59 return gzip_is_supported;
61 if (!strcmpstart(ZLIB_VERSION, "0.") ||
62 !strcmpstart(ZLIB_VERSION, "1.0") ||
63 !strcmpstart(ZLIB_VERSION, "1.1"))
64 gzip_is_supported = 0;
65 else
66 gzip_is_supported = 1;
68 return gzip_is_supported;
/** Return a string representation of the version of the currently running
 * version of zlib, as reported by zlibVersion(). */
const char *
tor_zlib_get_version_str(void)
{
  return zlibVersion();
}
79 /** Return a string representation of the version of the version of zlib
80 * used at compilation. */
81 const char *
82 tor_zlib_get_header_version_str(void)
84 return ZLIB_VERSION;
87 /** Return the 'bits' value to tell zlib to use <b>method</b>.*/
88 static INLINE int
89 method_bits(compress_method_t method)
91 /* Bits+16 means "use gzip" in zlib >= 1.2 */
92 return method == GZIP_METHOD ? 15+16 : 15;
/** @{ */
/* These macros define the maximum allowable compression factor.  Anything of
 * size greater than CHECK_FOR_COMPRESSION_BOMB_AFTER is not allowed to
 * have an uncompression factor (uncompressed size:compressed size ratio) of
 * any greater than MAX_UNCOMPRESSION_FACTOR.
 *
 * Picking a value for MAX_UNCOMPRESSION_FACTOR is a trade-off: we want it to
 * be small to limit the attack multiplier, but we also want it to be large
 * enough so that no legitimate document --even ones we might invent in the
 * future -- ever compresses by a factor of greater than
 * MAX_UNCOMPRESSION_FACTOR. Within those parameters, there's a reasonably
 * large range of possible values. IMO, anything over 8 is probably safe; IMO
 * anything under 50 is probably sufficient.
 */
#define MAX_UNCOMPRESSION_FACTOR 25
#define CHECK_FOR_COMPRESSION_BOMB_AFTER (1024*64)
/** @} */

/** Return true if uncompressing an input of size <b>size_in</b> to an output
 * of size at least <b>size_out</b> looks like a compression bomb.
 *
 * An empty input, or an output smaller than
 * CHECK_FOR_COMPRESSION_BOMB_AFTER bytes, is never reported as a bomb.
 * Otherwise the integer quotient size_out / size_in must exceed
 * MAX_UNCOMPRESSION_FACTOR for the pair to be flagged. */
static int
is_compression_bomb(size_t size_in, size_t size_out)
{
  /* The size_in == 0 test also protects the division below. */
  if (size_in == 0 || size_out < CHECK_FOR_COMPRESSION_BOMB_AFTER)
    return 0;

  return (size_out / size_in > MAX_UNCOMPRESSION_FACTOR);
}
124 /** Given <b>in_len</b> bytes at <b>in</b>, compress them into a newly
125 * allocated buffer, using the method described in <b>method</b>. Store the
126 * compressed string in *<b>out</b>, and its length in *<b>out_len</b>.
127 * Return 0 on success, -1 on failure.
130 tor_gzip_compress(char **out, size_t *out_len,
131 const char *in, size_t in_len,
132 compress_method_t method)
134 struct z_stream_s *stream = NULL;
135 size_t out_size, old_size;
136 off_t offset;
138 tor_assert(out);
139 tor_assert(out_len);
140 tor_assert(in);
141 tor_assert(in_len < UINT_MAX);
143 *out = NULL;
145 if (method == GZIP_METHOD && !is_gzip_supported()) {
146 /* Old zlib version don't support gzip in deflateInit2 */
147 log_warn(LD_BUG, "Gzip not supported with zlib %s", ZLIB_VERSION);
148 goto err;
151 stream = tor_malloc_zero(sizeof(struct z_stream_s));
152 stream->zalloc = Z_NULL;
153 stream->zfree = Z_NULL;
154 stream->opaque = NULL;
155 stream->next_in = (unsigned char*) in;
156 stream->avail_in = (unsigned int)in_len;
158 if (deflateInit2(stream, Z_BEST_COMPRESSION, Z_DEFLATED,
159 method_bits(method),
160 8, Z_DEFAULT_STRATEGY) != Z_OK) {
161 log_warn(LD_GENERAL, "Error from deflateInit2: %s",
162 stream->msg?stream->msg:"<no message>");
163 goto err;
166 /* Guess 50% compression. */
167 out_size = in_len / 2;
168 if (out_size < 1024) out_size = 1024;
169 *out = tor_malloc(out_size);
170 stream->next_out = (unsigned char*)*out;
171 stream->avail_out = (unsigned int)out_size;
173 while (1) {
174 switch (deflate(stream, Z_FINISH))
176 case Z_STREAM_END:
177 goto done;
178 case Z_OK:
179 /* In case zlib doesn't work as I think .... */
180 if (stream->avail_out >= stream->avail_in+16)
181 break;
182 case Z_BUF_ERROR:
183 offset = stream->next_out - ((unsigned char*)*out);
184 old_size = out_size;
185 out_size *= 2;
186 if (out_size < old_size) {
187 log_warn(LD_GENERAL, "Size overflow in compression.");
188 goto err;
190 *out = tor_realloc(*out, out_size);
191 stream->next_out = (unsigned char*)(*out + offset);
192 if (out_size - offset > UINT_MAX) {
193 log_warn(LD_BUG, "Ran over unsigned int limit of zlib while "
194 "uncompressing.");
195 goto err;
197 stream->avail_out = (unsigned int)(out_size - offset);
198 break;
199 default:
200 log_warn(LD_GENERAL, "Gzip compression didn't finish: %s",
201 stream->msg ? stream->msg : "<no message>");
202 goto err;
205 done:
206 *out_len = stream->total_out;
207 #ifdef OPENBSD
208 /* "Hey Rocky! Watch me change an unsigned field to a signed field in a
209 * third-party API!"
210 * "Oh, that trick will just make people do unsafe casts to the unsigned
211 * type in their cross-platform code!"
212 * "Don't be foolish. I'm _sure_ they'll have the good sense to make sure
213 * the newly unsigned field isn't negative." */
214 tor_assert(stream->total_out >= 0);
215 #endif
216 if (((size_t)stream->total_out) > out_size + 4097) {
217 /* If we're wasting more than 4k, don't. */
218 *out = tor_realloc(*out, stream->total_out + 1);
220 if (deflateEnd(stream)!=Z_OK) {
221 log_warn(LD_BUG, "Error freeing gzip structures");
222 goto err;
224 tor_free(stream);
226 if (is_compression_bomb(*out_len, in_len)) {
227 log_warn(LD_BUG, "We compressed something and got an insanely high "
228 "compression factor; other Tors would think this was a zlib bomb.");
229 goto err;
232 return 0;
233 err:
234 if (stream) {
235 deflateEnd(stream);
236 tor_free(stream);
238 tor_free(*out);
239 return -1;
242 /** Given zero or more zlib-compressed or gzip-compressed strings of
243 * total length
244 * <b>in_len</b> bytes at <b>in</b>, uncompress them into a newly allocated
245 * buffer, using the method described in <b>method</b>. Store the uncompressed
246 * string in *<b>out</b>, and its length in *<b>out_len</b>. Return 0 on
247 * success, -1 on failure.
249 * If <b>complete_only</b> is true, we consider a truncated input as a
250 * failure; otherwise we decompress as much as we can. Warn about truncated
251 * or corrupt inputs at <b>protocol_warn_level</b>.
254 tor_gzip_uncompress(char **out, size_t *out_len,
255 const char *in, size_t in_len,
256 compress_method_t method,
257 int complete_only,
258 int protocol_warn_level)
260 struct z_stream_s *stream = NULL;
261 size_t out_size, old_size;
262 off_t offset;
263 int r;
265 tor_assert(out);
266 tor_assert(out_len);
267 tor_assert(in);
268 tor_assert(in_len < UINT_MAX);
270 if (method == GZIP_METHOD && !is_gzip_supported()) {
271 /* Old zlib version don't support gzip in inflateInit2 */
272 log_warn(LD_BUG, "Gzip not supported with zlib %s", ZLIB_VERSION);
273 return -1;
276 *out = NULL;
278 stream = tor_malloc_zero(sizeof(struct z_stream_s));
279 stream->zalloc = Z_NULL;
280 stream->zfree = Z_NULL;
281 stream->opaque = NULL;
282 stream->next_in = (unsigned char*) in;
283 stream->avail_in = (unsigned int)in_len;
285 if (inflateInit2(stream,
286 method_bits(method)) != Z_OK) {
287 log_warn(LD_GENERAL, "Error from inflateInit2: %s",
288 stream->msg?stream->msg:"<no message>");
289 goto err;
292 out_size = in_len * 2; /* guess 50% compression. */
293 if (out_size < 1024) out_size = 1024;
294 if (out_size >= SIZE_T_CEILING || out_size > UINT_MAX)
295 goto err;
297 *out = tor_malloc(out_size);
298 stream->next_out = (unsigned char*)*out;
299 stream->avail_out = (unsigned int)out_size;
301 while (1) {
302 switch (inflate(stream, complete_only ? Z_FINISH : Z_SYNC_FLUSH))
304 case Z_STREAM_END:
305 if (stream->avail_in == 0)
306 goto done;
307 /* There may be more compressed data here. */
308 if ((r = inflateEnd(stream)) != Z_OK) {
309 log_warn(LD_BUG, "Error freeing gzip structures");
310 goto err;
312 if (inflateInit2(stream, method_bits(method)) != Z_OK) {
313 log_warn(LD_GENERAL, "Error from second inflateInit2: %s",
314 stream->msg?stream->msg:"<no message>");
315 goto err;
317 break;
318 case Z_OK:
319 if (!complete_only && stream->avail_in == 0)
320 goto done;
321 /* In case zlib doesn't work as I think.... */
322 if (stream->avail_out >= stream->avail_in+16)
323 break;
324 case Z_BUF_ERROR:
325 if (stream->avail_out > 0) {
326 log_fn(protocol_warn_level, LD_PROTOCOL,
327 "possible truncated or corrupt zlib data");
328 goto err;
330 offset = stream->next_out - (unsigned char*)*out;
331 old_size = out_size;
332 out_size *= 2;
333 if (out_size < old_size) {
334 log_warn(LD_GENERAL, "Size overflow in uncompression.");
335 goto err;
337 if (is_compression_bomb(in_len, out_size)) {
338 log_warn(LD_GENERAL, "Input looks like a possible zlib bomb; "
339 "not proceeding.");
340 goto err;
342 if (out_size >= SIZE_T_CEILING) {
343 log_warn(LD_BUG, "Hit SIZE_T_CEILING limit while uncompressing.");
344 goto err;
346 *out = tor_realloc(*out, out_size);
347 stream->next_out = (unsigned char*)(*out + offset);
348 if (out_size - offset > UINT_MAX) {
349 log_warn(LD_BUG, "Ran over unsigned int limit of zlib while "
350 "uncompressing.");
351 goto err;
353 stream->avail_out = (unsigned int)(out_size - offset);
354 break;
355 default:
356 log_warn(LD_GENERAL, "Gzip decompression returned an error: %s",
357 stream->msg ? stream->msg : "<no message>");
358 goto err;
361 done:
362 *out_len = stream->next_out - (unsigned char*)*out;
363 r = inflateEnd(stream);
364 tor_free(stream);
365 if (r != Z_OK) {
366 log_warn(LD_BUG, "Error freeing gzip structures");
367 goto err;
370 /* NUL-terminate output. */
371 if (out_size == *out_len)
372 *out = tor_realloc(*out, out_size + 1);
373 (*out)[*out_len] = '\0';
375 return 0;
376 err:
377 if (stream) {
378 inflateEnd(stream);
379 tor_free(stream);
381 if (*out) {
382 tor_free(*out);
384 return -1;
387 /** Try to tell whether the <b>in_len</b>-byte string in <b>in</b> is likely
388 * to be compressed or not. If it is, return the likeliest compression method.
389 * Otherwise, return UNKNOWN_METHOD.
391 compress_method_t
392 detect_compression_method(const char *in, size_t in_len)
394 if (in_len > 2 && fast_memeq(in, "\x1f\x8b", 2)) {
395 return GZIP_METHOD;
396 } else if (in_len > 2 && (in[0] & 0x0f) == 8 &&
397 (ntohs(get_uint16(in)) % 31) == 0) {
398 return ZLIB_METHOD;
399 } else {
400 return UNKNOWN_METHOD;
404 /** Internal state for an incremental zlib compression/decompression. The
405 * body of this struct is not exposed. */
406 struct tor_zlib_state_t {
407 struct z_stream_s stream; /**< The zlib stream */
408 int compress; /**< True if we are compressing; false if we are inflating */
410 /** Number of bytes read so far. Used to detect zlib bombs. */
411 size_t input_so_far;
412 /** Number of bytes written so far. Used to detect zlib bombs. */
413 size_t output_so_far;
416 /** Construct and return a tor_zlib_state_t object using <b>method</b>. If
417 * <b>compress</b>, it's for compression; otherwise it's for
418 * decompression. */
419 tor_zlib_state_t *
420 tor_zlib_new(int compress, compress_method_t method)
422 tor_zlib_state_t *out;
424 if (method == GZIP_METHOD && !is_gzip_supported()) {
425 /* Old zlib version don't support gzip in inflateInit2 */
426 log_warn(LD_BUG, "Gzip not supported with zlib %s", ZLIB_VERSION);
427 return NULL;
430 out = tor_malloc_zero(sizeof(tor_zlib_state_t));
431 out->stream.zalloc = Z_NULL;
432 out->stream.zfree = Z_NULL;
433 out->stream.opaque = NULL;
434 out->compress = compress;
435 if (compress) {
436 if (deflateInit2(&out->stream, Z_BEST_COMPRESSION, Z_DEFLATED,
437 method_bits(method), 8, Z_DEFAULT_STRATEGY) != Z_OK)
438 goto err;
439 } else {
440 if (inflateInit2(&out->stream, method_bits(method)) != Z_OK)
441 goto err;
443 return out;
445 err:
446 tor_free(out);
447 return NULL;
450 /** Compress/decompress some bytes using <b>state</b>. Read up to
451 * *<b>in_len</b> bytes from *<b>in</b>, and write up to *<b>out_len</b> bytes
452 * to *<b>out</b>, adjusting the values as we go. If <b>finish</b> is true,
453 * we've reached the end of the input.
455 * Return TOR_ZLIB_DONE if we've finished the entire compression/decompression.
456 * Return TOR_ZLIB_OK if we're processed everything from the input.
457 * Return TOR_ZLIB_BUF_FULL if we're out of space on <b>out</b>.
458 * Return TOR_ZLIB_ERR if the stream is corrupt.
460 tor_zlib_output_t
461 tor_zlib_process(tor_zlib_state_t *state,
462 char **out, size_t *out_len,
463 const char **in, size_t *in_len,
464 int finish)
466 int err;
467 tor_assert(*in_len <= UINT_MAX);
468 tor_assert(*out_len <= UINT_MAX);
469 state->stream.next_in = (unsigned char*) *in;
470 state->stream.avail_in = (unsigned int)*in_len;
471 state->stream.next_out = (unsigned char*) *out;
472 state->stream.avail_out = (unsigned int)*out_len;
474 if (state->compress) {
475 err = deflate(&state->stream, finish ? Z_FINISH : Z_SYNC_FLUSH);
476 } else {
477 err = inflate(&state->stream, finish ? Z_FINISH : Z_SYNC_FLUSH);
480 state->input_so_far += state->stream.next_in - ((unsigned char*)*in);
481 state->output_so_far += state->stream.next_out - ((unsigned char*)*out);
483 *out = (char*) state->stream.next_out;
484 *out_len = state->stream.avail_out;
485 *in = (const char *) state->stream.next_in;
486 *in_len = state->stream.avail_in;
488 if (! state->compress &&
489 is_compression_bomb(state->input_so_far, state->output_so_far)) {
490 log_warn(LD_DIR, "Possible zlib bomb; abandoning stream.");
491 return TOR_ZLIB_ERR;
494 switch (err)
496 case Z_STREAM_END:
497 return TOR_ZLIB_DONE;
498 case Z_BUF_ERROR:
499 if (state->stream.avail_in == 0)
500 return TOR_ZLIB_OK;
501 return TOR_ZLIB_BUF_FULL;
502 case Z_OK:
503 if (state->stream.avail_out == 0 || finish)
504 return TOR_ZLIB_BUF_FULL;
505 return TOR_ZLIB_OK;
506 default:
507 log_warn(LD_GENERAL, "Gzip returned an error: %s",
508 state->stream.msg ? state->stream.msg : "<no message>");
509 return TOR_ZLIB_ERR;
513 /** Deallocate <b>state</b>. */
514 void
515 tor_zlib_free(tor_zlib_state_t *state)
517 if (!state)
518 return;
520 if (state->compress)
521 deflateEnd(&state->stream);
522 else
523 inflateEnd(&state->stream);
525 tor_free(state);