clean up message; explain a magic number in a comment
[tor/rransom.git] / src / common / torgzip.c
blob249151cc9b2992dc4e129cd9af54e7551d24cdfa
1 /* Copyright (c) 2004, Roger Dingledine.
2 * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson.
3 * Copyright (c) 2007-2011, The Tor Project, Inc. */
4 /* See LICENSE for licensing information */
6 /**
7 * \file torgzip.c
8 * \brief A simple in-memory gzip implementation.
9 **/
11 #include "orconfig.h"
13 #include <stdlib.h>
14 #include <stdio.h>
15 #include <assert.h>
16 #ifdef _MSC_VER
17 #include "..\..\contrib\zlib\zlib.h"
18 #else
19 #include <zlib.h>
20 #endif
21 #include <string.h>
22 #ifdef HAVE_NETINET_IN_H
23 #include <netinet/in.h>
24 #endif
26 #include "util.h"
27 #include "log.h"
28 #include "torgzip.h"
30 /** Set to 1 if zlib is a version that supports gzip; set to 0 if it doesn't;
31 * set to -1 if we haven't checked yet. */
32 static int gzip_is_supported = -1;
34 /** Return true iff we support gzip-based compression. Otherwise, we need to
35 * use zlib. */
36 int
37 is_gzip_supported(void)
39 if (gzip_is_supported >= 0)
40 return gzip_is_supported;
42 if (!strcmpstart(ZLIB_VERSION, "0.") ||
43 !strcmpstart(ZLIB_VERSION, "1.0") ||
44 !strcmpstart(ZLIB_VERSION, "1.1"))
45 gzip_is_supported = 0;
46 else
47 gzip_is_supported = 1;
49 return gzip_is_supported;
52 /** Return the 'bits' value to tell zlib to use <b>method</b>.*/
53 static INLINE int
54 method_bits(compress_method_t method)
56 /* Bits+16 means "use gzip" in zlib >= 1.2 */
57 return method == GZIP_METHOD ? 15+16 : 15;
/* These macros define the maximum allowable compression factor.  Anything of
 * size greater than CHECK_FOR_COMPRESSION_BOMB_AFTER is not allowed to
 * have an uncompression factor (uncompressed size:compressed size ratio) of
 * any greater than MAX_UNCOMPRESSION_FACTOR.
 *
 * Picking a value for MAX_UNCOMPRESSION_FACTOR is a trade-off: we want it to
 * be small to limit the attack multiplier, but we also want it to be large
 * enough so that no legitimate document -- even ones we might invent in the
 * future -- ever compresses by a factor of greater than
 * MAX_UNCOMPRESSION_FACTOR.  Within those parameters, there's a reasonably
 * large range of possible values.  IMO, anything over 8 is probably safe; IMO
 * anything under 50 is probably sufficient.
 */
#define MAX_UNCOMPRESSION_FACTOR 25
#define CHECK_FOR_COMPRESSION_BOMB_AFTER (1024*64)

/** Return true if uncompressing an input of size <b>size_in</b> to an output
 * of size at least <b>size_out</b> looks like a compression bomb. */
static int
is_compression_bomb(size_t size_in, size_t size_out)
{
  size_t factor;

  /* An empty input is never flagged (and must not be divided by). */
  if (!size_in)
    return 0;
  /* Outputs below the threshold are allowed whatever their ratio is. */
  if (size_out < CHECK_FOR_COMPRESSION_BOMB_AFTER)
    return 0;

  /* Integer division: the ratio is rounded down before the comparison. */
  factor = size_out / size_in;
  return (factor > MAX_UNCOMPRESSION_FACTOR);
}
87 /** Given <b>in_len</b> bytes at <b>in</b>, compress them into a newly
88 * allocated buffer, using the method described in <b>method</b>. Store the
89 * compressed string in *<b>out</b>, and its length in *<b>out_len</b>.
90 * Return 0 on success, -1 on failure.
92 int
93 tor_gzip_compress(char **out, size_t *out_len,
94 const char *in, size_t in_len,
95 compress_method_t method)
97 struct z_stream_s *stream = NULL;
98 size_t out_size, old_size;
99 off_t offset;
101 tor_assert(out);
102 tor_assert(out_len);
103 tor_assert(in);
104 tor_assert(in_len < UINT_MAX);
106 *out = NULL;
108 if (method == GZIP_METHOD && !is_gzip_supported()) {
109 /* Old zlib version don't support gzip in deflateInit2 */
110 log_warn(LD_BUG, "Gzip not supported with zlib %s", ZLIB_VERSION);
111 goto err;
114 stream = tor_malloc_zero(sizeof(struct z_stream_s));
115 stream->zalloc = Z_NULL;
116 stream->zfree = Z_NULL;
117 stream->opaque = NULL;
118 stream->next_in = (unsigned char*) in;
119 stream->avail_in = (unsigned int)in_len;
121 if (deflateInit2(stream, Z_BEST_COMPRESSION, Z_DEFLATED,
122 method_bits(method),
123 8, Z_DEFAULT_STRATEGY) != Z_OK) {
124 log_warn(LD_GENERAL, "Error from deflateInit2: %s",
125 stream->msg?stream->msg:"<no message>");
126 goto err;
129 /* Guess 50% compression. */
130 out_size = in_len / 2;
131 if (out_size < 1024) out_size = 1024;
132 *out = tor_malloc(out_size);
133 stream->next_out = (unsigned char*)*out;
134 stream->avail_out = (unsigned int)out_size;
136 while (1) {
137 switch (deflate(stream, Z_FINISH))
139 case Z_STREAM_END:
140 goto done;
141 case Z_OK:
142 /* In case zlib doesn't work as I think .... */
143 if (stream->avail_out >= stream->avail_in+16)
144 break;
145 case Z_BUF_ERROR:
146 offset = stream->next_out - ((unsigned char*)*out);
147 old_size = out_size;
148 out_size *= 2;
149 if (out_size < old_size) {
150 log_warn(LD_GENERAL, "Size overflow in compression.");
151 goto err;
153 *out = tor_realloc(*out, out_size);
154 stream->next_out = (unsigned char*)(*out + offset);
155 if (out_size - offset > UINT_MAX) {
156 log_warn(LD_BUG, "Ran over unsigned int limit of zlib while "
157 "uncompressing.");
158 goto err;
160 stream->avail_out = (unsigned int)(out_size - offset);
161 break;
162 default:
163 log_warn(LD_GENERAL, "Gzip compression didn't finish: %s",
164 stream->msg ? stream->msg : "<no message>");
165 goto err;
168 done:
169 *out_len = stream->total_out;
170 #ifdef OPENBSD
171 /* "Hey Rocky! Watch me change an unsigned field to a signed field in a
172 * third-party API!"
173 * "Oh, that trick will just make people do unsafe casts to the unsigned
174 * type in their cross-platform code!"
175 * "Don't be foolish. I'm _sure_ they'll have the good sense to make sure
176 * the newly unsigned field isn't negative." */
177 tor_assert(stream->total_out >= 0);
178 #endif
179 if (((size_t)stream->total_out) > out_size + 4097) {
180 /* If we're wasting more than 4k, don't. */
181 *out = tor_realloc(*out, stream->total_out + 1);
183 if (deflateEnd(stream)!=Z_OK) {
184 log_warn(LD_BUG, "Error freeing gzip structures");
185 goto err;
187 tor_free(stream);
189 if (is_compression_bomb(*out_len, in_len)) {
190 log_warn(LD_BUG, "We compressed something and got an insanely high "
191 "compression factor; other Tors would think this was a zlib bomb.");
192 goto err;
195 return 0;
196 err:
197 if (stream) {
198 deflateEnd(stream);
199 tor_free(stream);
201 if (*out) {
202 tor_free(*out);
204 return -1;
207 /** Given zero or more zlib-compressed or gzip-compressed strings of
208 * total length
209 * <b>in_len</b> bytes at <b>in</b>, uncompress them into a newly allocated
210 * buffer, using the method described in <b>method</b>. Store the uncompressed
211 * string in *<b>out</b>, and its length in *<b>out_len</b>. Return 0 on
212 * success, -1 on failure.
214 * If <b>complete_only</b> is true, we consider a truncated input as a
215 * failure; otherwise we decompress as much as we can. Warn about truncated
216 * or corrupt inputs at <b>protocol_warn_level</b>.
219 tor_gzip_uncompress(char **out, size_t *out_len,
220 const char *in, size_t in_len,
221 compress_method_t method,
222 int complete_only,
223 int protocol_warn_level)
225 struct z_stream_s *stream = NULL;
226 size_t out_size, old_size;
227 off_t offset;
228 int r;
230 tor_assert(out);
231 tor_assert(out_len);
232 tor_assert(in);
233 tor_assert(in_len < UINT_MAX);
235 if (method == GZIP_METHOD && !is_gzip_supported()) {
236 /* Old zlib version don't support gzip in inflateInit2 */
237 log_warn(LD_BUG, "Gzip not supported with zlib %s", ZLIB_VERSION);
238 return -1;
241 *out = NULL;
243 stream = tor_malloc_zero(sizeof(struct z_stream_s));
244 stream->zalloc = Z_NULL;
245 stream->zfree = Z_NULL;
246 stream->opaque = NULL;
247 stream->next_in = (unsigned char*) in;
248 stream->avail_in = (unsigned int)in_len;
250 if (inflateInit2(stream,
251 method_bits(method)) != Z_OK) {
252 log_warn(LD_GENERAL, "Error from inflateInit2: %s",
253 stream->msg?stream->msg:"<no message>");
254 goto err;
257 out_size = in_len * 2; /* guess 50% compression. */
258 if (out_size < 1024) out_size = 1024;
259 if (out_size >= SIZE_T_CEILING || out_size > UINT_MAX)
260 goto err;
262 *out = tor_malloc(out_size);
263 stream->next_out = (unsigned char*)*out;
264 stream->avail_out = (unsigned int)out_size;
266 while (1) {
267 switch (inflate(stream, complete_only ? Z_FINISH : Z_SYNC_FLUSH))
269 case Z_STREAM_END:
270 if (stream->avail_in == 0)
271 goto done;
272 /* There may be more compressed data here. */
273 if ((r = inflateEnd(stream)) != Z_OK) {
274 log_warn(LD_BUG, "Error freeing gzip structures");
275 goto err;
277 if (inflateInit2(stream, method_bits(method)) != Z_OK) {
278 log_warn(LD_GENERAL, "Error from second inflateInit2: %s",
279 stream->msg?stream->msg:"<no message>");
280 goto err;
282 break;
283 case Z_OK:
284 if (!complete_only && stream->avail_in == 0)
285 goto done;
286 /* In case zlib doesn't work as I think.... */
287 if (stream->avail_out >= stream->avail_in+16)
288 break;
289 case Z_BUF_ERROR:
290 if (stream->avail_out > 0) {
291 log_fn(protocol_warn_level, LD_PROTOCOL,
292 "possible truncated or corrupt zlib data");
293 goto err;
295 offset = stream->next_out - (unsigned char*)*out;
296 old_size = out_size;
297 out_size *= 2;
298 if (out_size < old_size) {
299 log_warn(LD_GENERAL, "Size overflow in uncompression.");
300 goto err;
302 if (is_compression_bomb(in_len, out_size)) {
303 log_warn(LD_GENERAL, "Input looks like a possible zlib bomb; "
304 "not proceeding.");
305 goto err;
307 if (out_size >= SIZE_T_CEILING) {
308 log_warn(LD_BUG, "Hit SIZE_T_CEILING limit while uncompressing.");
309 goto err;
311 *out = tor_realloc(*out, out_size);
312 stream->next_out = (unsigned char*)(*out + offset);
313 if (out_size - offset > UINT_MAX) {
314 log_warn(LD_BUG, "Ran over unsigned int limit of zlib while "
315 "uncompressing.");
316 goto err;
318 stream->avail_out = (unsigned int)(out_size - offset);
319 break;
320 default:
321 log_warn(LD_GENERAL, "Gzip decompression returned an error: %s",
322 stream->msg ? stream->msg : "<no message>");
323 goto err;
326 done:
327 *out_len = stream->next_out - (unsigned char*)*out;
328 r = inflateEnd(stream);
329 tor_free(stream);
330 if (r != Z_OK) {
331 log_warn(LD_BUG, "Error freeing gzip structures");
332 goto err;
335 /* NUL-terminate output. */
336 if (out_size == *out_len)
337 *out = tor_realloc(*out, out_size + 1);
338 (*out)[*out_len] = '\0';
340 return 0;
341 err:
342 if (stream) {
343 inflateEnd(stream);
344 tor_free(stream);
346 if (*out) {
347 tor_free(*out);
349 return -1;
352 /** Try to tell whether the <b>in_len</b>-byte string in <b>in</b> is likely
353 * to be compressed or not. If it is, return the likeliest compression method.
354 * Otherwise, return UNKNOWN_METHOD.
356 compress_method_t
357 detect_compression_method(const char *in, size_t in_len)
359 if (in_len > 2 && !memcmp(in, "\x1f\x8b", 2)) {
360 return GZIP_METHOD;
361 } else if (in_len > 2 && (in[0] & 0x0f) == 8 &&
362 (ntohs(get_uint16(in)) % 31) == 0) {
363 return ZLIB_METHOD;
364 } else {
365 return UNKNOWN_METHOD;
369 /** Internal state for an incremental zlib compression/decompression. The
370 * body of this struct is not exposed. */
371 struct tor_zlib_state_t {
372 struct z_stream_s stream;
373 int compress;
375 /* Number of bytes read so far. Used to detect zlib bombs. */
376 size_t input_so_far;
377 /* Number of bytes written so far. Used to detect zlib bombs. */
378 size_t output_so_far;
381 /** Construct and return a tor_zlib_state_t object using <b>method</b>. If
382 * <b>compress</b>, it's for compression; otherwise it's for
383 * decompression. */
384 tor_zlib_state_t *
385 tor_zlib_new(int compress, compress_method_t method)
387 tor_zlib_state_t *out;
389 if (method == GZIP_METHOD && !is_gzip_supported()) {
390 /* Old zlib version don't support gzip in inflateInit2 */
391 log_warn(LD_BUG, "Gzip not supported with zlib %s", ZLIB_VERSION);
392 return NULL;
395 out = tor_malloc_zero(sizeof(tor_zlib_state_t));
396 out->stream.zalloc = Z_NULL;
397 out->stream.zfree = Z_NULL;
398 out->stream.opaque = NULL;
399 out->compress = compress;
400 if (compress) {
401 if (deflateInit2(&out->stream, Z_BEST_COMPRESSION, Z_DEFLATED,
402 method_bits(method), 8, Z_DEFAULT_STRATEGY) != Z_OK)
403 goto err;
404 } else {
405 if (inflateInit2(&out->stream, method_bits(method)) != Z_OK)
406 goto err;
408 return out;
410 err:
411 tor_free(out);
412 return NULL;
415 /** Compress/decompress some bytes using <b>state</b>. Read up to
416 * *<b>in_len</b> bytes from *<b>in</b>, and write up to *<b>out_len</b> bytes
417 * to *<b>out</b>, adjusting the values as we go. If <b>finish</b> is true,
418 * we've reached the end of the input.
420 * Return TOR_ZLIB_DONE if we've finished the entire compression/decompression.
421 * Return TOR_ZLIB_OK if we're processed everything from the input.
422 * Return TOR_ZLIB_BUF_FULL if we're out of space on <b>out</b>.
423 * Return TOR_ZLIB_ERR if the stream is corrupt.
425 tor_zlib_output_t
426 tor_zlib_process(tor_zlib_state_t *state,
427 char **out, size_t *out_len,
428 const char **in, size_t *in_len,
429 int finish)
431 int err;
432 tor_assert(*in_len <= UINT_MAX);
433 tor_assert(*out_len <= UINT_MAX);
434 state->stream.next_in = (unsigned char*) *in;
435 state->stream.avail_in = (unsigned int)*in_len;
436 state->stream.next_out = (unsigned char*) *out;
437 state->stream.avail_out = (unsigned int)*out_len;
439 if (state->compress) {
440 err = deflate(&state->stream, finish ? Z_FINISH : Z_SYNC_FLUSH);
441 } else {
442 err = inflate(&state->stream, finish ? Z_FINISH : Z_SYNC_FLUSH);
445 state->input_so_far += state->stream.next_in - ((unsigned char*)*in);
446 state->output_so_far += state->stream.next_out - ((unsigned char*)*out);
448 *out = (char*) state->stream.next_out;
449 *out_len = state->stream.avail_out;
450 *in = (const char *) state->stream.next_in;
451 *in_len = state->stream.avail_in;
453 if (! state->compress &&
454 is_compression_bomb(state->input_so_far, state->output_so_far)) {
455 log_warn(LD_DIR, "Possible zlib bomb; abandoning stream.");
456 return TOR_ZLIB_ERR;
459 switch (err)
461 case Z_STREAM_END:
462 return TOR_ZLIB_DONE;
463 case Z_BUF_ERROR:
464 if (state->stream.avail_in == 0)
465 return TOR_ZLIB_OK;
466 return TOR_ZLIB_BUF_FULL;
467 case Z_OK:
468 if (state->stream.avail_out == 0 || finish)
469 return TOR_ZLIB_BUF_FULL;
470 return TOR_ZLIB_OK;
471 default:
472 log_warn(LD_GENERAL, "Gzip returned an error: %s",
473 state->stream.msg ? state->stream.msg : "<no message>");
474 return TOR_ZLIB_ERR;
478 /** Deallocate <b>state</b>. */
479 void
480 tor_zlib_free(tor_zlib_state_t *state)
482 tor_assert(state);
484 if (state->compress)
485 deflateEnd(&state->stream);
486 else
487 inflateEnd(&state->stream);
489 tor_free(state);