1 /* Copyright (c) 2004, Roger Dingledine.
2 * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson.
3 * Copyright (c) 2007-2011, The Tor Project, Inc. */
4 /* See LICENSE for licensing information */
8 * \brief A simple in-memory gzip implementation.
17 #include "..\..\contrib\zlib\zlib.h"
22 #ifdef HAVE_NETINET_IN_H
23 #include <netinet/in.h>
30 /** Set to 1 if zlib is a version that supports gzip; set to 0 if it doesn't;
31 * set to -1 if we haven't checked yet. */
32 static int gzip_is_supported
= -1;
34 /** Return true iff we support gzip-based compression. Otherwise, we need to
37 is_gzip_supported(void)
39 if (gzip_is_supported
>= 0)
40 return gzip_is_supported
;
42 if (!strcmpstart(ZLIB_VERSION
, "0.") ||
43 !strcmpstart(ZLIB_VERSION
, "1.0") ||
44 !strcmpstart(ZLIB_VERSION
, "1.1"))
45 gzip_is_supported
= 0;
47 gzip_is_supported
= 1;
49 return gzip_is_supported
;
52 /** Return the 'bits' value to tell zlib to use <b>method</b>.*/
54 method_bits(compress_method_t method
)
56 /* Bits+16 means "use gzip" in zlib >= 1.2 */
57 return method
== GZIP_METHOD
? 15+16 : 15;
/* These macros define the maximum allowable compression factor.  Anything of
 * size greater than CHECK_FOR_COMPRESSION_BOMB_AFTER is not allowed to
 * have an uncompression factor (uncompressed size:compressed size ratio) of
 * any greater than MAX_UNCOMPRESSION_FACTOR.
 *
 * Picking a value for MAX_UNCOMPRESSION_FACTOR is a trade-off: we want it to
 * be small to limit the attack multiplier, but we also want it to be large
 * enough so that no legitimate document --even ones we might invent in the
 * future -- ever compresses by a factor of greater than
 * MAX_UNCOMPRESSION_FACTOR. Within those parameters, there's a reasonably
 * large range of possible values. IMO, anything over 8 is probably safe; IMO
 * anything under 50 is probably sufficient.
 */
#define MAX_UNCOMPRESSION_FACTOR 25
#define CHECK_FOR_COMPRESSION_BOMB_AFTER (1024*64)

/** Return true if uncompressing an input of <b>size_in</b> bytes to an
 * output of at least <b>size_out</b> bytes looks like a compression bomb.
 *
 * An empty input, or an output smaller than
 * CHECK_FOR_COMPRESSION_BOMB_AFTER, is never considered a bomb.  Otherwise
 * the (integer) ratio size_out/size_in must exceed MAX_UNCOMPRESSION_FACTOR
 * for us to return true. */
static int
is_compression_bomb(size_t size_in, size_t size_out)
{
  const int output_is_large = (size_out >= CHECK_FOR_COMPRESSION_BOMB_AFTER);

  if (size_in != 0 && output_is_large) {
    /* Integer division: a ratio of exactly MAX_UNCOMPRESSION_FACTOR (or
     * anything that truncates to it) is still acceptable. */
    return (size_out / size_in) > MAX_UNCOMPRESSION_FACTOR;
  }

  return 0;
}
87 /** Given <b>in_len</b> bytes at <b>in</b>, compress them into a newly
88 * allocated buffer, using the method described in <b>method</b>. Store the
89 * compressed string in *<b>out</b>, and its length in *<b>out_len</b>.
90 * Return 0 on success, -1 on failure.
93 tor_gzip_compress(char **out
, size_t *out_len
,
94 const char *in
, size_t in_len
,
95 compress_method_t method
)
97 struct z_stream_s
*stream
= NULL
;
98 size_t out_size
, old_size
;
104 tor_assert(in_len
< UINT_MAX
);
108 if (method
== GZIP_METHOD
&& !is_gzip_supported()) {
109 /* Old zlib version don't support gzip in deflateInit2 */
110 log_warn(LD_BUG
, "Gzip not supported with zlib %s", ZLIB_VERSION
);
114 stream
= tor_malloc_zero(sizeof(struct z_stream_s
));
115 stream
->zalloc
= Z_NULL
;
116 stream
->zfree
= Z_NULL
;
117 stream
->opaque
= NULL
;
118 stream
->next_in
= (unsigned char*) in
;
119 stream
->avail_in
= (unsigned int)in_len
;
121 if (deflateInit2(stream
, Z_BEST_COMPRESSION
, Z_DEFLATED
,
123 8, Z_DEFAULT_STRATEGY
) != Z_OK
) {
124 log_warn(LD_GENERAL
, "Error from deflateInit2: %s",
125 stream
->msg
?stream
->msg
:"<no message>");
129 /* Guess 50% compression. */
130 out_size
= in_len
/ 2;
131 if (out_size
< 1024) out_size
= 1024;
132 *out
= tor_malloc(out_size
);
133 stream
->next_out
= (unsigned char*)*out
;
134 stream
->avail_out
= (unsigned int)out_size
;
137 switch (deflate(stream
, Z_FINISH
))
142 /* In case zlib doesn't work as I think .... */
143 if (stream
->avail_out
>= stream
->avail_in
+16)
146 offset
= stream
->next_out
- ((unsigned char*)*out
);
149 if (out_size
< old_size
) {
150 log_warn(LD_GENERAL
, "Size overflow in compression.");
153 *out
= tor_realloc(*out
, out_size
);
154 stream
->next_out
= (unsigned char*)(*out
+ offset
);
155 if (out_size
- offset
> UINT_MAX
) {
156 log_warn(LD_BUG
, "Ran over unsigned int limit of zlib while "
160 stream
->avail_out
= (unsigned int)(out_size
- offset
);
163 log_warn(LD_GENERAL
, "Gzip compression didn't finish: %s",
164 stream
->msg
? stream
->msg
: "<no message>");
169 *out_len
= stream
->total_out
;
171 /* "Hey Rocky! Watch me change an unsigned field to a signed field in a
173 * "Oh, that trick will just make people do unsafe casts to the unsigned
174 * type in their cross-platform code!"
175 * "Don't be foolish. I'm _sure_ they'll have the good sense to make sure
176 * the newly unsigned field isn't negative." */
177 tor_assert(stream
->total_out
>= 0);
179 if (((size_t)stream
->total_out
) > out_size
+ 4097) {
180 /* If we're wasting more than 4k, don't. */
181 *out
= tor_realloc(*out
, stream
->total_out
+ 1);
183 if (deflateEnd(stream
)!=Z_OK
) {
184 log_warn(LD_BUG
, "Error freeing gzip structures");
189 if (is_compression_bomb(*out_len
, in_len
)) {
190 log_warn(LD_BUG
, "We compressed something and got an insanely high "
191 "compression factor; other Tors would think this was a zlib bomb.");
207 /** Given zero or more zlib-compressed or gzip-compressed strings of
209 * <b>in_len</b> bytes at <b>in</b>, uncompress them into a newly allocated
210 * buffer, using the method described in <b>method</b>. Store the uncompressed
211 * string in *<b>out</b>, and its length in *<b>out_len</b>. Return 0 on
212 * success, -1 on failure.
214 * If <b>complete_only</b> is true, we consider a truncated input as a
215 * failure; otherwise we decompress as much as we can. Warn about truncated
216 * or corrupt inputs at <b>protocol_warn_level</b>.
219 tor_gzip_uncompress(char **out
, size_t *out_len
,
220 const char *in
, size_t in_len
,
221 compress_method_t method
,
223 int protocol_warn_level
)
225 struct z_stream_s
*stream
= NULL
;
226 size_t out_size
, old_size
;
233 tor_assert(in_len
< UINT_MAX
);
235 if (method
== GZIP_METHOD
&& !is_gzip_supported()) {
236 /* Old zlib version don't support gzip in inflateInit2 */
237 log_warn(LD_BUG
, "Gzip not supported with zlib %s", ZLIB_VERSION
);
243 stream
= tor_malloc_zero(sizeof(struct z_stream_s
));
244 stream
->zalloc
= Z_NULL
;
245 stream
->zfree
= Z_NULL
;
246 stream
->opaque
= NULL
;
247 stream
->next_in
= (unsigned char*) in
;
248 stream
->avail_in
= (unsigned int)in_len
;
250 if (inflateInit2(stream
,
251 method_bits(method
)) != Z_OK
) {
252 log_warn(LD_GENERAL
, "Error from inflateInit2: %s",
253 stream
->msg
?stream
->msg
:"<no message>");
257 out_size
= in_len
* 2; /* guess 50% compression. */
258 if (out_size
< 1024) out_size
= 1024;
259 if (out_size
>= SIZE_T_CEILING
|| out_size
> UINT_MAX
)
262 *out
= tor_malloc(out_size
);
263 stream
->next_out
= (unsigned char*)*out
;
264 stream
->avail_out
= (unsigned int)out_size
;
267 switch (inflate(stream
, complete_only
? Z_FINISH
: Z_SYNC_FLUSH
))
270 if (stream
->avail_in
== 0)
272 /* There may be more compressed data here. */
273 if ((r
= inflateEnd(stream
)) != Z_OK
) {
274 log_warn(LD_BUG
, "Error freeing gzip structures");
277 if (inflateInit2(stream
, method_bits(method
)) != Z_OK
) {
278 log_warn(LD_GENERAL
, "Error from second inflateInit2: %s",
279 stream
->msg
?stream
->msg
:"<no message>");
284 if (!complete_only
&& stream
->avail_in
== 0)
286 /* In case zlib doesn't work as I think.... */
287 if (stream
->avail_out
>= stream
->avail_in
+16)
290 if (stream
->avail_out
> 0) {
291 log_fn(protocol_warn_level
, LD_PROTOCOL
,
292 "possible truncated or corrupt zlib data");
295 offset
= stream
->next_out
- (unsigned char*)*out
;
298 if (out_size
< old_size
) {
299 log_warn(LD_GENERAL
, "Size overflow in uncompression.");
302 if (is_compression_bomb(in_len
, out_size
)) {
303 log_warn(LD_GENERAL
, "Input looks like a possible zlib bomb; "
307 if (out_size
>= SIZE_T_CEILING
) {
308 log_warn(LD_BUG
, "Hit SIZE_T_CEILING limit while uncompressing.");
311 *out
= tor_realloc(*out
, out_size
);
312 stream
->next_out
= (unsigned char*)(*out
+ offset
);
313 if (out_size
- offset
> UINT_MAX
) {
314 log_warn(LD_BUG
, "Ran over unsigned int limit of zlib while "
318 stream
->avail_out
= (unsigned int)(out_size
- offset
);
321 log_warn(LD_GENERAL
, "Gzip decompression returned an error: %s",
322 stream
->msg
? stream
->msg
: "<no message>");
327 *out_len
= stream
->next_out
- (unsigned char*)*out
;
328 r
= inflateEnd(stream
);
331 log_warn(LD_BUG
, "Error freeing gzip structures");
335 /* NUL-terminate output. */
336 if (out_size
== *out_len
)
337 *out
= tor_realloc(*out
, out_size
+ 1);
338 (*out
)[*out_len
] = '\0';
352 /** Try to tell whether the <b>in_len</b>-byte string in <b>in</b> is likely
353 * to be compressed or not. If it is, return the likeliest compression method.
354 * Otherwise, return UNKNOWN_METHOD.
357 detect_compression_method(const char *in
, size_t in_len
)
359 if (in_len
> 2 && !memcmp(in
, "\x1f\x8b", 2)) {
361 } else if (in_len
> 2 && (in
[0] & 0x0f) == 8 &&
362 (ntohs(get_uint16(in
)) % 31) == 0) {
365 return UNKNOWN_METHOD
;
369 /** Internal state for an incremental zlib compression/decompression. The
370 * body of this struct is not exposed. */
371 struct tor_zlib_state_t
{
372 struct z_stream_s stream
;
375 /* Number of bytes read so far. Used to detect zlib bombs. */
377 /* Number of bytes written so far. Used to detect zlib bombs. */
378 size_t output_so_far
;
381 /** Construct and return a tor_zlib_state_t object using <b>method</b>. If
382 * <b>compress</b>, it's for compression; otherwise it's for
385 tor_zlib_new(int compress
, compress_method_t method
)
387 tor_zlib_state_t
*out
;
389 if (method
== GZIP_METHOD
&& !is_gzip_supported()) {
390 /* Old zlib version don't support gzip in inflateInit2 */
391 log_warn(LD_BUG
, "Gzip not supported with zlib %s", ZLIB_VERSION
);
395 out
= tor_malloc_zero(sizeof(tor_zlib_state_t
));
396 out
->stream
.zalloc
= Z_NULL
;
397 out
->stream
.zfree
= Z_NULL
;
398 out
->stream
.opaque
= NULL
;
399 out
->compress
= compress
;
401 if (deflateInit2(&out
->stream
, Z_BEST_COMPRESSION
, Z_DEFLATED
,
402 method_bits(method
), 8, Z_DEFAULT_STRATEGY
) != Z_OK
)
405 if (inflateInit2(&out
->stream
, method_bits(method
)) != Z_OK
)
415 /** Compress/decompress some bytes using <b>state</b>. Read up to
416 * *<b>in_len</b> bytes from *<b>in</b>, and write up to *<b>out_len</b> bytes
417 * to *<b>out</b>, adjusting the values as we go. If <b>finish</b> is true,
418 * we've reached the end of the input.
420 * Return TOR_ZLIB_DONE if we've finished the entire compression/decompression.
421 * Return TOR_ZLIB_OK if we're processed everything from the input.
422 * Return TOR_ZLIB_BUF_FULL if we're out of space on <b>out</b>.
423 * Return TOR_ZLIB_ERR if the stream is corrupt.
426 tor_zlib_process(tor_zlib_state_t
*state
,
427 char **out
, size_t *out_len
,
428 const char **in
, size_t *in_len
,
432 tor_assert(*in_len
<= UINT_MAX
);
433 tor_assert(*out_len
<= UINT_MAX
);
434 state
->stream
.next_in
= (unsigned char*) *in
;
435 state
->stream
.avail_in
= (unsigned int)*in_len
;
436 state
->stream
.next_out
= (unsigned char*) *out
;
437 state
->stream
.avail_out
= (unsigned int)*out_len
;
439 if (state
->compress
) {
440 err
= deflate(&state
->stream
, finish
? Z_FINISH
: Z_SYNC_FLUSH
);
442 err
= inflate(&state
->stream
, finish
? Z_FINISH
: Z_SYNC_FLUSH
);
445 state
->input_so_far
+= state
->stream
.next_in
- ((unsigned char*)*in
);
446 state
->output_so_far
+= state
->stream
.next_out
- ((unsigned char*)*out
);
448 *out
= (char*) state
->stream
.next_out
;
449 *out_len
= state
->stream
.avail_out
;
450 *in
= (const char *) state
->stream
.next_in
;
451 *in_len
= state
->stream
.avail_in
;
453 if (! state
->compress
&&
454 is_compression_bomb(state
->input_so_far
, state
->output_so_far
)) {
455 log_warn(LD_DIR
, "Possible zlib bomb; abandoning stream.");
462 return TOR_ZLIB_DONE
;
464 if (state
->stream
.avail_in
== 0)
466 return TOR_ZLIB_BUF_FULL
;
468 if (state
->stream
.avail_out
== 0 || finish
)
469 return TOR_ZLIB_BUF_FULL
;
472 log_warn(LD_GENERAL
, "Gzip returned an error: %s",
473 state
->stream
.msg
? state
->stream
.msg
: "<no message>");
478 /** Deallocate <b>state</b>. */
480 tor_zlib_free(tor_zlib_state_t
*state
)
485 deflateEnd(&state
->stream
);
487 inflateEnd(&state
->stream
);