1 /* Copyright (c) 2004, Roger Dingledine.
2 * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson.
3 * Copyright (c) 2007-2013, The Tor Project, Inc. */
4 /* See LICENSE for licensing information */
8 * \brief A simple in-memory gzip implementation.
19 #ifdef HAVE_NETINET_IN_H
20 #include <netinet/in.h>
/* zlib 1.2.4 and 1.2.5 do some "clever" things with macros.  Instead of
   saying "(defined(FOO) ? FOO : 0)" they like to say "FOO-0", on the theory
   that nobody will care if the compile outputs a no-such-identifier warning.

   Sorry, but we like -Werror over here, so I guess we need to define these.
   I hope that zlib 1.2.6 doesn't break these too.
*/
#ifndef _LARGEFILE64_SOURCE
#define _LARGEFILE64_SOURCE 0
#endif
#ifndef _LFS64_LARGEFILE
#define _LFS64_LARGEFILE 0
#endif
#ifndef _FILE_OFFSET_BITS
#define _FILE_OFFSET_BITS 0
#endif
/* Guarded like the macros above so an existing off64_t macro is kept. */
#ifndef off64_t
#define off64_t int64_t
#endif
49 /** Set to 1 if zlib is a version that supports gzip; set to 0 if it doesn't;
50 * set to -1 if we haven't checked yet. */
51 static int gzip_is_supported
= -1;
53 /** Return true iff we support gzip-based compression. Otherwise, we need to
56 is_gzip_supported(void)
58 if (gzip_is_supported
>= 0)
59 return gzip_is_supported
;
61 if (!strcmpstart(ZLIB_VERSION
, "0.") ||
62 !strcmpstart(ZLIB_VERSION
, "1.0") ||
63 !strcmpstart(ZLIB_VERSION
, "1.1"))
64 gzip_is_supported
= 0;
66 gzip_is_supported
= 1;
68 return gzip_is_supported
;
/** Return a string representation of the version of the currently running
 * zlib library. */
74 tor_zlib_get_version_str(void)
/** Return a string representation of the version of zlib
 * used at compilation. */
82 tor_zlib_get_header_version_str(void)
87 /** Return the 'bits' value to tell zlib to use <b>method</b>.*/
89 method_bits(compress_method_t method
)
91 /* Bits+16 means "use gzip" in zlib >= 1.2 */
92 return method
== GZIP_METHOD
? 15+16 : 15;
/* These macros define the maximum allowable compression factor.  Anything of
 * size greater than CHECK_FOR_COMPRESSION_BOMB_AFTER is not allowed to
 * have an uncompression factor (uncompressed size:compressed size ratio) of
 * any greater than MAX_UNCOMPRESSION_FACTOR.
 *
 * Picking a value for MAX_UNCOMPRESSION_FACTOR is a trade-off: we want it to
 * be small to limit the attack multiplier, but we also want it to be large
 * enough so that no legitimate document --even ones we might invent in the
 * future -- ever compresses by a factor of greater than
 * MAX_UNCOMPRESSION_FACTOR. Within those parameters, there's a reasonably
 * large range of possible values. IMO, anything over 8 is probably safe; IMO
 * anything under 50 is probably sufficient.
 */
#define MAX_UNCOMPRESSION_FACTOR 25
#define CHECK_FOR_COMPRESSION_BOMB_AFTER (1024*64)

/** Return true if uncompressing an input of size <b>size_in</b> to an output
 * of size at least <b>size_out</b> looks like a compression bomb.
 *
 * Outputs smaller than CHECK_FOR_COMPRESSION_BOMB_AFTER are never flagged,
 * and neither is an empty input (which also avoids dividing by zero).  The
 * ratio uses integer division, so it must exceed MAX_UNCOMPRESSION_FACTOR
 * after truncation to count as a bomb.
 */
static int
is_compression_bomb(size_t size_in, size_t size_out)
{
  if (size_in == 0 || size_out < CHECK_FOR_COMPRESSION_BOMB_AFTER)
    return 0;

  return (size_out / size_in > MAX_UNCOMPRESSION_FACTOR);
}
124 /** Given <b>in_len</b> bytes at <b>in</b>, compress them into a newly
125 * allocated buffer, using the method described in <b>method</b>. Store the
126 * compressed string in *<b>out</b>, and its length in *<b>out_len</b>.
127 * Return 0 on success, -1 on failure.
130 tor_gzip_compress(char **out
, size_t *out_len
,
131 const char *in
, size_t in_len
,
132 compress_method_t method
)
134 struct z_stream_s
*stream
= NULL
;
135 size_t out_size
, old_size
;
141 tor_assert(in_len
< UINT_MAX
);
145 if (method
== GZIP_METHOD
&& !is_gzip_supported()) {
146 /* Old zlib version don't support gzip in deflateInit2 */
147 log_warn(LD_BUG
, "Gzip not supported with zlib %s", ZLIB_VERSION
);
151 stream
= tor_malloc_zero(sizeof(struct z_stream_s
));
152 stream
->zalloc
= Z_NULL
;
153 stream
->zfree
= Z_NULL
;
154 stream
->opaque
= NULL
;
155 stream
->next_in
= (unsigned char*) in
;
156 stream
->avail_in
= (unsigned int)in_len
;
158 if (deflateInit2(stream
, Z_BEST_COMPRESSION
, Z_DEFLATED
,
160 8, Z_DEFAULT_STRATEGY
) != Z_OK
) {
161 log_warn(LD_GENERAL
, "Error from deflateInit2: %s",
162 stream
->msg
?stream
->msg
:"<no message>");
166 /* Guess 50% compression. */
167 out_size
= in_len
/ 2;
168 if (out_size
< 1024) out_size
= 1024;
169 *out
= tor_malloc(out_size
);
170 stream
->next_out
= (unsigned char*)*out
;
171 stream
->avail_out
= (unsigned int)out_size
;
174 switch (deflate(stream
, Z_FINISH
))
179 /* In case zlib doesn't work as I think .... */
180 if (stream
->avail_out
>= stream
->avail_in
+16)
183 offset
= stream
->next_out
- ((unsigned char*)*out
);
186 if (out_size
< old_size
) {
187 log_warn(LD_GENERAL
, "Size overflow in compression.");
190 *out
= tor_realloc(*out
, out_size
);
191 stream
->next_out
= (unsigned char*)(*out
+ offset
);
192 if (out_size
- offset
> UINT_MAX
) {
193 log_warn(LD_BUG
, "Ran over unsigned int limit of zlib while "
197 stream
->avail_out
= (unsigned int)(out_size
- offset
);
200 log_warn(LD_GENERAL
, "Gzip compression didn't finish: %s",
201 stream
->msg
? stream
->msg
: "<no message>");
206 *out_len
= stream
->total_out
;
208 /* "Hey Rocky! Watch me change an unsigned field to a signed field in a
210 * "Oh, that trick will just make people do unsafe casts to the unsigned
211 * type in their cross-platform code!"
212 * "Don't be foolish. I'm _sure_ they'll have the good sense to make sure
213 * the newly unsigned field isn't negative." */
214 tor_assert(stream
->total_out
>= 0);
216 if (((size_t)stream
->total_out
) > out_size
+ 4097) {
217 /* If we're wasting more than 4k, don't. */
218 *out
= tor_realloc(*out
, stream
->total_out
+ 1);
220 if (deflateEnd(stream
)!=Z_OK
) {
221 log_warn(LD_BUG
, "Error freeing gzip structures");
226 if (is_compression_bomb(*out_len
, in_len
)) {
227 log_warn(LD_BUG
, "We compressed something and got an insanely high "
228 "compression factor; other Tors would think this was a zlib bomb.");
242 /** Given zero or more zlib-compressed or gzip-compressed strings of
244 * <b>in_len</b> bytes at <b>in</b>, uncompress them into a newly allocated
245 * buffer, using the method described in <b>method</b>. Store the uncompressed
246 * string in *<b>out</b>, and its length in *<b>out_len</b>. Return 0 on
247 * success, -1 on failure.
249 * If <b>complete_only</b> is true, we consider a truncated input as a
250 * failure; otherwise we decompress as much as we can. Warn about truncated
251 * or corrupt inputs at <b>protocol_warn_level</b>.
254 tor_gzip_uncompress(char **out
, size_t *out_len
,
255 const char *in
, size_t in_len
,
256 compress_method_t method
,
258 int protocol_warn_level
)
260 struct z_stream_s
*stream
= NULL
;
261 size_t out_size
, old_size
;
268 tor_assert(in_len
< UINT_MAX
);
270 if (method
== GZIP_METHOD
&& !is_gzip_supported()) {
271 /* Old zlib version don't support gzip in inflateInit2 */
272 log_warn(LD_BUG
, "Gzip not supported with zlib %s", ZLIB_VERSION
);
278 stream
= tor_malloc_zero(sizeof(struct z_stream_s
));
279 stream
->zalloc
= Z_NULL
;
280 stream
->zfree
= Z_NULL
;
281 stream
->opaque
= NULL
;
282 stream
->next_in
= (unsigned char*) in
;
283 stream
->avail_in
= (unsigned int)in_len
;
285 if (inflateInit2(stream
,
286 method_bits(method
)) != Z_OK
) {
287 log_warn(LD_GENERAL
, "Error from inflateInit2: %s",
288 stream
->msg
?stream
->msg
:"<no message>");
292 out_size
= in_len
* 2; /* guess 50% compression. */
293 if (out_size
< 1024) out_size
= 1024;
294 if (out_size
>= SIZE_T_CEILING
|| out_size
> UINT_MAX
)
297 *out
= tor_malloc(out_size
);
298 stream
->next_out
= (unsigned char*)*out
;
299 stream
->avail_out
= (unsigned int)out_size
;
302 switch (inflate(stream
, complete_only
? Z_FINISH
: Z_SYNC_FLUSH
))
305 if (stream
->avail_in
== 0)
307 /* There may be more compressed data here. */
308 if ((r
= inflateEnd(stream
)) != Z_OK
) {
309 log_warn(LD_BUG
, "Error freeing gzip structures");
312 if (inflateInit2(stream
, method_bits(method
)) != Z_OK
) {
313 log_warn(LD_GENERAL
, "Error from second inflateInit2: %s",
314 stream
->msg
?stream
->msg
:"<no message>");
319 if (!complete_only
&& stream
->avail_in
== 0)
321 /* In case zlib doesn't work as I think.... */
322 if (stream
->avail_out
>= stream
->avail_in
+16)
325 if (stream
->avail_out
> 0) {
326 log_fn(protocol_warn_level
, LD_PROTOCOL
,
327 "possible truncated or corrupt zlib data");
330 offset
= stream
->next_out
- (unsigned char*)*out
;
333 if (out_size
< old_size
) {
334 log_warn(LD_GENERAL
, "Size overflow in uncompression.");
337 if (is_compression_bomb(in_len
, out_size
)) {
338 log_warn(LD_GENERAL
, "Input looks like a possible zlib bomb; "
342 if (out_size
>= SIZE_T_CEILING
) {
343 log_warn(LD_BUG
, "Hit SIZE_T_CEILING limit while uncompressing.");
346 *out
= tor_realloc(*out
, out_size
);
347 stream
->next_out
= (unsigned char*)(*out
+ offset
);
348 if (out_size
- offset
> UINT_MAX
) {
349 log_warn(LD_BUG
, "Ran over unsigned int limit of zlib while "
353 stream
->avail_out
= (unsigned int)(out_size
- offset
);
356 log_warn(LD_GENERAL
, "Gzip decompression returned an error: %s",
357 stream
->msg
? stream
->msg
: "<no message>");
362 *out_len
= stream
->next_out
- (unsigned char*)*out
;
363 r
= inflateEnd(stream
);
366 log_warn(LD_BUG
, "Error freeing gzip structures");
370 /* NUL-terminate output. */
371 if (out_size
== *out_len
)
372 *out
= tor_realloc(*out
, out_size
+ 1);
373 (*out
)[*out_len
] = '\0';
387 /** Try to tell whether the <b>in_len</b>-byte string in <b>in</b> is likely
388 * to be compressed or not. If it is, return the likeliest compression method.
389 * Otherwise, return UNKNOWN_METHOD.
392 detect_compression_method(const char *in
, size_t in_len
)
394 if (in_len
> 2 && fast_memeq(in
, "\x1f\x8b", 2)) {
396 } else if (in_len
> 2 && (in
[0] & 0x0f) == 8 &&
397 (ntohs(get_uint16(in
)) % 31) == 0) {
400 return UNKNOWN_METHOD
;
404 /** Internal state for an incremental zlib compression/decompression. The
405 * body of this struct is not exposed. */
406 struct tor_zlib_state_t
{
407 struct z_stream_s stream
; /**< The zlib stream */
408 int compress
; /**< True if we are compressing; false if we are inflating */
410 /** Number of bytes read so far. Used to detect zlib bombs. */
412 /** Number of bytes written so far. Used to detect zlib bombs. */
413 size_t output_so_far
;
416 /** Construct and return a tor_zlib_state_t object using <b>method</b>. If
417 * <b>compress</b>, it's for compression; otherwise it's for
420 tor_zlib_new(int compress
, compress_method_t method
)
422 tor_zlib_state_t
*out
;
424 if (method
== GZIP_METHOD
&& !is_gzip_supported()) {
425 /* Old zlib version don't support gzip in inflateInit2 */
426 log_warn(LD_BUG
, "Gzip not supported with zlib %s", ZLIB_VERSION
);
430 out
= tor_malloc_zero(sizeof(tor_zlib_state_t
));
431 out
->stream
.zalloc
= Z_NULL
;
432 out
->stream
.zfree
= Z_NULL
;
433 out
->stream
.opaque
= NULL
;
434 out
->compress
= compress
;
436 if (deflateInit2(&out
->stream
, Z_BEST_COMPRESSION
, Z_DEFLATED
,
437 method_bits(method
), 8, Z_DEFAULT_STRATEGY
) != Z_OK
)
440 if (inflateInit2(&out
->stream
, method_bits(method
)) != Z_OK
)
450 /** Compress/decompress some bytes using <b>state</b>. Read up to
451 * *<b>in_len</b> bytes from *<b>in</b>, and write up to *<b>out_len</b> bytes
452 * to *<b>out</b>, adjusting the values as we go. If <b>finish</b> is true,
453 * we've reached the end of the input.
455 * Return TOR_ZLIB_DONE if we've finished the entire compression/decompression.
456 * Return TOR_ZLIB_OK if we're processed everything from the input.
457 * Return TOR_ZLIB_BUF_FULL if we're out of space on <b>out</b>.
458 * Return TOR_ZLIB_ERR if the stream is corrupt.
461 tor_zlib_process(tor_zlib_state_t
*state
,
462 char **out
, size_t *out_len
,
463 const char **in
, size_t *in_len
,
467 tor_assert(*in_len
<= UINT_MAX
);
468 tor_assert(*out_len
<= UINT_MAX
);
469 state
->stream
.next_in
= (unsigned char*) *in
;
470 state
->stream
.avail_in
= (unsigned int)*in_len
;
471 state
->stream
.next_out
= (unsigned char*) *out
;
472 state
->stream
.avail_out
= (unsigned int)*out_len
;
474 if (state
->compress
) {
475 err
= deflate(&state
->stream
, finish
? Z_FINISH
: Z_SYNC_FLUSH
);
477 err
= inflate(&state
->stream
, finish
? Z_FINISH
: Z_SYNC_FLUSH
);
480 state
->input_so_far
+= state
->stream
.next_in
- ((unsigned char*)*in
);
481 state
->output_so_far
+= state
->stream
.next_out
- ((unsigned char*)*out
);
483 *out
= (char*) state
->stream
.next_out
;
484 *out_len
= state
->stream
.avail_out
;
485 *in
= (const char *) state
->stream
.next_in
;
486 *in_len
= state
->stream
.avail_in
;
488 if (! state
->compress
&&
489 is_compression_bomb(state
->input_so_far
, state
->output_so_far
)) {
490 log_warn(LD_DIR
, "Possible zlib bomb; abandoning stream.");
497 return TOR_ZLIB_DONE
;
499 if (state
->stream
.avail_in
== 0)
501 return TOR_ZLIB_BUF_FULL
;
503 if (state
->stream
.avail_out
== 0 || finish
)
504 return TOR_ZLIB_BUF_FULL
;
507 log_warn(LD_GENERAL
, "Gzip returned an error: %s",
508 state
->stream
.msg
? state
->stream
.msg
: "<no message>");
513 /** Deallocate <b>state</b>. */
515 tor_zlib_free(tor_zlib_state_t
*state
)
521 deflateEnd(&state
->stream
);
523 inflateEnd(&state
->stream
);