2 +----------------------------------------------------------------------+
4 +----------------------------------------------------------------------+
5 | Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com) |
6 | Copyright (c) 1997-2010 The PHP Group |
7 +----------------------------------------------------------------------+
8 | This source file is subject to version 3.01 of the PHP license, |
9 | that is bundled with this package in the file LICENSE, and is |
10 | available through the world-wide-web at the following url: |
11 | http://www.php.net/license/3_01.txt |
12 | If you did not receive a copy of the PHP license and are unable to |
13 | obtain it through the world-wide-web, please send a note to |
14 | license@php.net so we can mail you a copy immediately. |
15 +----------------------------------------------------------------------+
18 #include "hphp/util/compression.h"
20 #include "hphp/util/exception.h"
21 #include "hphp/util/logger.h"
// Sizing constants shared by the compression routines in this file.
// PHP_ZLIB_MODIFIER is the divisor of the slack term added to the estimated
// deflate output size (avail_in / PHP_ZLIB_MODIFIER).
23 #define PHP_ZLIB_MODIFIER 1000
// Bytes reserved at the front of an output buffer for the gzip header.
24 #define GZIP_HEADER_LENGTH 10
// Bytes reserved for the gzip trailer: 4 for the CRC-32 and 4 for total_in.
25 #define GZIP_FOOTER_LENGTH 8
// The two leading bytes check_header() requires at the start of its input.
29 static const int gz_magic
[2] = {0x1f, 0x8b}; /* gzip magic header */
30 ///////////////////////////////////////////////////////////////////////////////
31 // This check_header() function is copied from zlib 1.2.3 and refactored to
32 // work with in-memory buffers (rather than file streams).
34 /* zlib.h -- interface of the 'zlib' general purpose compression library
35 version 1.2.3, July 18th, 2005
37 Copyright (C) 1995-2005 Jean-loup Gailly and Mark Adler
39 This software is provided 'as-is', without any express or implied
40 warranty. In no event will the authors be held liable for any damages
41 arising from the use of this software.
43 Permission is granted to anyone to use this software for any purpose,
44 including commercial applications, and to alter it and redistribute it
45 freely, subject to the following restrictions:
47 1. The origin of this software must not be misrepresented; you must not
48 claim that you wrote the original software. If you use this software
49 in a product, an acknowledgment in the product documentation would be
50 appreciated but is not required.
51 2. Altered source versions must be plainly marked as such, and must not be
52 misrepresented as being the original software.
53 3. This notice may not be removed or altered from any source distribution.
55 Jean-loup Gailly Mark Adler
56 jloup@gzip.org madler@alumni.caltech.edu
59 The data format used by the zlib library is described by RFCs (Request for
60 Comments) 1950 to 1952 in the files http://www.ietf.org/rfc/rfc1950.txt
61 (zlib format), rfc1951.txt (deflate format) and rfc1952.txt (gzip format).
// Bit masks for the gzip header flags byte. check_header() tests the flags
// byte it reads against EXTRA_FIELD, ORIG_NAME, COMMENT, HEAD_CRC and RESERVED.
65 #define ASCII_FLAG 0x01 /* bit 0 set: file probably ascii text */
66 #define HEAD_CRC 0x02 /* bit 1 set: header CRC present */
67 #define EXTRA_FIELD 0x04 /* bit 2 set: extra field present */
68 #define ORIG_NAME 0x08 /* bit 3 set: original file name present */
69 #define COMMENT 0x10 /* bit 4 set: file comment present */
70 #define RESERVED 0xE0 /* bits 5..7: reserved */
72 /* ===========================================================================
73 Read a byte from a gz_stream; update next_in and avail_in. Return EOF
75 IN assertion: the stream s has been successfully opened for reading.
// Consume one byte from the stream's input buffer.
// The visible code tests avail_in for exhaustion, then returns the byte at
// next_in and advances the pointer (post-increment).
// NOTE(review): the statements between the avail_in test and the final return
// are not visible in this listing. Callers compare the result against EOF, so
// presumably the empty-input branch returns EOF -- confirm.
77 static int get_byte(z_stream
&stream
) {
78 if (stream
.avail_in
== 0) {
82 return *(stream
.next_in
)++;
85 /* ===========================================================================
86 Check the gzip header of a gz_stream opened for reading. Set the stream
87 mode to transparent if the gzip magic header is not present; set s->err
88 to Z_DATA_ERROR if the magic header is present but the rest of the header
90 IN assertion: the stream s has already been created successfully;
91 s->stream.avail_in is zero for the first time, but may be non-zero
92 for concatenated .gz files.
// Validate and skip the gzip member header at the front of stream's input.
// The magic bytes are inspected directly through next_in; every later header
// byte is consumed with get_byte(). The final result is Z_DATA_ERROR when no
// input remains after the header and Z_OK otherwise.
// NOTE(review): the declarations of len and c and the bodies of the rejection
// branches are not visible in this listing -- confirm against the full file.
94 static int check_header(z_stream
&stream
) {
95 int method
; /* method byte */
96 int flags
; /* flags byte */
100 /* Assure two bytes in the buffer so we can peek ahead -- handle case
101 where first byte of header is at the end of the buffer after the last
103 len
= stream
.avail_in
;
108 /* Peek ahead to check the gzip magic header */
109 if (stream
.next_in
[0] != gz_magic
[0] || stream
.next_in
[1] != gz_magic
[1]) {
// Account for the two magic bytes in the remaining-input counter.
// NOTE(review): the matching advance of next_in is not visible here -- confirm.
112 stream
.avail_in
-= 2;
115 /* Check the rest of the gzip header */
116 method
= get_byte(stream
);
117 flags
= get_byte(stream
);
// Branch taken for any method other than deflate, or when a reserved flag
// bit is set.
118 if (method
!= Z_DEFLATED
|| (flags
& RESERVED
) != 0) {
122 /* Discard time, xflags and OS code: */
123 for (len
= 0; len
< 6; len
++) (void)get_byte(stream
);
125 if ((flags
& EXTRA_FIELD
) != 0) { /* skip the extra field */
// The extra-field length is read as two bytes, low-order byte first.
126 len
= (uInt
)get_byte(stream
);
127 len
+= ((uInt
)get_byte(stream
))<<8;
128 /* len is garbage if EOF but the loop below will quit anyway */
129 while (len
-- != 0 && get_byte(stream
) != EOF
) ;
131 if ((flags
& ORIG_NAME
) != 0) { /* skip the original file name */
// Consume bytes up to and including the terminating zero byte, or until EOF.
132 while ((c
= get_byte(stream
)) != 0 && c
!= EOF
) ;
134 if ((flags
& COMMENT
) != 0) { /* skip the .gz file comment */
// Same zero-terminated scan as for the file name.
135 while ((c
= get_byte(stream
)) != 0 && c
!= EOF
) ;
137 if ((flags
& HEAD_CRC
) != 0) { /* skip the header crc */
138 for (len
= 0; len
< 2; len
++) (void)get_byte(stream
);
// Running out of input at this point means nothing follows the header.
140 return stream
.avail_in
== 0 ? Z_DATA_ERROR
: Z_OK
;
143 ///////////////////////////////////////////////////////////////////////////////
// Extension-based predicate over a file name.
// The visible code remembers the position of the last '.' in filename and
// compares against a fixed table of extensions with strcmp (case-sensitive).
// NOTE(review): the end of the table, any guard or adjustment applied to
// `dot` before the comparison, and the return statements are not visible in
// this listing -- confirm the result for matching and non-matching names.
145 bool is_compressible_file(const char *filename
) {
146 static const char *ext
[] = {
147 "gif", "png", "jpeg", "jpg", "tiff", "swf", "zip", "gz", "bz2", "cab",
148 "bmp", "xcf", "mp3", "wav", "rsrc", "ico", "jar", "exe", "dll", "so",
// Scan the whole name so that `dot` ends up at the last '.' (or stays null).
150 const char *dot
= nullptr;
151 for (const char *p
= filename
; *p
; p
++) {
152 if (*p
== '.') dot
= p
;
// Linear search of the extension table.
156 for (unsigned int i
= 0; i
< sizeof(ext
)/sizeof(ext
[0]); i
++) {
157 if (strcmp(dot
, ext
[i
]) == 0) {
165 ///////////////////////////////////////////////////////////////////////////////
// Construct a streaming deflate compressor.
//   level         - zlib compression level; an Exception is thrown unless it
//                   lies in -1..9.
//   encoding_mode - CODING_GZIP or CODING_DEFLATE; any other value throws.
//   header        - stored in m_header, which compress() consults when
//                   handling the gzip header.
// Throws Exception carrying zError(status) when zlib initialisation fails.
// NOTE(review): the rest of the initializer list and the case labels of the
// switch are not visible in this listing -- confirm which mode uses which
// initialisation call.
168 StreamCompressor::StreamCompressor(int level
, int encoding_mode
, bool header
)
169 : m_encoding(encoding_mode
), m_header(header
),
171 if (level
< -1 || level
> 9) {
172 throw Exception("compression level(%d) must be within -1..9", level
);
174 if (encoding_mode
!= CODING_GZIP
&& encoding_mode
!= CODING_DEFLATE
) {
175 throw Exception("encoding mode must be FORCE_GZIP or FORCE_DEFLATE");
// Null allocator hooks and empty input/output windows; compress() fills in
// the buffers on every call.
178 m_stream
.zalloc
= Z_NULL
;
179 m_stream
.zfree
= Z_NULL
;
180 m_stream
.opaque
= Z_NULL
;
181 m_stream
.total_in
= 0;
182 m_stream
.next_in
= Z_NULL
;
183 m_stream
.avail_in
= 0;
184 m_stream
.avail_out
= 0;
185 m_stream
.next_out
= Z_NULL
;
// Start the running checksum; compress() updates m_crc with each chunk.
187 m_crc
= crc32(0L, Z_NULL
, 0);
190 switch (encoding_mode
) {
192 /* windowBits is passed < 0 to suppress zlib header & trailer */
193 if ((status
= deflateInit2(&m_stream
, level
, Z_DEFLATED
, -MAX_WBITS
,
194 MAX_MEM_LEVEL
, Z_DEFAULT_STRATEGY
)) != Z_OK
) {
195 throw Exception("%s", zError(status
));
// Other mode: plain deflateInit() with default window settings.
199 if ((status
= deflateInit(&m_stream
, level
)) != Z_OK
) {
200 throw Exception("%s", zError(status
));
// Release the zlib deflate state held in m_stream.
// NOTE(review): compress() also calls deflateEnd() once the stream has
// finished; any guard around the call below is not visible in this listing --
// confirm the stream cannot be ended twice.
206 StreamCompressor::~StreamCompressor() {
208 deflateEnd(&m_stream
);
// Deflate one chunk of input through m_stream into a freshly malloc()ed buffer.
//   data    - input bytes for this chunk.
//   len     - number of input bytes on entry. The visible code adds
//             GZIP_FOOTER_LENGTH to it when the gzip trailer is written.
//             NOTE(review): presumably it is also set to the output length on
//             success, but that assignment is not visible here -- confirm.
//   trailer - true for the last chunk: deflate() runs with Z_FINISH and, in
//             CODING_GZIP mode, the CRC-32 / input-size trailer is appended.
// The zlib error text is reported through Logger::Error on the failure path.
// NOTE(review): the return statements are not visible in this listing;
// presumably s2 is returned and owned by the caller -- confirm.
212 char *StreamCompressor::compress(const char *data
, int &len
, bool trailer
) {
213 // middle chunks should never be zero size
214 assert(len
|| trailer
);
216 m_stream
.next_in
= (Bytef
*)data
;
217 m_stream
.avail_in
= len
;
// Reset total_out so that it counts only the output of this call.
218 m_stream
.total_out
= 0;
// Output estimate: input size plus 0.1% plus a small constant.
220 m_stream
.avail_out
= m_stream
.avail_in
+
221 (m_stream
.avail_in
/ PHP_ZLIB_MODIFIER
) + 15 + 1; /* room for \0 */
// Header space is always included; footer space only when this call can
// write the trailer.
// NOTE(review): no check of the malloc() result is visible in this listing.
222 char *s2
= (char *)malloc
223 (m_stream
.avail_out
+ GZIP_HEADER_LENGTH
+
224 ((trailer
&& m_encoding
== CODING_GZIP
) ? GZIP_FOOTER_LENGTH
: 0));
226 /* add gzip file header */
// Snapshot m_header: the member is cleared below, but the local is still
// needed when new_len is computed.
227 bool header
= m_header
;
232 s2
[3] = s2
[4] = s2
[5] = s2
[6] = s2
[7] = s2
[8] = 0; /* time set to 0 */
233 s2
[9] = 0x03; // OS_CODE
234 m_stream
.next_out
= (Bytef
*)&(s2
[GZIP_HEADER_LENGTH
]);
235 m_header
= false; // only the 1st chunk got it
// Alternative placement: output starts at the beginning of the buffer.
// NOTE(review): the branch structure choosing between the two next_out
// assignments is not visible in this listing.
237 m_stream
.next_out
= (Bytef
*)s2
;
// Z_FINISH terminates the stream on the last chunk; every other chunk is
// flushed with Z_SYNC_FLUSH.
240 int status
= deflate(&m_stream
, trailer
? Z_FINISH
: Z_SYNC_FLUSH
);
// On Z_BUF_ERROR or Z_STREAM_END the zlib state is released, and status then
// carries the result of deflateEnd().
241 if (status
== Z_BUF_ERROR
|| status
== Z_STREAM_END
) {
242 status
= deflateEnd(&m_stream
);
245 if (status
== Z_OK
) {
// Fold this chunk's input into the running CRC-32 kept in m_crc.
247 m_crc
= crc32(m_crc
, (const Bytef
*)data
, len
);
// Bytes produced by this call; the header is counted only when the
// snapshot of m_header was true.
249 int new_len
= m_stream
.total_out
+ (header
? GZIP_HEADER_LENGTH
: 0);
251 if (trailer
&& m_encoding
== CODING_GZIP
) {
252 len
+= GZIP_FOOTER_LENGTH
;
// The trailer is written immediately after the bytes counted in new_len.
253 char *strailer
= s2
+ new_len
;
255 /* write crc & stream.total_in in LSB order */
// On each line the cast applies before the mask; the byte stored is the
// low-order byte of the (shifted) value.
256 strailer
[0] = (char) m_crc
& 0xFF;
257 strailer
[1] = (char) (m_crc
>> 8) & 0xFF;
258 strailer
[2] = (char) (m_crc
>> 16) & 0xFF;
259 strailer
[3] = (char) (m_crc
>> 24) & 0xFF;
260 strailer
[4] = (char) m_stream
.total_in
& 0xFF;
261 strailer
[5] = (char) (m_stream
.total_in
>> 8) & 0xFF;
262 strailer
[6] = (char) (m_stream
.total_in
>> 16) & 0xFF;
263 strailer
[7] = (char) (m_stream
.total_in
>> 24) & 0xFF;
272 Logger::Error("%s", zError(status
));
276 ///////////////////////////////////////////////////////////////////////////////
// One-shot compression of a whole buffer into a malloc()ed block laid out as
// gzip header, deflate output and, in CODING_GZIP mode, an 8-byte trailer.
//   data          - input bytes.
//   len           - input length on entry; on success it is overwritten with
//                   total_out + GZIP_HEADER_LENGTH (+ GZIP_FOOTER_LENGTH in
//                   CODING_GZIP mode).
//   level         - zlib compression level, checked against -1..9.
//   encoding_mode - CODING_GZIP or CODING_DEFLATE.
// Invalid arguments and zlib failures are reported with Logger::Warning.
// NOTE(review): the declarations of stream, status and old_len, the case
// labels of the switch and all return statements are not visible in this
// listing -- confirm against the full file.
278 char *gzencode(const char *data
, int &len
, int level
, int encoding_mode
) {
279 if (level
< -1 || level
> 9) {
280 Logger::Warning("compression level(%d) must be within -1..9", level
);
284 if (encoding_mode
!= CODING_GZIP
&& encoding_mode
!= CODING_DEFLATE
) {
285 Logger::Warning("encoding mode must be FORCE_GZIP or FORCE_DEFLATE");
290 stream
.zalloc
= Z_NULL
;
291 stream
.zfree
= Z_NULL
;
292 stream
.opaque
= Z_NULL
;
294 stream
.next_in
= (Bytef
*)data
;
295 stream
.avail_in
= len
;
// Output estimate: input size plus 0.1% plus a small constant.
297 stream
.avail_out
= stream
.avail_in
+ (stream
.avail_in
/ PHP_ZLIB_MODIFIER
) +
298 15 + 1; /* room for \0 */
// Header space is always included; footer space only in CODING_GZIP mode.
299 char *s2
= (char *)malloc
300 (stream
.avail_out
+ GZIP_HEADER_LENGTH
+
301 (encoding_mode
== CODING_GZIP
? GZIP_FOOTER_LENGTH
: 0));
305 /* add gzip file header */
309 s2
[3] = s2
[4] = s2
[5] = s2
[6] = s2
[7] = s2
[8] = 0; /* time set to 0 */
310 s2
[9] = 0x03; // OS_CODE
// Deflate output is written directly after the header bytes.
312 stream
.next_out
= (Bytef
*)&(s2
[GZIP_HEADER_LENGTH
]);
315 switch (encoding_mode
) {
317 /* windowBits is passed < 0 to suppress zlib header & trailer */
318 if ((status
= deflateInit2(&stream
, level
, Z_DEFLATED
, -MAX_WBITS
,
319 MAX_MEM_LEVEL
, Z_DEFAULT_STRATEGY
)) != Z_OK
) {
320 Logger::Warning("%s", zError(status
));
// Other mode: plain deflateInit() with default window settings.
325 if ((status
= deflateInit(&stream
, level
)) != Z_OK
) {
326 Logger::Warning("%s", zError(status
));
// The whole input is compressed in a single Z_FINISH call.
332 status
= deflate(&stream
, Z_FINISH
);
333 if (status
!= Z_STREAM_END
) {
// Z_OK here means deflate() stopped before finishing the stream; it is
// reported as Z_BUF_ERROR.
335 if (status
== Z_OK
) {
336 status
= Z_BUF_ERROR
;
339 status
= deflateEnd(&stream
);
342 if (status
== Z_OK
) {
345 len
= stream
.total_out
+ GZIP_HEADER_LENGTH
+
346 (encoding_mode
== CODING_GZIP
? GZIP_FOOTER_LENGTH
: 0);
347 /* resize the buffer to the "right" size */
// NOTE(review): the realloc() result overwrites s2 directly and no check of
// it is visible in this listing.
348 s2
= (char *)realloc(s2
, len
+ 1);
350 if (encoding_mode
== CODING_GZIP
) {
351 char *trailer
= s2
+ (stream
.total_out
+ GZIP_HEADER_LENGTH
);
352 uLong crc
= crc32(0L, Z_NULL
, 0);
// NOTE(review): old_len is presumably the input length saved before len
// was overwritten above; its declaration is not visible -- confirm.
354 crc
= crc32(crc
, (const Bytef
*)data
, old_len
);
356 /* write crc & stream.total_in in LSB order */
357 trailer
[0] = (char) crc
& 0xFF;
358 trailer
[1] = (char) (crc
>> 8) & 0xFF;
359 trailer
[2] = (char) (crc
>> 16) & 0xFF;
360 trailer
[3] = (char) (crc
>> 24) & 0xFF;
361 trailer
[4] = (char) stream
.total_in
& 0xFF;
362 trailer
[5] = (char) (stream
.total_in
>> 8) & 0xFF;
363 trailer
[6] = (char) (stream
.total_in
>> 16) & 0xFF;
364 trailer
[7] = (char) (stream
.total_in
>> 24) & 0xFF;
373 Logger::Warning("%s", zError(status
));
// Inflate a gzip-framed buffer into a heap block that is grown on demand.
//   data - input bytes; the gzip header is validated and skipped by
//          check_header() before inflation starts.
//   len  - input length on entry; on success it is overwritten with
//          stream.total_out.
// A rejected header is reported with Logger::Warning, as is a zlib failure
// (using the zError text).
// NOTE(review): the declarations of stream and status, the opening of the
// do-loop, the handling of the realloc() results and all return statements
// are not visible in this listing -- confirm against the full file.
377 char *gzdecode(const char *data
, int &len
) {
379 stream
.zalloc
= (alloc_func
) Z_NULL
;
380 stream
.zfree
= (free_func
) Z_NULL
;
382 unsigned long length
;
// The first attempt uses a buffer of len << 4 bytes; factor is incremented
// on every attempt and retries stop once it reaches maxfactor.
384 unsigned int factor
= 4, maxfactor
= 16;
385 char *s1
= nullptr, *s2
= nullptr;
387 stream
.next_in
= (Bytef
*)data
;
// NOTE(review): this claims one byte more than len, so the caller's buffer
// must really hold len + 1 readable bytes -- confirm.
388 stream
.avail_in
= (uInt
)len
+ 1; /* there is room for \0 */
389 if (check_header(stream
) != Z_OK
) {
390 Logger::Warning("gzdecode: header is in wrong format");
// NOTE(review): the product is computed in int before being widened to
// unsigned long, so a large len can overflow -- confirm the input bounds.
394 length
= len
* (1 << factor
++);
395 s2
= (char *)realloc(s1
, length
);
402 stream
.next_out
= (Bytef
*)s2
;
403 stream
.avail_out
= (uInt
)length
;
405 /* init with -MAX_WBITS disables the zlib internal headers */
406 status
= inflateInit2(&stream
, -MAX_WBITS
);
407 if (status
== Z_OK
) {
408 status
= inflate(&stream
, Z_FINISH
);
409 if (status
!= Z_STREAM_END
) {
// Z_OK here means inflate() stopped before the end of the stream; it is
// mapped to Z_BUF_ERROR, the status the loop condition retries on.
411 if (status
== Z_OK
) {
412 status
= Z_BUF_ERROR
;
415 status
= inflateEnd(&stream
);
// NOTE(review): next_in and avail_in are set once before the loop in the
// visible code; confirm they are restored before each retry.
418 } while (status
== Z_BUF_ERROR
&& factor
< maxfactor
);
420 if (status
== Z_OK
) {
421 len
= stream
.total_out
;
423 // shrink the buffer down to what we really need since this can be 16
424 // times greater than we actually need.
425 s2
= (char *)realloc(s2
, len
+ 1);
432 Logger::Warning("%s", zError(status
));