declare_folded_class NO LONGER _in_file
[hiphop-php.git] / hphp / util / gzip.cpp
blob1c44df8b1f56c6f245e30205229c6512ea8cf309
1 /*
2 +----------------------------------------------------------------------+
3 | HipHop for PHP |
4 +----------------------------------------------------------------------+
5 | Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com) |
6 | Copyright (c) 1997-2010 The PHP Group |
7 +----------------------------------------------------------------------+
8 | This source file is subject to version 3.01 of the PHP license, |
9 | that is bundled with this package in the file LICENSE, and is |
10 | available through the world-wide-web at the following url: |
11 | http://www.php.net/license/3_01.txt |
12 | If you did not receive a copy of the PHP license and are unable to |
13 | obtain it through the world-wide-web, please send a note to |
14 | license@php.net so we can mail you a copy immediately. |
15 +----------------------------------------------------------------------+
18 #include "hphp/util/gzip.h"
20 #include "hphp/util/alloc.h"
21 #include "hphp/util/exception.h"
22 #include "hphp/util/logger.h"
// Sizing constants for the output buffers allocated by the encoders below.
//
// Divisor for the extra output slack: the encoders reserve
// avail_in / PHP_ZLIB_MODIFIER additional bytes on top of the input size.
24 #define PHP_ZLIB_MODIFIER 1000
// Number of bytes in the fixed gzip header the encoders write ahead of the
// deflate data (magic, method, flags/time/xflags, OS code).
25 #define GZIP_HEADER_LENGTH 10
// Number of bytes in the gzip trailer: 4 for the CRC-32 followed by 4 for
// the stream's total_in, both written least-significant byte first.
26 #define GZIP_FOOTER_LENGTH 8
28 namespace HPHP {
// When true, GzipCompressor hands zlib the local_zalloc/local_zfree hooks and
// allocates/frees its own output buffers with local_malloc/local_free; when
// false it leaves zlib's allocators at Z_NULL and uses malloc/free.
30 bool GzipCompressor::s_useLocalArena = false;
32 namespace {
34 void* local_zalloc(void* /* opaque */, unsigned items, unsigned size) {
35 auto const bytes = static_cast<size_t>(items) * size;
36 if (bytes == 0) return nullptr;
37 return local_malloc(bytes);
40 void local_zfree(void* /* opaque */, void* p) {
41 if (p) local_free(p);
// The two identification bytes that open a gzip stream: check_header()
// compares the input against them and the encoders write them as the first
// two header bytes.
46 static const int gz_magic[2] = {0x1f, 0x8b}; /* gzip magic header */
47 ///////////////////////////////////////////////////////////////////////////////
48 // This check_header() function is copied from zlib 1.2.3 and re-factored to
49 // work with in-memory buffers (rather than file streams).
51 /* zlib.h -- interface of the 'zlib' general purpose compression library
52 version 1.2.3, July 18th, 2005
54 Copyright (C) 1995-2005 Jean-loup Gailly and Mark Adler
56 This software is provided 'as-is', without any express or implied
57 warranty. In no event will the authors be held liable for any damages
58 arising from the use of this software.
60 Permission is granted to anyone to use this software for any purpose,
61 including commercial applications, and to alter it and redistribute it
62 freely, subject to the following restrictions:
64 1. The origin of this software must not be misrepresented; you must not
65 claim that you wrote the original software. If you use this software
66 in a product, an acknowledgment in the product documentation would be
67 appreciated but is not required.
68 2. Altered source versions must be plainly marked as such, and must not be
69 misrepresented as being the original software.
70 3. This notice may not be removed or altered from any source distribution.
72 Jean-loup Gailly Mark Adler
73 jloup@gzip.org madler@alumni.caltech.edu
76 The data format used by the zlib library is described by RFCs (Request for
77 Comments) 1950 to 1952 in the files http://www.ietf.org/rfc/rfc1950.txt
78 (zlib format), rfc1951.txt (deflate format) and rfc1952.txt (gzip format).
81 /* gzip flag byte */
// Bit masks for the flags byte of a gzip header. check_header() rejects input
// with any RESERVED bit set and uses EXTRA_FIELD, ORIG_NAME, COMMENT and
// HEAD_CRC to decide which optional header fields to skip.
82 #define ASCII_FLAG 0x01 /* bit 0 set: file probably ascii text */
83 #define HEAD_CRC 0x02 /* bit 1 set: header CRC present */
84 #define EXTRA_FIELD 0x04 /* bit 2 set: extra field present */
85 #define ORIG_NAME 0x08 /* bit 3 set: original file name present */
86 #define COMMENT 0x10 /* bit 4 set: file comment present */
87 #define RESERVED 0xE0 /* bits 5..7: reserved */
89 /* ===========================================================================
90 Read a byte from a gz_stream; update next_in and avail_in. Return EOF
91 for end of file.
92 IN assertion: the stream s has been successfully opened for reading.
94 static int get_byte(z_stream &stream) {
95 if (stream.avail_in == 0) {
96 return EOF;
98 stream.avail_in--;
99 return *(stream.next_in)++;
102 /* ===========================================================================
103 Check the gzip header of a gz_stream opened for reading. Set the stream
104 mode to transparent if the gzip magic header is not present; set s->err
105 to Z_DATA_ERROR if the magic header is present but the rest of the header
106 is incorrect.
107 IN assertion: the stream s has already been created successfully;
108 s->stream.avail_in is zero for the first time, but may be non-zero
109 for concatenated .gz files.
111 static int check_header(z_stream &stream) {
112 int method; /* method byte */
113 int flags; /* flags byte */
114 uInt len;
115 int c;
117 /* Assure two bytes in the buffer so we can peek ahead -- handle case
118 where first byte of header is at the end of the buffer after the last
119 gzip segment */
120 len = stream.avail_in;
121 if (len <= 2) {
122 return Z_DATA_ERROR;
125 /* Peek ahead to check the gzip magic header */
126 if (stream.next_in[0] != gz_magic[0] || stream.next_in[1] != gz_magic[1]) {
127 return Z_DATA_ERROR;
129 stream.avail_in -= 2;
130 stream.next_in += 2;
132 /* Check the rest of the gzip header */
133 method = get_byte(stream);
134 flags = get_byte(stream);
135 if (method != Z_DEFLATED || (flags & RESERVED) != 0) {
136 return Z_DATA_ERROR;
139 /* Discard time, xflags and OS code: */
140 for (len = 0; len < 6; len++) (void)get_byte(stream);
142 if ((flags & EXTRA_FIELD) != 0) { /* skip the extra field */
143 len = (uInt)get_byte(stream);
144 len += ((uInt)get_byte(stream))<<8;
145 /* len is garbage if EOF but the loop below will quit anyway */
146 while (len-- != 0 && get_byte(stream) != EOF) ;
148 if ((flags & ORIG_NAME) != 0) { /* skip the original file name */
149 while ((c = get_byte(stream)) != 0 && c != EOF) ;
151 if ((flags & COMMENT) != 0) { /* skip the .gz file comment */
152 while ((c = get_byte(stream)) != 0 && c != EOF) ;
154 if ((flags & HEAD_CRC) != 0) { /* skip the header crc */
155 for (len = 0; len < 2; len++) (void)get_byte(stream);
157 return stream.avail_in == 0 ? Z_DATA_ERROR : Z_OK;
160 ///////////////////////////////////////////////////////////////////////////////
/*
 * Decide from the file name whether compressing the file is worthwhile.
 *
 * Returns false when the text after the last '.' in filename exactly matches
 * (case-sensitively) one of the listed extensions, and true otherwise,
 * including when the name contains no '.' at all.
 */
bool is_compressible_file(const char *filename) {
  static const char *const skipped_exts[] = {
    "gif", "png", "jpeg", "jpg", "tiff", "swf", "zip", "gz", "bz2", "cab",
    "bmp", "xcf", "mp3", "wav", "rsrc", "ico", "jar", "exe", "dll", "so",
  };

  const char *const last_dot = strrchr(filename, '.');
  if (last_dot == nullptr) {
    return true;
  }

  const char *const suffix = last_dot + 1;
  for (const char *candidate : skipped_exts) {
    if (strcmp(suffix, candidate) == 0) {
      return false;
    }
  }
  return true;
}
182 ///////////////////////////////////////////////////////////////////////////////
183 // GzipCompressor
185 GzipCompressor::GzipCompressor(int level, int encoding_mode, bool header)
186 : m_encoding(encoding_mode), m_header(header),
187 m_ended(false) {
188 if (level < -1 || level > 9) {
189 throw Exception("compression level(%d) must be within -1..9", level);
191 if (encoding_mode != CODING_GZIP && encoding_mode != CODING_DEFLATE) {
192 throw Exception("encoding mode must be FORCE_GZIP or FORCE_DEFLATE");
195 if (s_useLocalArena) {
196 m_stream.zalloc = local_zalloc;
197 m_stream.zfree = local_zfree;
198 } else {
199 m_stream.zalloc = Z_NULL;
200 m_stream.zfree = Z_NULL;
202 m_stream.opaque = Z_NULL;
203 m_stream.total_in = 0;
204 m_stream.next_in = Z_NULL;
205 m_stream.avail_in = 0;
206 m_stream.avail_out = 0;
207 m_stream.next_out = Z_NULL;
209 m_crc = crc32(0L, Z_NULL, 0);
211 int status;
212 switch (encoding_mode) {
213 case CODING_GZIP:
214 /* windowBits is passed < 0 to suppress zlib header & trailer */
215 if ((status = deflateInit2(&m_stream, level, Z_DEFLATED, -MAX_WBITS,
216 MAX_MEM_LEVEL, Z_DEFAULT_STRATEGY)) != Z_OK) {
217 throw Exception("%s", zError(status));
219 break;
220 case CODING_DEFLATE:
221 if ((status = deflateInit(&m_stream, level)) != Z_OK) {
222 throw Exception("%s", zError(status));
224 break;
228 GzipCompressor::~GzipCompressor() {
229 if (!m_ended) {
230 deflateEnd(&m_stream);
234 StringHolder
235 GzipCompressor::compress(const char *data, int &len, bool trailer) {
236 // middle chunks should never be zero size
237 assert(len || trailer);
239 m_stream.next_in = (Bytef *)data;
240 m_stream.avail_in = len;
241 m_stream.total_out = 0;
243 m_stream.avail_out = m_stream.avail_in +
244 (m_stream.avail_in / PHP_ZLIB_MODIFIER) + 15 + 1; /* room for \0 */
245 char *s2;
246 auto const allocSize = m_stream.avail_out + GZIP_HEADER_LENGTH +
247 ((trailer && m_encoding == CODING_GZIP) ? GZIP_FOOTER_LENGTH : 0);
248 if (s_useLocalArena) {
249 s2 = (char *)local_malloc(allocSize);
250 } else {
251 s2 = (char *)malloc(allocSize);
254 /* add gzip file header */
255 bool header = m_header;
256 if (header) {
257 s2[0] = (char)gz_magic[0];
258 s2[1] = (char)gz_magic[1];
259 s2[2] = Z_DEFLATED;
260 s2[3] = s2[4] = s2[5] = s2[6] = s2[7] = s2[8] = 0; /* time set to 0 */
261 s2[9] = 0x03; // OS_CODE
262 m_stream.next_out = (Bytef*)&(s2[GZIP_HEADER_LENGTH]);
263 m_header = false; // only the 1st chunk got it
264 } else {
265 m_stream.next_out = (Bytef*)s2;
268 int status = deflate(&m_stream, trailer ? Z_FINISH : Z_SYNC_FLUSH);
269 if (status == Z_BUF_ERROR || status == Z_STREAM_END) {
270 status = deflateEnd(&m_stream);
271 m_ended = true;
273 if (status == Z_OK) {
274 if (len) {
275 m_crc = crc32(m_crc, (const Bytef *)data, len);
277 int new_len = m_stream.total_out + (header ? GZIP_HEADER_LENGTH : 0);
278 len = new_len;
279 if (trailer && m_encoding == CODING_GZIP) {
280 len += GZIP_FOOTER_LENGTH;
281 char *strailer = s2 + new_len;
283 /* write crc & stream.total_in in LSB order */
284 strailer[0] = (char) m_crc & 0xFF;
285 strailer[1] = (char) (m_crc >> 8) & 0xFF;
286 strailer[2] = (char) (m_crc >> 16) & 0xFF;
287 strailer[3] = (char) (m_crc >> 24) & 0xFF;
288 strailer[4] = (char) m_stream.total_in & 0xFF;
289 strailer[5] = (char) (m_stream.total_in >> 8) & 0xFF;
290 strailer[6] = (char) (m_stream.total_in >> 16) & 0xFF;
291 strailer[7] = (char) (m_stream.total_in >> 24) & 0xFF;
292 strailer[8] = '\0';
293 } else {
294 s2[len] = '\0';
296 return StringHolder(s2, len, s_useLocalArena ? FreeType::LocalFree
297 : FreeType::Free);
299 if (s_useLocalArena) {
300 local_free(s2);
301 } else {
302 free(s2);
304 Logger::Error("%s", zError(status));
305 return nullptr;
308 ///////////////////////////////////////////////////////////////////////////////
310 char *gzencode(const char *data, int &len, int level, int encoding_mode) {
311 if (level < -1 || level > 9) {
312 Logger::Warning("compression level(%d) must be within -1..9", level);
313 return nullptr;
316 if (encoding_mode != CODING_GZIP && encoding_mode != CODING_DEFLATE) {
317 Logger::Warning("encoding mode must be FORCE_GZIP or FORCE_DEFLATE");
318 return nullptr;
321 z_stream stream;
322 stream.zalloc = Z_NULL;
323 stream.zfree = Z_NULL;
324 stream.opaque = Z_NULL;
326 stream.next_in = (Bytef *)data;
327 stream.avail_in = len;
329 stream.avail_out = stream.avail_in + (stream.avail_in / PHP_ZLIB_MODIFIER) +
330 15 + 1; /* room for \0 */
331 char *s2 = (char *)malloc
332 (stream.avail_out + GZIP_HEADER_LENGTH +
333 (encoding_mode == CODING_GZIP ? GZIP_FOOTER_LENGTH : 0));
334 if (!s2) {
335 return nullptr;
337 /* add gzip file header */
338 s2[0] = (char)gz_magic[0];
339 s2[1] = (char)gz_magic[1];
340 s2[2] = Z_DEFLATED;
341 s2[3] = s2[4] = s2[5] = s2[6] = s2[7] = s2[8] = 0; /* time set to 0 */
342 s2[9] = 0x03; // OS_CODE
344 stream.next_out = (Bytef*)&(s2[GZIP_HEADER_LENGTH]);
346 int status;
347 switch (encoding_mode) {
348 case CODING_GZIP:
349 /* windowBits is passed < 0 to suppress zlib header & trailer */
350 if ((status = deflateInit2(&stream, level, Z_DEFLATED, -MAX_WBITS,
351 MAX_MEM_LEVEL, Z_DEFAULT_STRATEGY)) != Z_OK) {
352 Logger::Warning("%s", zError(status));
353 return nullptr;
355 break;
356 case CODING_DEFLATE:
357 if ((status = deflateInit(&stream, level)) != Z_OK) {
358 Logger::Warning("%s", zError(status));
359 return nullptr;
361 break;
364 status = deflate(&stream, Z_FINISH);
365 if (status != Z_STREAM_END) {
366 deflateEnd(&stream);
367 if (status == Z_OK) {
368 status = Z_BUF_ERROR;
370 } else {
371 status = deflateEnd(&stream);
374 if (status == Z_OK) {
376 int old_len = len;
377 len = stream.total_out + GZIP_HEADER_LENGTH +
378 (encoding_mode == CODING_GZIP ? GZIP_FOOTER_LENGTH : 0);
379 /* resize to buffer to the "right" size */
380 s2 = (char *)realloc(s2, len + 1);
381 assert(s2);
382 if (encoding_mode == CODING_GZIP) {
383 char *trailer = s2 + (stream.total_out + GZIP_HEADER_LENGTH);
384 uLong crc = crc32(0L, Z_NULL, 0);
386 crc = crc32(crc, (const Bytef *)data, old_len);
388 /* write crc & stream.total_in in LSB order */
389 trailer[0] = (char) crc & 0xFF;
390 trailer[1] = (char) (crc >> 8) & 0xFF;
391 trailer[2] = (char) (crc >> 16) & 0xFF;
392 trailer[3] = (char) (crc >> 24) & 0xFF;
393 trailer[4] = (char) stream.total_in & 0xFF;
394 trailer[5] = (char) (stream.total_in >> 8) & 0xFF;
395 trailer[6] = (char) (stream.total_in >> 16) & 0xFF;
396 trailer[7] = (char) (stream.total_in >> 24) & 0xFF;
397 trailer[8] = '\0';
398 } else {
399 s2[len] = '\0';
401 return s2;
404 free(s2);
405 Logger::Warning("%s", zError(status));
406 return nullptr;
409 char *gzdecode(const char *data, int &len) {
410 z_stream stream;
411 stream.zalloc = (alloc_func) Z_NULL;
412 stream.zfree = (free_func) Z_NULL;
414 unsigned long length;
415 int status;
416 unsigned int factor = 4, maxfactor = 16;
417 char *s1 = nullptr, *s2 = nullptr;
418 do {
419 stream.next_in = (Bytef *)data;
420 stream.avail_in = (uInt)len + 1; /* there is room for \0 */
421 if (check_header(stream) != Z_OK) {
422 Logger::Warning("gzdecode: header is in wrong format");
423 return nullptr;
426 length = len * (1 << factor++);
427 s2 = (char *)realloc(s1, length);
428 if (!s2) {
429 if (s1) free(s1);
430 return nullptr;
432 s1 = s2;
434 stream.next_out = (Bytef*)s2;
435 stream.avail_out = (uInt)length;
437 /* init with -MAX_WBITS disables the zlib internal headers */
438 status = inflateInit2(&stream, -MAX_WBITS);
439 if (status == Z_OK) {
440 status = inflate(&stream, Z_FINISH);
441 if (status != Z_STREAM_END) {
442 inflateEnd(&stream);
443 if (status == Z_OK) {
444 status = Z_BUF_ERROR;
446 } else {
447 status = inflateEnd(&stream);
450 } while (status == Z_BUF_ERROR && factor < maxfactor);
452 if (status == Z_OK) {
453 len = stream.total_out;
455 // shrink the buffer down to what we really need since this can be 16
456 // times greater than we actually need.
457 s2 = (char *)realloc(s2, len + 1);
458 assert(s2);
459 s2[len] = '\0';
460 return s2;
463 free(s2);
464 Logger::Warning("%s", zError(status));
465 return nullptr;