Optional Two-phase heap tracing
[hiphop-php.git] / hphp / util / compression.cpp
blob2bb210d1c2f0c7a748b3fbc77cbe70189d58278c
1 /*
2 +----------------------------------------------------------------------+
3 | HipHop for PHP |
4 +----------------------------------------------------------------------+
5 | Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com) |
6 | Copyright (c) 1997-2010 The PHP Group |
7 +----------------------------------------------------------------------+
8 | This source file is subject to version 3.01 of the PHP license, |
9 | that is bundled with this package in the file LICENSE, and is |
10 | available through the world-wide-web at the following url: |
11 | http://www.php.net/license/3_01.txt |
12 | If you did not receive a copy of the PHP license and are unable to |
13 | obtain it through the world-wide-web, please send a note to |
14 | license@php.net so we can mail you a copy immediately. |
15 +----------------------------------------------------------------------+
18 #include "hphp/util/compression.h"
20 #include "hphp/util/exception.h"
21 #include "hphp/util/logger.h"
// Divisor used when sizing compression output buffers in this file: one extra
// output byte is reserved for every PHP_ZLIB_MODIFIER input bytes.
23 #define PHP_ZLIB_MODIFIER 1000
// Number of gzip header bytes the compressors in this file write in front of
// the deflate data (they fill indices 0..9 of the output buffer).
24 #define GZIP_HEADER_LENGTH 10
// Number of gzip trailer bytes appended after the deflate data: a 4-byte
// CRC-32 followed by the 4-byte total input size.
25 #define GZIP_FOOTER_LENGTH 8
27 namespace HPHP {
29 static const int gz_magic[2] = {0x1f, 0x8b}; /* gzip magic header */
30 ///////////////////////////////////////////////////////////////////////////////
31 // This check_header() function is copied from zlib 1.2.3 and re-factored to
32 // work with in-memory buffers (rather than file streams).
34 /* zlib.h -- interface of the 'zlib' general purpose compression library
35 version 1.2.3, July 18th, 2005
37 Copyright (C) 1995-2005 Jean-loup Gailly and Mark Adler
39 This software is provided 'as-is', without any express or implied
40 warranty. In no event will the authors be held liable for any damages
41 arising from the use of this software.
43 Permission is granted to anyone to use this software for any purpose,
44 including commercial applications, and to alter it and redistribute it
45 freely, subject to the following restrictions:
47 1. The origin of this software must not be misrepresented; you must not
48 claim that you wrote the original software. If you use this software
49 in a product, an acknowledgment in the product documentation would be
50 appreciated but is not required.
51 2. Altered source versions must be plainly marked as such, and must not be
52 misrepresented as being the original software.
53 3. This notice may not be removed or altered from any source distribution.
55 Jean-loup Gailly Mark Adler
56 jloup@gzip.org madler@alumni.caltech.edu
59 The data format used by the zlib library is described by RFCs (Request for
60 Comments) 1950 to 1952 in the files http://www.ietf.org/rfc/rfc1950.txt
61 (zlib format), rfc1951.txt (deflate format) and rfc1952.txt (gzip format).
64 /* gzip flag byte */
65 #define ASCII_FLAG 0x01 /* bit 0 set: file probably ascii text */
66 #define HEAD_CRC 0x02 /* bit 1 set: header CRC present */
67 #define EXTRA_FIELD 0x04 /* bit 2 set: extra field present */
68 #define ORIG_NAME 0x08 /* bit 3 set: original file name present */
69 #define COMMENT 0x10 /* bit 4 set: file comment present */
70 #define RESERVED 0xE0 /* bits 5..7: reserved */
72 /* ===========================================================================
73 Read a byte from a gz_stream; update next_in and avail_in. Return EOF
74 for end of file.
75 IN assertion: the stream s has been successfully opened for reading.
77 static int get_byte(z_stream &stream) {
78 if (stream.avail_in == 0) {
79 return EOF;
81 stream.avail_in--;
82 return *(stream.next_in)++;
85 /* ===========================================================================
86 Check the gzip header of a gz_stream opened for reading. Set the stream
87 mode to transparent if the gzip magic header is not present; set s->err
88 to Z_DATA_ERROR if the magic header is present but the rest of the header
89 is incorrect.
90 IN assertion: the stream s has already been created successfully;
91 s->stream.avail_in is zero for the first time, but may be non-zero
92 for concatenated .gz files.
94 static int check_header(z_stream &stream) {
95 int method; /* method byte */
96 int flags; /* flags byte */
97 uInt len;
98 int c;
100 /* Assure two bytes in the buffer so we can peek ahead -- handle case
101 where first byte of header is at the end of the buffer after the last
102 gzip segment */
103 len = stream.avail_in;
104 if (len <= 2) {
105 return Z_DATA_ERROR;
108 /* Peek ahead to check the gzip magic header */
109 if (stream.next_in[0] != gz_magic[0] || stream.next_in[1] != gz_magic[1]) {
110 return Z_DATA_ERROR;
112 stream.avail_in -= 2;
113 stream.next_in += 2;
115 /* Check the rest of the gzip header */
116 method = get_byte(stream);
117 flags = get_byte(stream);
118 if (method != Z_DEFLATED || (flags & RESERVED) != 0) {
119 return Z_DATA_ERROR;
122 /* Discard time, xflags and OS code: */
123 for (len = 0; len < 6; len++) (void)get_byte(stream);
125 if ((flags & EXTRA_FIELD) != 0) { /* skip the extra field */
126 len = (uInt)get_byte(stream);
127 len += ((uInt)get_byte(stream))<<8;
128 /* len is garbage if EOF but the loop below will quit anyway */
129 while (len-- != 0 && get_byte(stream) != EOF) ;
131 if ((flags & ORIG_NAME) != 0) { /* skip the original file name */
132 while ((c = get_byte(stream)) != 0 && c != EOF) ;
134 if ((flags & COMMENT) != 0) { /* skip the .gz file comment */
135 while ((c = get_byte(stream)) != 0 && c != EOF) ;
137 if ((flags & HEAD_CRC) != 0) { /* skip the header crc */
138 for (len = 0; len < 2; len++) (void)get_byte(stream);
140 return stream.avail_in == 0 ? Z_DATA_ERROR : Z_OK;
143 ///////////////////////////////////////////////////////////////////////////////
/**
 * Decide from the file extension whether a file should be compressed.
 *
 * The extension is the text following the last '.' anywhere in filename.
 * Returns false when that text is exactly (case-sensitively) one of the
 * extensions listed below; returns true otherwise, including when filename
 * contains no '.' at all.
 */
bool is_compressible_file(const char *filename) {
  static const char *const skipped_exts[] = {
    "gif", "png", "jpeg", "jpg", "tiff", "swf", "zip", "gz", "bz2", "cab",
    "bmp", "xcf", "mp3", "wav", "rsrc", "ico", "jar", "exe", "dll", "so",
  };
  static const size_t skipped_count =
    sizeof(skipped_exts) / sizeof(skipped_exts[0]);

  const char *last_dot = strrchr(filename, '.');
  if (!last_dot) {
    return true;
  }

  const char *suffix = last_dot + 1;
  for (size_t idx = 0; idx < skipped_count; ++idx) {
    if (strcmp(suffix, skipped_exts[idx]) == 0) {
      return false;
    }
  }
  return true;
}
165 ///////////////////////////////////////////////////////////////////////////////
166 // StreamCompressor
168 StreamCompressor::StreamCompressor(int level, int encoding_mode, bool header)
169 : m_encoding(encoding_mode), m_header(header),
170 m_ended(false) {
171 if (level < -1 || level > 9) {
172 throw Exception("compression level(%d) must be within -1..9", level);
174 if (encoding_mode != CODING_GZIP && encoding_mode != CODING_DEFLATE) {
175 throw Exception("encoding mode must be FORCE_GZIP or FORCE_DEFLATE");
178 m_stream.zalloc = Z_NULL;
179 m_stream.zfree = Z_NULL;
180 m_stream.opaque = Z_NULL;
181 m_stream.total_in = 0;
182 m_stream.next_in = Z_NULL;
183 m_stream.avail_in = 0;
184 m_stream.avail_out = 0;
185 m_stream.next_out = Z_NULL;
187 m_crc = crc32(0L, Z_NULL, 0);
189 int status;
190 switch (encoding_mode) {
191 case CODING_GZIP:
192 /* windowBits is passed < 0 to suppress zlib header & trailer */
193 if ((status = deflateInit2(&m_stream, level, Z_DEFLATED, -MAX_WBITS,
194 MAX_MEM_LEVEL, Z_DEFAULT_STRATEGY)) != Z_OK) {
195 throw Exception("%s", zError(status));
197 break;
198 case CODING_DEFLATE:
199 if ((status = deflateInit(&m_stream, level)) != Z_OK) {
200 throw Exception("%s", zError(status));
202 break;
206 StreamCompressor::~StreamCompressor() {
207 if (!m_ended) {
208 deflateEnd(&m_stream);
212 char *StreamCompressor::compress(const char *data, int &len, bool trailer) {
213 // middle chunks should never be zero size
214 assert(len || trailer);
216 m_stream.next_in = (Bytef *)data;
217 m_stream.avail_in = len;
218 m_stream.total_out = 0;
220 m_stream.avail_out = m_stream.avail_in +
221 (m_stream.avail_in / PHP_ZLIB_MODIFIER) + 15 + 1; /* room for \0 */
222 char *s2 = (char *)malloc
223 (m_stream.avail_out + GZIP_HEADER_LENGTH +
224 ((trailer && m_encoding == CODING_GZIP) ? GZIP_FOOTER_LENGTH : 0));
226 /* add gzip file header */
227 bool header = m_header;
228 if (header) {
229 s2[0] = gz_magic[0];
230 s2[1] = gz_magic[1];
231 s2[2] = Z_DEFLATED;
232 s2[3] = s2[4] = s2[5] = s2[6] = s2[7] = s2[8] = 0; /* time set to 0 */
233 s2[9] = 0x03; // OS_CODE
234 m_stream.next_out = (Bytef*)&(s2[GZIP_HEADER_LENGTH]);
235 m_header = false; // only the 1st chunnk got it
236 } else {
237 m_stream.next_out = (Bytef*)s2;
240 int status = deflate(&m_stream, trailer ? Z_FINISH : Z_SYNC_FLUSH);
241 if (status == Z_BUF_ERROR || status == Z_STREAM_END) {
242 status = deflateEnd(&m_stream);
243 m_ended = true;
245 if (status == Z_OK) {
246 if (len) {
247 m_crc = crc32(m_crc, (const Bytef *)data, len);
249 int new_len = m_stream.total_out + (header ? GZIP_HEADER_LENGTH : 0);
250 len = new_len;
251 if (trailer && m_encoding == CODING_GZIP) {
252 len += GZIP_FOOTER_LENGTH;
253 char *strailer = s2 + new_len;
255 /* write crc & stream.total_in in LSB order */
256 strailer[0] = (char) m_crc & 0xFF;
257 strailer[1] = (char) (m_crc >> 8) & 0xFF;
258 strailer[2] = (char) (m_crc >> 16) & 0xFF;
259 strailer[3] = (char) (m_crc >> 24) & 0xFF;
260 strailer[4] = (char) m_stream.total_in & 0xFF;
261 strailer[5] = (char) (m_stream.total_in >> 8) & 0xFF;
262 strailer[6] = (char) (m_stream.total_in >> 16) & 0xFF;
263 strailer[7] = (char) (m_stream.total_in >> 24) & 0xFF;
264 strailer[8] = '\0';
265 } else {
266 s2[len] = '\0';
268 return s2;
271 free(s2);
272 Logger::Error("%s", zError(status));
273 return nullptr;
276 ///////////////////////////////////////////////////////////////////////////////
278 char *gzencode(const char *data, int &len, int level, int encoding_mode) {
279 if (level < -1 || level > 9) {
280 Logger::Warning("compression level(%d) must be within -1..9", level);
281 return nullptr;
284 if (encoding_mode != CODING_GZIP && encoding_mode != CODING_DEFLATE) {
285 Logger::Warning("encoding mode must be FORCE_GZIP or FORCE_DEFLATE");
286 return nullptr;
289 z_stream stream;
290 stream.zalloc = Z_NULL;
291 stream.zfree = Z_NULL;
292 stream.opaque = Z_NULL;
294 stream.next_in = (Bytef *)data;
295 stream.avail_in = len;
297 stream.avail_out = stream.avail_in + (stream.avail_in / PHP_ZLIB_MODIFIER) +
298 15 + 1; /* room for \0 */
299 char *s2 = (char *)malloc
300 (stream.avail_out + GZIP_HEADER_LENGTH +
301 (encoding_mode == CODING_GZIP ? GZIP_FOOTER_LENGTH : 0));
302 if (!s2) {
303 return nullptr;
305 /* add gzip file header */
306 s2[0] = gz_magic[0];
307 s2[1] = gz_magic[1];
308 s2[2] = Z_DEFLATED;
309 s2[3] = s2[4] = s2[5] = s2[6] = s2[7] = s2[8] = 0; /* time set to 0 */
310 s2[9] = 0x03; // OS_CODE
312 stream.next_out = (Bytef*)&(s2[GZIP_HEADER_LENGTH]);
314 int status;
315 switch (encoding_mode) {
316 case CODING_GZIP:
317 /* windowBits is passed < 0 to suppress zlib header & trailer */
318 if ((status = deflateInit2(&stream, level, Z_DEFLATED, -MAX_WBITS,
319 MAX_MEM_LEVEL, Z_DEFAULT_STRATEGY)) != Z_OK) {
320 Logger::Warning("%s", zError(status));
321 return nullptr;
323 break;
324 case CODING_DEFLATE:
325 if ((status = deflateInit(&stream, level)) != Z_OK) {
326 Logger::Warning("%s", zError(status));
327 return nullptr;
329 break;
332 status = deflate(&stream, Z_FINISH);
333 if (status != Z_STREAM_END) {
334 deflateEnd(&stream);
335 if (status == Z_OK) {
336 status = Z_BUF_ERROR;
338 } else {
339 status = deflateEnd(&stream);
342 if (status == Z_OK) {
344 int old_len = len;
345 len = stream.total_out + GZIP_HEADER_LENGTH +
346 (encoding_mode == CODING_GZIP ? GZIP_FOOTER_LENGTH : 0);
347 /* resize to buffer to the "right" size */
348 s2 = (char *)realloc(s2, len + 1);
349 assert(s2);
350 if (encoding_mode == CODING_GZIP) {
351 char *trailer = s2 + (stream.total_out + GZIP_HEADER_LENGTH);
352 uLong crc = crc32(0L, Z_NULL, 0);
354 crc = crc32(crc, (const Bytef *)data, old_len);
356 /* write crc & stream.total_in in LSB order */
357 trailer[0] = (char) crc & 0xFF;
358 trailer[1] = (char) (crc >> 8) & 0xFF;
359 trailer[2] = (char) (crc >> 16) & 0xFF;
360 trailer[3] = (char) (crc >> 24) & 0xFF;
361 trailer[4] = (char) stream.total_in & 0xFF;
362 trailer[5] = (char) (stream.total_in >> 8) & 0xFF;
363 trailer[6] = (char) (stream.total_in >> 16) & 0xFF;
364 trailer[7] = (char) (stream.total_in >> 24) & 0xFF;
365 trailer[8] = '\0';
366 } else {
367 s2[len] = '\0';
369 return s2;
372 free(s2);
373 Logger::Warning("%s", zError(status));
374 return nullptr;
377 char *gzdecode(const char *data, int &len) {
378 z_stream stream;
379 stream.zalloc = (alloc_func) Z_NULL;
380 stream.zfree = (free_func) Z_NULL;
382 unsigned long length;
383 int status;
384 unsigned int factor = 4, maxfactor = 16;
385 char *s1 = nullptr, *s2 = nullptr;
386 do {
387 stream.next_in = (Bytef *)data;
388 stream.avail_in = (uInt)len + 1; /* there is room for \0 */
389 if (check_header(stream) != Z_OK) {
390 Logger::Warning("gzdecode: header is in wrong format");
391 return nullptr;
394 length = len * (1 << factor++);
395 s2 = (char *)realloc(s1, length);
396 if (!s2) {
397 if (s1) free(s1);
398 return nullptr;
400 s1 = s2;
402 stream.next_out = (Bytef*)s2;
403 stream.avail_out = (uInt)length;
405 /* init with -MAX_WBITS disables the zlib internal headers */
406 status = inflateInit2(&stream, -MAX_WBITS);
407 if (status == Z_OK) {
408 status = inflate(&stream, Z_FINISH);
409 if (status != Z_STREAM_END) {
410 inflateEnd(&stream);
411 if (status == Z_OK) {
412 status = Z_BUF_ERROR;
414 } else {
415 status = inflateEnd(&stream);
418 } while (status == Z_BUF_ERROR && factor < maxfactor);
420 if (status == Z_OK) {
421 len = stream.total_out;
423 // shrink the buffer down to what we really need since this can be 16
424 // times greater than we actually need.
425 s2 = (char *)realloc(s2, len + 1);
426 assert(s2);
427 s2[len] = '\0';
428 return s2;
431 free(s2);
432 Logger::Warning("%s", zError(status));
433 return nullptr;