1 /* deflate/gzip encoding backend */
19 #include "encoding/deflate.h"
20 #include "encoding/encoding.h"
21 #include "util/memory.h"
/* Capacity, in bytes, of the staging array deflate_enc_data.buf; it is
 * also the byte count that deflate_read() passes to safe_read() when it
 * refills that array. */
23 /* How many bytes of compressed data to read before decompressing. */
24 #define ELINKS_DEFLATE_BUFFER_LENGTH 5000
/* Per-stream decoder state: the zlib z_stream, three one-bit status
 * flags, and the staging buffer that holds compressed input.
 *
 * NOTE(review): other code in this file reads data->fdread, but no
 * declaration of an fdread member (and no closing brace) is visible in
 * this copy of the struct; confirm against the complete source. */
26 struct deflate_enc_data
{
/* zlib inflate state; initialized with inflateInit2() in deflate_open(). */
27 z_stream deflate_stream
;
29 /* The file descriptor from which we read. */
/* NOTE(review): the member described by the comment above is not visible
 * in this copy. */
/* When set, deflate_read() returns 0 without doing any work. */
32 unsigned int last_read
:1;
/* Cleared in deflate_open() and set by deflate_read() once inflate() has
 * been attempted; while clear, deflate_read() may fall back to raw
 * DEFLATE on Z_DATA_ERROR. */
33 unsigned int after_first_read
:1;
/* Set by deflate_decode_buffer() in its Z_STREAM_END handling;
 * deflate_close() calls inflateEnd() only while this is clear. */
34 unsigned int after_end
:1;
36 /* A buffer for data that has been read from the file but not
37 * yet decompressed. z_stream.next_in and z_stream.avail_in
38 * refer to this buffer. */
39 unsigned char buf
[ELINKS_DEFLATE_BUFFER_LENGTH
];
/* Common setup for the deflate and gzip decoders.
 *
 * Allocates a struct deflate_enc_data with mem_alloc(), resets its
 * z_stream by copying a zero-initialized template, clears
 * after_first_read, and initializes zlib inflation with inflateInit2().
 *
 * window_size is handed unchanged to inflateInit2(); the callers in this
 * file pass -MAX_WBITS or MAX_WBITS + 32.
 *
 * NOTE(review): the return type, the braces, the declaration of err, the
 * handling of a failed allocation or a failed inflateInit2(), and every
 * use of the stream and fd parameters are not visible in this copy of the
 * file; confirm against the complete source.
 * NOTE(review): the comment that introduces null_z_stream below has lost
 * its final line, including the closing delimiter, in this copy. */
43 deflate_open(int window_size
, struct stream_encoded
*stream
, int fd
)
45 /* A zero-initialized z_stream. The compiler ensures that all
46 * pointer members in it are null. (Can't do this with memset
47 * because C99 does not require all-bits-zero to be a null
49 static const z_stream null_z_stream
= {0};
52 struct deflate_enc_data
*data
= mem_alloc(sizeof(*data
));
59 /* Initialize all members of *data, except data->buf[], which
60 * will be initialized on demand by deflate_read. */
61 copy_struct(&data
->deflate_stream
, &null_z_stream
);
/* No inflate() call has been attempted yet; deflate_read() consults this
 * flag before trying its raw-DEFLATE fallback. */
64 data
->after_first_read
= 0;
/* Let zlib set up the inflater for the requested window size; err
 * receives zlib's status code. */
67 err
= inflateInit2(&data
->deflate_stream
, window_size
);
/* Open a decoder for headerless DEFLATE data by delegating to
 * deflate_open() with a window size of -MAX_WBITS; stream and fd are
 * forwarded unchanged and deflate_open()'s result is returned as is.
 *
 * NOTE(review): the return type and braces are not visible in this copy. */
78 deflate_raw_open(struct stream_encoded
*stream
, int fd
)
80 /* raw DEFLATE with neither zlib nor gzip header */
81 return deflate_open(-MAX_WBITS
, stream
, fd
);
/* Open a decoder for gzip- or zlib-wrapped data by delegating to
 * deflate_open() with a window size of MAX_WBITS + 32; stream and fd are
 * forwarded unchanged and deflate_open()'s result is returned as is.
 *
 * NOTE(review): the return type and braces are not visible in this copy. */
85 deflate_gzip_open(struct stream_encoded
*stream
, int fd
)
87 /* detect gzip header, else assume zlib header */
88 return deflate_open(MAX_WBITS
+ 32, stream
, fd
);
/* Decompress data from the stream's file descriptor into buf.
 *
 * stream->data is used as a struct deflate_enc_data.  buf and len describe
 * the caller's output area.  If last_read is already set the function
 * returns 0 at once.  Otherwise zlib's output is pointed at buf/len, the
 * staging buffer is refilled through safe_read() whenever avail_in is 0,
 * and inflate() is called with Z_SYNC_FLUSH; the loop repeats while
 * output space remains.
 *
 * The final return value is len minus the remaining avail_out, i.e. the
 * number of bytes written to buf.  One visible path returns -1 and is
 * labelled as the I/O error case.
 *
 * NOTE(review): the return type, braces, the declarations of err and l,
 * the restart label, the opening of the do-loop, the conditions guarding
 * the -1 return and the EOF case, and the bodies of the Z_STREAM_END and
 * error branches are not visible in this copy of the file. */
92 deflate_read(struct stream_encoded
*stream
, unsigned char *buf
, int len
)
94 struct deflate_enc_data
*data
= (struct deflate_enc_data
*) stream
->data
;
/* Nothing more to deliver once last_read has been set. */
102 if (data
->last_read
) return 0;
/* Direct zlib's output at the caller's buffer. */
104 data
->deflate_stream
.avail_out
= len
;
105 data
->deflate_stream
.next_out
= buf
;
/* Refill the staging buffer only when zlib has consumed all pending
 * input. */
108 if (data
->deflate_stream
.avail_in
== 0) {
109 l
= safe_read(data
->fdread
, data
->buf
,
110 ELINKS_DEFLATE_BUFFER_LENGTH
);
116 return -1; /* I/O error */
118 /* EOF. It is error: we wait for more bytes */
/* Present the l freshly read bytes to zlib as its input. */
122 data
->deflate_stream
.next_in
= data
->buf
;
123 data
->deflate_stream
.avail_in
= l
;
126 err
= inflate(&data
->deflate_stream
, Z_SYNC_FLUSH
);
/* The fallback below is taken only on a data error, before
 * after_first_read has been set, and while nothing has been written to
 * buf during this call. */
127 if (err
== Z_DATA_ERROR
&& !data
->after_first_read
128 && data
->deflate_stream
.next_out
== buf
) {
129 /* RFC 2616 requires a zlib header for
130 * "Content-Encoding: deflate", but some HTTP
131 * servers (Microsoft-IIS/6.0 at blogs.msdn.com,
132 * and reportedly Apache with mod_deflate) omit
133 * that, causing Z_DATA_ERROR. Clarification of
134 * the term "deflate" has been requested for the
135 * next version of HTTP:
136 * http://www3.tools.ietf.org/wg/httpbis/trac/ticket/73
138 * Try to recover by telling zlib not to expect
139 * the header. If the error does not happen on
140 * the first inflate() call, then it is too late
141 * to recover because ELinks may already have
142 * discarded part of the input data.
144 * TODO: This fallback to raw DEFLATE is currently
145 * enabled for "Content-Encoding: gzip" too. It
146 * might be better to fall back to no compression
147 * at all, because Apache can send that header for
148 * uncompressed *.gz.md5 files. */
/* Setting the flag first makes the condition above false from now on,
 * so this recovery is attempted at most once. */
149 data
->after_first_read
= 1;
/* Discard the current inflater, then rewind the output to the start of
 * buf and the input to the start of the staging buffer (l bytes) before
 * re-initializing zlib for headerless DEFLATE. */
150 inflateEnd(&data
->deflate_stream
);
151 data
->deflate_stream
.avail_out
= len
;
152 data
->deflate_stream
.next_out
= buf
;
153 data
->deflate_stream
.next_in
= data
->buf
;
154 data
->deflate_stream
.avail_in
= l
;
155 err
= inflateInit2(&data
->deflate_stream
, -MAX_WBITS
);
/* Retry the inflate step only if the re-initialization succeeded. */
156 if (err
== Z_OK
) goto restart
;
/* An inflate() attempt has now been made; the raw-DEFLATE fallback is no
 * longer eligible. */
158 data
->after_first_read
= 1;
/* NOTE(review): the bodies of the two branches below are not visible in
 * this copy of the file. */
159 if (err
== Z_STREAM_END
) {
162 } else if (err
!= Z_OK
) {
/* Keep going while the caller's buffer still has room. */
166 } while (data
->deflate_stream
.avail_out
> 0);
/* Consistency check: the byte count derived from avail_out must equal the
 * distance next_out has advanced from buf. */
168 assert(len
- data
->deflate_stream
.avail_out
== data
->deflate_stream
.next_out
- buf
);
/* Number of bytes written into buf. */
169 return len
- data
->deflate_stream
.avail_out
;
/* Inflate the len bytes at data into a heap buffer, using the z_stream
 * stored in st->data.
 *
 * *new_len is set to 0 on entry and is later assigned stream->total_out.
 * A zero len returns NULL immediately.  Each loop iteration enlarges the
 * output buffer to total_out + MAX_STR_LEN bytes with mem_realloc(),
 * offers zlib MAX_STR_LEN bytes of output space and calls inflate() with
 * Z_SYNC_FLUSH; the loop continues while inflate() returns Z_OK and input
 * remains.  When inflate() reports Z_STREAM_END, after_end is set.
 *
 * NOTE(review): window_size is not referenced in the visible lines.  The
 * braces, the declaration of error, the opening of the do-loop, the
 * check of mem_realloc()'s result and the assignment to buffer, the
 * conditions guarding the final new_len / mem_free() statements, and the
 * return statements are not visible in this copy of the file. */
172 static unsigned char *
173 deflate_decode_buffer(struct stream_encoded
*st
, int window_size
, unsigned char *data
, int len
, int *new_len
)
175 struct deflate_enc_data
*enc_data
= (struct deflate_enc_data
*) st
->data
;
176 z_stream
*stream
= &enc_data
->deflate_stream
;
177 unsigned char *buffer
= NULL
;
180 *new_len
= 0; /* default, left there if an error occurs */
182 if (!len
) return NULL
;
/* Feed the caller's data to zlib and restart its output byte counter. */
183 stream
->next_in
= data
;
184 stream
->avail_in
= len
;
185 stream
->total_out
= 0;
188 unsigned char *new_buffer
;
/* Room for everything produced so far plus one more MAX_STR_LEN chunk. */
189 size_t size
= stream
->total_out
+ MAX_STR_LEN
;
191 new_buffer
= mem_realloc(buffer
, size
);
/* Append new output directly after the bytes already produced. */
198 stream
->next_out
= buffer
+ stream
->total_out
;
199 stream
->avail_out
= MAX_STR_LEN
;
201 error
= inflate(stream
, Z_SYNC_FLUSH
);
202 if (error
== Z_STREAM_END
) {
205 } while (error
== Z_OK
&& stream
->avail_in
> 0);
207 if (error
== Z_STREAM_END
) {
/* Record that the end of the compressed stream was reached;
 * deflate_close() checks this flag before calling inflateEnd(). */
209 enc_data
->after_end
= 1;
/* Report the total number of decompressed bytes. */
214 *new_len
= stream
->total_out
;
217 if (buffer
) mem_free(buffer
);
/* Buffer-decoding entry point for headerless DEFLATE data: forwards st,
 * data, len and new_len to deflate_decode_buffer() with a window size of
 * -MAX_WBITS and returns its result unchanged.
 *
 * NOTE(review): the function's braces are not visible in this copy. */
222 static unsigned char *
223 deflate_raw_decode_buffer(struct stream_encoded
*st
, unsigned char *data
, int len
, int *new_len
)
225 /* raw DEFLATE with neither zlib nor gzip header */
226 return deflate_decode_buffer(st
, -MAX_WBITS
, data
, len
, new_len
);
/* Buffer-decoding entry point for gzip- or zlib-wrapped data: forwards
 * st, data, len and new_len to deflate_decode_buffer() with a window size
 * of MAX_WBITS + 32 and returns its result unchanged.
 *
 * NOTE(review): the function's braces are not visible in this copy. */
229 static unsigned char *
230 deflate_gzip_decode_buffer(struct stream_encoded
*st
, unsigned char *data
, int len
, int *new_len
)
232 /* detect gzip header, else assume zlib header */
233 return deflate_decode_buffer(st
, MAX_WBITS
+ 32, data
, len
, new_len
);
/* Tear down the decoder state held in stream->data.
 *
 * inflateEnd() is called on the z_stream unless after_end is set, and the
 * stored descriptor is examined when it is not -1.
 *
 * NOTE(review): the return type, the braces, the body of the
 * fdread != -1 branch, and any freeing of *data are not visible in this
 * copy of the file.  Presumably after_end means the z_stream was already
 * ended elsewhere; confirm against deflate_decode_buffer() in the complete
 * source. */
237 deflate_close(struct stream_encoded
*stream
)
239 struct deflate_enc_data
*data
= (struct deflate_enc_data
*) stream
->data
;
/* Skip inflateEnd() when the end of the stream was already recorded. */
242 if (!data
->after_end
) {
243 inflateEnd(&data
->deflate_stream
);
245 if (data
->fdread
!= -1) {
/* NULL-terminated string list for the deflate backend; it has no entries
 * other than the terminator.  Presumably these are file-name extensions,
 * as the identifier suggests; confirm against struct decoding_backend. */
253 static const unsigned char *const deflate_extensions
[] = { NULL
};
/* Backend descriptor for the deflate decoder.  The only initializer entry
 * visible here is deflate_raw_decode_buffer.
 *
 * NOTE(review): the remaining initializer entries and the closing brace
 * are not visible in this copy of the file. */
255 const struct decoding_backend deflate_decoding_backend
= {
260 deflate_raw_decode_buffer
,
/* NULL-terminated string list for the gzip backend, holding ".gz" and
 * ".tgz".  Presumably these are the file-name extensions associated with
 * gzip data; confirm against struct decoding_backend. */
264 static const unsigned char *const gzip_extensions
[] = { ".gz", ".tgz", NULL
};
266 const struct decoding_backend gzip_decoding_backend
= {
271 deflate_gzip_decode_buffer
,