BZ2_bzRead2 is based on BZ2_bzRead from the bzlib library.
[elinks.git] / src / encoding / bzip2.c
blobeb6f90cb43bff475e4723bd788cb7ee887974d24
1 /* Bzip2 encoding (ENCODING_BZIP2) backend */
3 #ifdef HAVE_CONFIG_H
4 #include "config.h"
5 #endif
7 #include <stdio.h>
8 #include <string.h>
9 #ifdef HAVE_UNISTD_H
10 #include <unistd.h>
11 #endif
12 #ifdef HAVE_BZLIB_H
13 #include <bzlib.h> /* Everything needs this after stdio.h */
14 #endif
16 #include "elinks.h"
18 #include "encoding/bzip2.h"
19 #include "encoding/encoding.h"
20 #include "util/memory.h"
/* Per-stream decoder state; bzip2_open() allocates it and stores it in
 * stream->data, bzip2_close() releases it. */
struct bz2_enc_data {
	/* stdio stream obtained by fdopen() on the descriptor given to
	 * bzip2_open(). */
	FILE *file;

	/* libbz2 read handle opened on top of @file. */
	BZFILE *bzfile;

	/* Set to 1 by bzip2_read() once the decoder reported BZ_STREAM_END;
	 * every later bzip2_read() call then returns 0 immediately. */
	int last_read;
};
/* Local view of what an opaque BZFILE handle points to. BZ2_bzRead2() and
 * bzip2_read() cast BZFILE pointers to this type in order to reach the
 * underlying FILE and bz_stream, so neither the order nor the types of the
 * fields may be changed here.
 * NOTE(review): presumably this mirrors the private structure in bzlib.c --
 * confirm it still matches the libbz2 version being linked. */
struct bzFile {
	FILE *handle;			/* stream that compressed input is read from */
	char buf[BZ_MAX_UNUSED];	/* staging buffer filled by fread() */
	int bufN;			/* number of bytes last read into @buf */
	unsigned char writing;		/* non-zero: reads fail with BZ_SEQUENCE_ERROR */
	bz_stream strm;			/* low-level decompressor state */
	int lastErr;			/* last status recorded through BZ_SETERR() */
	unsigned char initialisedOk;	/* not referenced by the code in this file */
};
38 /* TODO: When it'll be official, use bzdopen() from Yoshioka Tsuneo. --pasky */
40 static int
41 bzip2_open(struct stream_encoded *stream, int fd)
43 struct bz2_enc_data *data = mem_alloc(sizeof(*data));
44 int err;
46 if (!data) {
47 return -1;
49 data->last_read = 0;
51 data->file = fdopen(fd, "rb");
53 data->bzfile = BZ2_bzReadOpen(&err, data->file, 0, 0, NULL, 0);
54 if (!data->bzfile) {
55 mem_free(data);
56 return -1;
59 stream->data = data;
61 return 0;
/* Tells whether @f has no more bytes to deliver, without consuming any:
 * one character is read and, if there was one, pushed straight back.
 *
 * Returns 1 when fgetc() yields EOF (end of file or read error), else 0. */
static unsigned char
myfeof(FILE *f)
{
	int peeked = fgetc(f);

	if (peeked != EOF) {
		ungetc(peeked, f);
		return 0;
	}

	return 1;
}
74 #define BZ_SETERR(eee) \
75 { \
76 if (bzerror != NULL) *bzerror = eee; \
77 if (bzf != NULL) bzf->lastErr = eee; \
80 static int
81 BZ2_bzRead2(int *bzerror, BZFILE *b, void *buf, int len)
83 int n, ret, pi = 0;
84 struct bzFile *bzf = (struct bzFile *)b;
86 BZ_SETERR(BZ_OK);
88 if (bzf == NULL || buf == NULL || len < 0) {
89 BZ_SETERR(BZ_PARAM_ERROR);
90 return 0;
93 if (bzf->writing) {
94 BZ_SETERR(BZ_SEQUENCE_ERROR);
95 return 0;
98 if (len == 0) {
99 BZ_SETERR(BZ_OK);
100 return 0;
103 bzf->strm.avail_out = len;
104 bzf->strm.next_out = buf;
106 while (1) {
107 if (ferror(bzf->handle)) {
108 BZ_SETERR(BZ_IO_ERROR);
109 return 0;
112 if (bzf->strm.avail_in == 0 && !myfeof(bzf->handle)) {
113 n = fread(bzf->buf, 1, BZ_MAX_UNUSED, bzf->handle);
114 if (ferror(bzf->handle)) {
115 if (n < 0) {
116 BZ_SETERR(BZ_IO_ERROR);
117 return 0;
118 } else {
119 BZ_SETERR(BZ_OK);
120 pi = 1;
123 bzf->bufN = n;
124 bzf->strm.avail_in = bzf->bufN;
125 bzf->strm.next_in = bzf->buf;
128 ret = BZ2_bzDecompress ( &(bzf->strm) );
129 if (ret != BZ_OK && ret != BZ_STREAM_END) {
130 BZ_SETERR(ret);
131 return 0;
134 if (ret == BZ_OK && myfeof(bzf->handle) &&
135 bzf->strm.avail_in == 0 && bzf->strm.avail_out > 0) {
136 if (!pi) {
137 BZ_SETERR(BZ_UNEXPECTED_EOF);
138 return 0;
139 } else {
140 return len - bzf->strm.avail_out;
144 if (ret == BZ_STREAM_END) {
145 BZ_SETERR(BZ_STREAM_END);
146 return len - bzf->strm.avail_out;
149 if (bzf->strm.avail_out == 0) {
150 BZ_SETERR(BZ_OK);
151 return len;
154 return 0; /*not reached*/
156 #undef BZ_STRERR
158 static int
159 bzip2_read(struct stream_encoded *stream, unsigned char *buf, int len)
161 struct bz2_enc_data *data = (struct bz2_enc_data *) stream->data;
162 int err = 0;
163 struct bzFile *bzf = (struct bzFile *)data->bzfile;
165 if (data->last_read)
166 return 0;
168 clearerr(bzf->handle);
169 len = BZ2_bzRead2(&err, data->bzfile, buf, len);
171 if (err == BZ_STREAM_END)
172 data->last_read = 1;
173 else if (err)
174 return -1;
176 return len;
/* Pass-through decode hook: hands back @data untouched and reports its
 * length @len through @new_len. @stream is not used. */
static unsigned char *
bzip2_decode(struct stream_encoded *stream, unsigned char *data, int len,
	     int *new_len)
{
	unsigned char *unchanged = data;

	*new_len = len;

	return unchanged;
}
187 #ifdef CONFIG_SMALL
188 #define BZIP2_SMALL 1
189 #else
190 #define BZIP2_SMALL 0
191 #endif
193 static unsigned char *
194 bzip2_decode_buffer(unsigned char *data, int len, int *new_len)
196 bz_stream stream;
197 unsigned char *buffer = NULL;
198 int error;
200 memset(&stream, 0, sizeof(bz_stream));
201 stream.next_in = data;
202 stream.avail_in = len;
204 if (BZ2_bzDecompressInit(&stream, 0, BZIP2_SMALL) != BZ_OK)
205 return NULL;
207 do {
208 unsigned char *new_buffer;
209 size_t size = stream.total_out_lo32 + MAX_STR_LEN;
211 /* FIXME: support for 64 bit. real size is
213 * (total_in_hi32 << * 32) + total_in_lo32
215 * --jonas */
216 assertm(!stream.total_out_hi32, "64 bzip2 decoding not supported");
218 new_buffer = mem_realloc(buffer, size);
219 if (!new_buffer) {
220 error = BZ_MEM_ERROR;
221 break;
224 buffer = new_buffer;
225 stream.next_out = buffer + stream.total_out_lo32;
226 stream.avail_out = MAX_STR_LEN;
228 error = BZ2_bzDecompress(&stream);
229 if (error == BZ_STREAM_END) {
230 *new_len = stream.total_out_lo32;
231 error = BZ_OK;
232 break;
235 /* Apparently BZ_STREAM_END is not forced when the end of input
236 * is reached. At least lindi- reported that it caused a
237 * reproducable infinite loop. Maybe it has to do with decoding
238 * an incomplete file. */
239 } while (error == BZ_OK && stream.avail_in > 0);
241 BZ2_bzDecompressEnd(&stream);
243 if (error != BZ_OK) {
244 if (buffer) mem_free(buffer);
245 *new_len = 0;
246 return NULL;
249 return buffer;
252 static void
253 bzip2_close(struct stream_encoded *stream)
255 struct bz2_enc_data *data = (struct bz2_enc_data *) stream->data;
256 int err;
258 BZ2_bzReadClose(&err, data->bzfile);
259 fclose(data->file);
260 mem_free(data);
/* NULL-terminated list of file name extensions handled by this backend. */
static unsigned char *bzip2_extensions[] = {
	".bz2",
	".tbz",
	NULL,
};
265 struct decoding_backend bzip2_decoding_backend = {
266 "bzip2",
267 bzip2_extensions,
268 bzip2_open,
269 bzip2_read,
270 bzip2_decode,
271 bzip2_decode_buffer,
272 bzip2_close,