Experimental brotli encoding support.
[elinks.git] / src / encoding / encoding.c
blobd072da7fdded6ce8aeb186f8142ce973bb7c9bef
1 /* Stream reading and decoding (mostly decompression) */
3 #ifdef HAVE_CONFIG_H
4 #include "config.h"
5 #endif
7 #include <errno.h>
8 #include <stdio.h>
9 #include <string.h>
10 #include <sys/stat.h> /* OS/2 needs this after sys/types.h */
11 #include <sys/types.h>
12 #ifdef HAVE_FCNTL_H
13 #include <fcntl.h> /* OS/2 needs this after sys/types.h */
14 #endif
15 #ifdef HAVE_UNISTD_H
16 #include <unistd.h>
17 #endif
19 #include "elinks.h"
21 #include "config/options.h"
22 #include "encoding/encoding.h"
23 #include "network/state.h"
24 #include "osdep/osdep.h"
25 #include "util/memory.h"
26 #include "util/string.h"
29 /*************************************************************************
30 Dummy encoding (ENCODING_NONE)
31 *************************************************************************/
/* Private state kept by the pass-through (ENCODING_NONE) backend. */
struct dummy_enc_data {
	/* Descriptor handed to dummy_open(); read from and closed by the
	 * other dummy_* callbacks. */
	int fd;
};
37 static int
38 dummy_open(struct stream_encoded *stream, int fd)
40 stream->data = mem_alloc(sizeof(struct dummy_enc_data));
41 if (!stream->data) return -1;
43 ((struct dummy_enc_data *) stream->data)->fd = fd;
45 return 0;
48 static int
49 dummy_read(struct stream_encoded *stream, unsigned char *data, int len)
51 return safe_read(((struct dummy_enc_data *) stream->data)->fd, data, len);
/* "Decodes" a whole buffer for ENCODING_NONE by duplicating it with
 * memacpy(). On success the copy is returned and *@new_len is set to @len;
 * if the copy could not be made, NULL is returned and *@new_len is left
 * untouched. @stream is not used. */
static unsigned char *
dummy_decode_buffer(struct stream_encoded *stream, unsigned char *data, int len, int *new_len)
{
	unsigned char *copy = memacpy(data, len);

	if (copy)
		*new_len = len;

	return copy;
}
65 static void
66 dummy_close(struct stream_encoded *stream)
68 close(((struct dummy_enc_data *) stream->data)->fd);
69 mem_free(stream->data);
/* The pass-through backend is not tied to any file name extension, so its
 * NULL-terminated extension list is empty. */
static const unsigned char *const dummy_extensions[] = {
	NULL
};
74 static const struct decoding_backend dummy_decoding_backend = {
75 "none",
76 dummy_extensions,
77 dummy_open,
78 dummy_read,
79 dummy_decode_buffer,
80 dummy_close,
84 /* Dynamic backend area */
86 #include "encoding/brotli.h"
87 #include "encoding/bzip2.h"
88 #include "encoding/deflate.h"
89 #include "encoding/lzma.h"
91 static const struct decoding_backend *const decoding_backends[] = {
92 &dummy_decoding_backend,
93 &gzip_decoding_backend,
94 &bzip2_decoding_backend,
95 &lzma_decoding_backend,
96 &deflate_decoding_backend,
97 &brotli_decoding_backend,
101 /*************************************************************************
102 Public functions
103 *************************************************************************/
106 /* Associates encoded stream with a fd. */
107 struct stream_encoded *
108 open_encoded(int fd, enum stream_encoding encoding)
110 struct stream_encoded *stream;
112 stream = mem_alloc(sizeof(*stream));
113 if (!stream) return NULL;
115 stream->encoding = encoding;
116 if (decoding_backends[stream->encoding]->open(stream, fd) >= 0)
117 return stream;
119 mem_free(stream);
120 return NULL;
123 /* Read available data from stream and decode them. Note that when data change
124 * their size during decoding, 'len' indicates desired size of _returned_ data,
125 * not desired size of data read from stream. */
127 read_encoded(struct stream_encoded *stream, unsigned char *data, int len)
129 return decoding_backends[stream->encoding]->read(stream, data, len);
132 /* Decode an entire file from a buffer. This function is not suitable
133 * for parts of files. @data contains the original data, @len bytes
134 * long. The resulting decoded data chunk is *@new_len bytes long. */
135 unsigned char *
136 decode_encoded_buffer(struct stream_encoded *stream, enum stream_encoding encoding, unsigned char *data, int len,
137 int *new_len)
139 return decoding_backends[encoding]->decode_buffer(stream, data, len, new_len);
142 /* Closes encoded stream. Note that fd associated with the stream will be
143 * closed here. */
144 void
145 close_encoded(struct stream_encoded *stream)
147 decoding_backends[stream->encoding]->close(stream);
148 mem_free(stream);
152 /* Return a list of extensions associated with that encoding. */
153 const unsigned char *const *listext_encoded(enum stream_encoding encoding)
155 return decoding_backends[encoding]->extensions;
158 enum stream_encoding
159 guess_encoding(unsigned char *filename)
161 int fname_len = strlen(filename);
162 unsigned char *fname_end = filename + fname_len;
163 int enc;
165 for (enc = 1; enc < ENCODINGS_KNOWN; enc++) {
166 const unsigned char *const *ext = decoding_backends[enc]->extensions;
168 while (ext && *ext) {
169 int len = strlen(*ext);
171 if (fname_len >= len && !strcmp(fname_end - len, *ext))
172 return enc;
174 ext++;
178 return ENCODING_NONE;
181 const unsigned char *
182 get_encoding_name(enum stream_encoding encoding)
184 return decoding_backends[encoding]->name;
188 /* File reading */
190 /* Tries to open @prefixname with each of the supported encoding extensions
191 * appended. */
192 static inline enum stream_encoding
193 try_encoding_extensions(struct string *filename, int *fd)
195 int length = filename->length;
196 int encoding;
198 /* No file of that name was found, try some others names. */
199 for (encoding = 1; encoding < ENCODINGS_KNOWN; encoding++) {
200 const unsigned char *const *ext = listext_encoded(encoding);
202 for (; ext && *ext; ext++) {
203 add_to_string(filename, *ext);
205 /* We try with some extensions. */
206 *fd = open(filename->source, O_RDONLY | O_NOCTTY);
208 if (*fd >= 0)
209 /* Ok, found one, use it. */
210 return encoding;
212 filename->source[length] = 0;
213 filename->length = length;
217 return ENCODING_NONE;
220 /** Reads the file from @a stream in chunks of size @a readsize.
222 * @a stream should be in blocking mode. If it is in non-blocking
223 * mode, this function can return an empty string in @a page just
224 * because no more data is available yet, and the caller cannot know
225 * whether the true end of the stream has been reached.
227 * @return a connection state. S_OK if all is well. */
228 struct connection_state
229 read_file(struct stream_encoded *stream, int readsize, struct string *page)
231 if (!init_string(page)) return connection_state(S_OUT_OF_MEM);
233 /* We read with granularity of stt.st_size (given as @readsize) - this
234 * does best job for uncompressed files, and doesn't hurt for
235 * compressed ones anyway - very large files usually tend to inflate
236 * fast anyway. At least I hope ;). --pasky */
237 /* Also there because of bug in Linux. Read returns -EACCES when
238 * reading 0 bytes to invalid address so ensure never to try and
239 * allocate zero number of bytes. */
240 if (!readsize) readsize = 4096;
242 while (realloc_string(page, page->length + readsize)) {
243 unsigned char *string_pos = page->source + page->length;
244 int readlen = read_encoded(stream, string_pos, readsize);
246 if (readlen < 0) {
247 done_string(page);
249 /* If it is some I/O error (and errno is set) that will
250 * do. Since errno == 0 == S_WAIT and we cannot have
251 * that. */
252 if (errno)
253 return connection_state_for_errno(errno);
255 /* FIXME: This is indeed an internal error. If readed from a
256 * corrupted encoded file nothing or only some of the
257 * data will be read. */
258 return connection_state(S_ENCODE_ERROR);
260 } else if (readlen == 0) {
261 /* NUL-terminate just in case */
262 page->source[page->length] = '\0';
263 return connection_state(S_OK);
266 page->length += readlen;
267 #if 0
268 /* This didn't work so well as it should (I had to implement
269 * end of stream handling to bzip2 anyway), so I rather
270 * disabled this. */
271 if (readlen < readsize) {
272 /* This is much safer. It should always mean that we
273 * already read everything possible, and it permits us
274 * more elegant of handling end of file with bzip2. */
275 break;
277 #endif
280 done_string(page);
281 return connection_state(S_OUT_OF_MEM);
284 static inline int
285 is_stdin_pipe(struct stat *stt, struct string *filename)
287 /* On Mac OS X, /dev/stdin has type S_IFSOCK. (bug 616) */
288 return !strlcmp(filename->source, filename->length, "/dev/stdin", 10)
289 && (
290 #ifdef S_ISSOCK
291 S_ISSOCK(stt->st_mode) ||
292 #endif
293 S_ISFIFO(stt->st_mode));
296 struct connection_state
297 read_encoded_file(struct string *filename, struct string *page)
299 struct stream_encoded *stream;
300 struct stat stt;
301 enum stream_encoding encoding = ENCODING_NONE;
302 int fd = open(filename->source, O_RDONLY | O_NOCTTY);
303 struct connection_state state = connection_state_for_errno(errno);
305 if (fd == -1 && get_opt_bool("protocol.file.try_encoding_extensions", NULL)) {
306 encoding = try_encoding_extensions(filename, &fd);
308 } else if (fd != -1) {
309 encoding = guess_encoding(filename->source);
312 if (fd == -1) {
313 #ifdef HAVE_SYS_CYGWIN_H
314 /* There is no /dev/stdin on Cygwin. */
315 if (!strlcmp(filename->source, filename->length, "/dev/stdin", 10)) {
316 fd = STDIN_FILENO;
317 } else
318 #endif
319 return state;
322 /* Some file was opened so let's get down to bi'ness */
323 set_bin(fd);
325 /* Do all the necessary checks before trying to read the file.
326 * @state code is used to block further progress. */
327 if (fstat(fd, &stt)) {
328 state = connection_state_for_errno(errno);
330 } else if (!S_ISREG(stt.st_mode) && encoding != ENCODING_NONE) {
331 /* We only want to open regular encoded files. */
332 /* Leave @state being the saved errno */
334 } else if (!S_ISREG(stt.st_mode) && !is_stdin_pipe(&stt, filename)
335 && !get_opt_bool("protocol.file.allow_special_files", NULL)) {
336 state = connection_state(S_FILE_TYPE);
338 } else if (!(stream = open_encoded(fd, encoding))) {
339 state = connection_state(S_OUT_OF_MEM);
341 } else {
342 int readsize = (int) stt.st_size;
344 /* Check if st_size will cause overflow. */
345 /* FIXME: See bug 497 for info about support for big files. */
346 if (readsize != stt.st_size || readsize < 0) {
347 #ifdef EFBIG
348 state = connection_state_for_errno(EFBIG);
349 #else
350 state = connection_state(S_FILE_ERROR);
351 #endif
353 } else {
354 state = read_file(stream, stt.st_size, page);
356 close_encoded(stream);
359 close(fd);
360 return state;