1 /* Stream reading and decoding (mostly decompression) */
10 #include <sys/stat.h> /* OS/2 needs this after sys/types.h */
11 #include <sys/types.h>
13 #include <fcntl.h> /* OS/2 needs this after sys/types.h */
21 #include "config/options.h"
22 #include "encoding/encoding.h"
23 #include "network/state.h"
24 #include "osdep/osdep.h"
25 #include "util/memory.h"
26 #include "util/string.h"
29 /*************************************************************************
30 Dummy encoding (ENCODING_NONE)
31 *************************************************************************/
/* Private state of the dummy (ENCODING_NONE) backend, kept in stream->data:
 * only the file descriptor the raw bytes are read from. */
struct dummy_enc_data {
	int fd;
};
38 dummy_open(struct stream_encoded
*stream
, int fd
)
40 stream
->data
= mem_alloc(sizeof(struct dummy_enc_data
));
41 if (!stream
->data
) return -1;
43 ((struct dummy_enc_data
*) stream
->data
)->fd
= fd
;
49 dummy_read(struct stream_encoded
*stream
, unsigned char *data
, int len
)
51 return safe_read(((struct dummy_enc_data
*) stream
->data
)->fd
, data
, len
);
/* "Decode" a whole buffer for the dummy backend, i.e. duplicate it.
 *
 * Returns a copy of the @len bytes at @data made with memacpy() and stores
 * @len in *@new_len. Returns NULL, leaving *@new_len untouched, when the
 * copy cannot be allocated. @stream is not used. */
static unsigned char *
dummy_decode_buffer(struct stream_encoded *stream, unsigned char *data, int len,
		    int *new_len)
{
	unsigned char *buffer = memacpy(data, len);

	if (!buffer) return NULL;

	*new_len = len;

	return buffer;
}
66 dummy_close(struct stream_encoded
*stream
)
68 close(((struct dummy_enc_data
*) stream
->data
)->fd
);
69 mem_free(stream
->data
);
/* The dummy backend claims no file name extensions: the NULL-terminated
 * list is empty. */
static const unsigned char *const dummy_extensions[] = { NULL };
/* Backend descriptor for the dummy (ENCODING_NONE) encoding.
 * NOTE(review): the initializer entries are not visible in this view. The
 * functions in this file read the name, extensions, open, read,
 * decode_buffer and close members of each backend through
 * decoding_backends[] -- confirm that all of them are set here. */
74 static const struct decoding_backend dummy_decoding_backend
= {
84 /* Dynamic backend area */
86 #include "encoding/brotli.h"
87 #include "encoding/bzip2.h"
88 #include "encoding/deflate.h"
89 #include "encoding/lzma.h"
91 static const struct decoding_backend
*const decoding_backends
[] = {
92 &dummy_decoding_backend
,
93 &gzip_decoding_backend
,
94 &bzip2_decoding_backend
,
95 &lzma_decoding_backend
,
96 &deflate_decoding_backend
,
97 &brotli_decoding_backend
,
101 /*************************************************************************
103 *************************************************************************/
106 /* Associates encoded stream with a fd. */
107 struct stream_encoded
*
108 open_encoded(int fd
, enum stream_encoding encoding
)
110 struct stream_encoded
*stream
;
112 stream
= mem_alloc(sizeof(*stream
));
113 if (!stream
) return NULL
;
115 stream
->encoding
= encoding
;
116 if (decoding_backends
[stream
->encoding
]->open(stream
, fd
) >= 0)
123 /* Read available data from stream and decode them. Note that when data change
124 * their size during decoding, 'len' indicates desired size of _returned_ data,
125 * not desired size of data read from stream. */
127 read_encoded(struct stream_encoded
*stream
, unsigned char *data
, int len
)
129 return decoding_backends
[stream
->encoding
]->read(stream
, data
, len
);
132 /* Decode an entire file from a buffer. This function is not suitable
133 * for parts of files. @data contains the original data, @len bytes
134 * long. The resulting decoded data chunk is *@new_len bytes long. */
136 decode_encoded_buffer(struct stream_encoded
*stream
, enum stream_encoding encoding
, unsigned char *data
, int len
,
139 return decoding_backends
[encoding
]->decode_buffer(stream
, data
, len
, new_len
);
142 /* Closes encoded stream. Note that fd associated with the stream will be
145 close_encoded(struct stream_encoded
*stream
)
147 decoding_backends
[stream
->encoding
]->close(stream
);
152 /* Return a list of extensions associated with that encoding. */
153 const unsigned char *const *listext_encoded(enum stream_encoding encoding
)
155 return decoding_backends
[encoding
]->extensions
;
159 guess_encoding(unsigned char *filename
)
161 int fname_len
= strlen(filename
);
162 unsigned char *fname_end
= filename
+ fname_len
;
165 for (enc
= 1; enc
< ENCODINGS_KNOWN
; enc
++) {
166 const unsigned char *const *ext
= decoding_backends
[enc
]->extensions
;
168 while (ext
&& *ext
) {
169 int len
= strlen(*ext
);
171 if (fname_len
>= len
&& !strcmp(fname_end
- len
, *ext
))
178 return ENCODING_NONE
;
181 const unsigned char *
182 get_encoding_name(enum stream_encoding encoding
)
184 return decoding_backends
[encoding
]->name
;
190 /* Tries to open @prefixname with each of the supported encoding extensions
192 static inline enum stream_encoding
193 try_encoding_extensions(struct string
*filename
, int *fd
)
195 int length
= filename
->length
;
198 /* No file of that name was found, try some others names. */
199 for (encoding
= 1; encoding
< ENCODINGS_KNOWN
; encoding
++) {
200 const unsigned char *const *ext
= listext_encoded(encoding
);
202 for (; ext
&& *ext
; ext
++) {
203 add_to_string(filename
, *ext
);
205 /* We try with some extensions. */
206 *fd
= open(filename
->source
, O_RDONLY
| O_NOCTTY
);
209 /* Ok, found one, use it. */
212 filename
->source
[length
] = 0;
213 filename
->length
= length
;
217 return ENCODING_NONE
;
220 /** Reads the file from @a stream in chunks of size @a readsize.
222 * @a stream should be in blocking mode. If it is in non-blocking
223 * mode, this function can return an empty string in @a page just
224 * because no more data is available yet, and the caller cannot know
225 * whether the true end of the stream has been reached.
227 * @return a connection state. S_OK if all is well. */
228 struct connection_state
229 read_file(struct stream_encoded
*stream
, int readsize
, struct string
*page
)
231 if (!init_string(page
)) return connection_state(S_OUT_OF_MEM
);
233 /* We read with granularity of stt.st_size (given as @readsize) - this
234 * does best job for uncompressed files, and doesn't hurt for
235 * compressed ones anyway - very large files usually tend to inflate
236 * fast anyway. At least I hope ;). --pasky */
237 /* Also there because of bug in Linux. Read returns -EACCES when
238 * reading 0 bytes to invalid address so ensure never to try and
239 * allocate zero number of bytes. */
240 if (!readsize
) readsize
= 4096;
242 while (realloc_string(page
, page
->length
+ readsize
)) {
243 unsigned char *string_pos
= page
->source
+ page
->length
;
244 int readlen
= read_encoded(stream
, string_pos
, readsize
);
249 /* If it is some I/O error (and errno is set) that will
250 * do. Since errno == 0 == S_WAIT and we cannot have
253 return connection_state_for_errno(errno
);
255 /* FIXME: This is indeed an internal error. If readed from a
256 * corrupted encoded file nothing or only some of the
257 * data will be read. */
258 return connection_state(S_ENCODE_ERROR
);
260 } else if (readlen
== 0) {
261 /* NUL-terminate just in case */
262 page
->source
[page
->length
] = '\0';
263 return connection_state(S_OK
);
266 page
->length
+= readlen
;
268 /* This didn't work so well as it should (I had to implement
269 * end of stream handling to bzip2 anyway), so I rather
271 if (readlen
< readsize
) {
272 /* This is much safer. It should always mean that we
273 * already read everything possible, and it permits us
274 * more elegant of handling end of file with bzip2. */
281 return connection_state(S_OUT_OF_MEM
);
285 is_stdin_pipe(struct stat
*stt
, struct string
*filename
)
287 /* On Mac OS X, /dev/stdin has type S_IFSOCK. (bug 616) */
288 return !strlcmp(filename
->source
, filename
->length
, "/dev/stdin", 10)
291 S_ISSOCK(stt
->st_mode
) ||
293 S_ISFIFO(stt
->st_mode
));
296 struct connection_state
297 read_encoded_file(struct string
*filename
, struct string
*page
)
299 struct stream_encoded
*stream
;
301 enum stream_encoding encoding
= ENCODING_NONE
;
302 int fd
= open(filename
->source
, O_RDONLY
| O_NOCTTY
);
303 struct connection_state state
= connection_state_for_errno(errno
);
305 if (fd
== -1 && get_opt_bool("protocol.file.try_encoding_extensions", NULL
)) {
306 encoding
= try_encoding_extensions(filename
, &fd
);
308 } else if (fd
!= -1) {
309 encoding
= guess_encoding(filename
->source
);
313 #ifdef HAVE_SYS_CYGWIN_H
314 /* There is no /dev/stdin on Cygwin. */
315 if (!strlcmp(filename
->source
, filename
->length
, "/dev/stdin", 10)) {
322 /* Some file was opened so let's get down to bi'ness */
325 /* Do all the necessary checks before trying to read the file.
326 * @state code is used to block further progress. */
327 if (fstat(fd
, &stt
)) {
328 state
= connection_state_for_errno(errno
);
330 } else if (!S_ISREG(stt
.st_mode
) && encoding
!= ENCODING_NONE
) {
331 /* We only want to open regular encoded files. */
332 /* Leave @state being the saved errno */
334 } else if (!S_ISREG(stt
.st_mode
) && !is_stdin_pipe(&stt
, filename
)
335 && !get_opt_bool("protocol.file.allow_special_files", NULL
)) {
336 state
= connection_state(S_FILE_TYPE
);
338 } else if (!(stream
= open_encoded(fd
, encoding
))) {
339 state
= connection_state(S_OUT_OF_MEM
);
342 int readsize
= (int) stt
.st_size
;
344 /* Check if st_size will cause overflow. */
345 /* FIXME: See bug 497 for info about support for big files. */
346 if (readsize
!= stt
.st_size
|| readsize
< 0) {
348 state
= connection_state_for_errno(EFBIG
);
350 state
= connection_state(S_FILE_ERROR
);
354 state
= read_file(stream
, stt
.st_size
, page
);
356 close_encoded(stream
);