file_case.{h,cpp}: wether->whether; 2017
[s-roff.git] / src / lib-roff / file_case.cpp
blob231c55a18faab52b51f86e5869913a664624a68e
1 /*@ file_case: input file encapsulator
3 * Copyright (c) 2014 - 2017 Steffen (Daode) Nurpmeso <steffen@sdaoden.eu>.
5 * Permission to use, copy, modify, and/or distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18 #include "config.h"
19 #include "lib.h"
21 #include <assert.h>
22 #include <errno.h>
23 #include <stdio.h>
25 #ifdef HAVE_ZLIB
26 # include <zlib.h>
27 #endif
29 #include "errarg.h"
30 #include "error.h"
31 #include "posix.h"
32 #include "nonposix.h"
34 #include "file_case.h"
36 #undef getc
37 #undef a_getc
38 #ifdef HAVE_DECL_GETC_UNLOCKED
39 # define a_getc getc_unlocked
40 #else
41 # define a_getc fgetc
42 #endif
44 // Support decompression XXX configure should say `no popen() - no unpacking'
45 #ifdef POPEN_MISSING
46 # undef HAVE_UNPACK
47 #endif
49 // (Enclosed by HAVE_UNPACK) Directly support decompression library layer?
50 // XXX We yet only support a zlib a_LAYER, which is why we directly address zlib
51 // XXX functions instead of further abstracting into a struct iolayer or sth.
52 // XXX If we would, that can only have read_buf() and close() and we should
53 // XXX deal with buffer handling entirely ourselves, in which case even the
54 // XXX popen(3) code path could be enwrapped into struct iolayer; i.e., then
55 // XXX the entire public read interface could internally be driven by iolayer
56 #ifndef HAVE_ZLIB
57 # define HAVE_ZLIB 0
58 #endif
59 #if HAVE_ZLIB
60 # define a_LAYER
61 #endif
// Working state that file_case::muxer() fills in and passes to the static
// open / unpack helpers of this file
63 struct a_args{
64 FILE *a_fp; // STD I/O stream (fopen(3), popen(3), stdin or temporary file); NULL if unset
65 void *a_layer; // I/O layer handle (a zlib gzFile in this file); NULL if unset
66 char const *a_path; // Path that is to be opened
67 size_t a_path_len; // Length of a_path (muxer() sets it via strlen(3))
68 char const *a_mode; // Mode for fopen(3), if used
69 uint32_t a_flags; // file_case flag bits, adjusted by the helpers
70 int32_t a_errno; // errno saved right after a failing call; muxer() restores it on error
73 #ifdef HAVE_UNPACK
// Description of one supported decompressor (one row of a_zprocs)
74 struct a_zproc{
75 uint8_t zp_popen; // NOTE(review): set to true for every a_zprocs row, not read in this file
76 uint8_t zp_ext_len; // Extension including `.' (<period>)
77 uint8_t zp_cmd_len; // Length of zp_cmd, excluding the terminating NUL
78 uint8_t zp_layer; // Uses I/O layer (zlib)
79 char zp_ext[5]; // File name extension, NUL terminated
80 char zp_cmd[15]; // Command that a__run_zproc() prepends to the path for popen(3)
// Table of the supported decompressors; which rows exist depends on the
// HAVE_UNPACK_* configuration macros
83 static a_zproc const a_zprocs[] = {
84 # undef a_X
// a_X(L,C,E): L is zp_layer, C the command (zp_cmd), E the extension (zp_ext);
// both stored lengths exclude the terminating NUL, zp_popen is always true
85 # define a_X(L,C,E) {true, sizeof(E) -1, sizeof(C) -1, L, E, C}
86 # ifdef HAVE_UNPACK_BZ2
87 a_X(0, "bzip2 -cdf", ".bz2"),
88 # endif
89 # ifdef HAVE_UNPACK_GZ
// The gzip row uses the zlib layer whenever HAVE_ZLIB is non-zero
90 a_X(HAVE_ZLIB, "gzip -cdf", ".gz"),
91 # endif
92 # ifdef HAVE_UNPACK_XZ
93 a_X(0, "xz -cdf", ".xz")
94 # endif
95 # undef a_X
97 #endif // HAVE_UNPACK
99 #ifdef HAVE_UNPACK
100 // Check whether path was explicitly specified with a packer extension.
101 // This returns a ternary: false only if we knew the extension, applied the
102 // zproc and that failed to perform, true otherwise (ap->a_fp is 2nd indicator)
103 static bool a_is_ext(a_args *ap);
105 // Plain file didn't exist, iterate over the supported packer extensions
106 // and see if a matching file exists instead; false if not / on error.
107 // Note that ap->a_errno is ENOENT on entry and only overwritten if we run
108 // a zproc and that fails XXX ENOENT is blindly used in codebase, but not ISO C
109 static bool a_try_all_ext(a_args *ap);
111 // Create a FILE* according to zp, return NULL on error
112 static a_args *a__run_zproc(a_args *ap, a_zproc const *zp);
114 // Callee needs seek()ing or STD I/O, unpack into temporary file, NULL on error
115 static a_args *a__unpack(a_args *ap);
116 #endif // HAVE_UNPACK
118 #ifdef HAVE_UNPACK
119 static bool
120 a_is_ext(a_args *ap){
121 for(a_zproc const *zp = a_zprocs; zp < a_zprocs + NELEM(a_zprocs); ++zp){
122 size_t el = zp->zp_ext_len;
123 if(ap->a_path_len <= el)
124 continue;
125 if(memcmp(ap->a_path + ap->a_path_len - el, zp->zp_ext, el))
126 continue;
128 ap = a__run_zproc(ap, zp);
129 break;
131 return (ap != NULL);
134 static bool
135 a_try_all_ext(a_args *ap){
136 for(a_zproc const *zp = a_zprocs; zp < a_zprocs + NELEM(a_zprocs); ++zp){
137 char *np = new char[ap->a_path_len + zp->zp_ext_len +1];
138 memcpy(np, ap->a_path, ap->a_path_len);
140 struct ::stat sb;
141 memcpy(np + ap->a_path_len, zp->zp_ext, zp->zp_ext_len +1);
142 if(stat(np, &sb)){
143 a_delete np;
144 continue;
147 // That's our zproc, let it make the deal
148 char const *pb_save = ap->a_path;
149 size_t pl_save = ap->a_path_len;
150 ap->a_path = np;
151 ap->a_path_len = pl_save + zp->zp_ext_len;
152 if((ap = a__run_zproc(ap, zp)) != NULL){
153 ap->a_path = pb_save;
154 ap->a_path_len = pl_save;
156 a_delete np;
157 goto jleave;
159 ap = NULL;
160 jleave:
161 return (ap != NULL);
164 static a_args *
165 a__run_zproc(a_args *ap, a_zproc const *zp){
166 # ifdef a_LAYER
167 if(zp->zp_layer){
168 if((ap->a_layer = gzopen(ap->a_path, "rb")) == NULL){
169 ap->a_errno = errno;
170 ap = NULL;
171 }else if(ap->a_flags &
172 (file_case::mux_need_seek | file_case::mux_need_stdio))
173 ap = a__unpack(ap);
174 }else{
175 # endif
176 char *np = new char[zp->zp_cmd_len + 1 + ap->a_path_len +1];
177 size_t l;
178 memcpy(np, zp->zp_cmd, l = zp->zp_cmd_len);
179 np[l++] = ' ';
180 memcpy(np + l, ap->a_path, ap->a_path_len +1);
182 if((ap->a_fp = popen(np, "r")) == NULL){
183 ap->a_errno = errno;
184 ap = NULL;
185 }else if(ap->a_flags & file_case::mux_need_seek)
186 ap = a__unpack(ap);
187 else
188 ap->a_flags |= file_case::fc_pipe | file_case::fc_have_stdio;
190 a_delete np;
191 # ifdef a_LAYER
193 # endif
195 return ap;
198 static a_args *
199 a__unpack(a_args *ap){
200 size_t const buf_len = (BUFSIZ + 0 > 1 << 15) ? BUFSIZ : 1 << 15;
201 uint8_t *buf = new uint8_t[buf_len];
203 // xtmpfile uses binary mode and fatal()s on error
204 FILE *decomp = xtmpfile(NULL, "groff_unpack"), *decomp_save = decomp;
205 for(;;){
206 size_t oc;
208 # ifdef a_LAYER
209 if(ap->a_layer != NULL){
210 int i = gzread((gzFile)ap->a_layer, buf, buf_len);
211 if(i == -1){
212 ap->a_errno = errno;
213 decomp = NULL;
214 break;
215 }else if(i == 0)
216 break;
217 oc = (size_t)i;
218 }else
219 # endif
220 if((oc = fread(buf, sizeof *buf, buf_len, ap->a_fp)) == 0){
221 if(!feof(ap->a_fp)){
222 ap->a_errno = errno;
223 decomp = NULL;
225 break;
228 if(decomp != NULL){
229 for(uint8_t *target = buf; oc > 0;){
230 size_t i = fwrite(target, sizeof *buf, oc, decomp);
231 if(i == 0)
232 break;
233 oc -= i;
234 target += i;
236 if(oc > 0){
237 ap->a_errno = errno;
238 decomp = NULL;
243 # ifdef a_LAYER
244 if(ap->a_layer != NULL){
245 if(gzclose((gzFile)ap->a_layer) != Z_OK)
246 error("decompressor gzclose(3) failed");
247 ap->a_layer = NULL;
248 }else
249 # endif
250 if(pclose(ap->a_fp) != 0)
251 error("decompressor pipe pclose(3) didn't exit cleanly");
253 if(decomp != NULL){
254 ap->a_flags |= file_case::fc_have_stdio;
255 rewind(ap->a_fp = decomp);
256 }else{
257 fclose(decomp_save);
258 ap->a_fp = NULL;
259 ap = NULL;
262 a_delete buf;
263 return ap;
265 #endif // HAVE_UNPACK
267 bool
268 file_case::close(void){
269 assert((_file != NULL && _layer == NULL) ||
270 (_file == NULL && _layer != NULL));
272 if(!(_flags & fc_const_path))
273 a_delete _path;
275 bool rv;
276 if(_flags & fc_dont_close)
277 rv = true;
278 #ifdef a_LAYER
279 else if(_layer != NULL)
280 rv = (gzclose((gzFile)_layer) == Z_OK);
281 #endif
282 #ifdef HAVE_UNPACK
283 else if(_flags & fc_pipe)
284 rv = (pclose(_file) == 0);
285 #endif
286 else
287 rv = (fclose(_file) == 0);
289 #ifndef NDEBUG
290 _path = NULL;
291 _file = NULL;
292 _layer = NULL;
293 _flags = fc_none;
294 #endif
295 return rv;
298 bool
299 file_case::is_eof(void) const{
300 bool rv;
301 #ifdef a_LAYER
302 if(_layer != NULL)
303 rv = (gzeof((gzFile)_layer) != 0);
304 else
305 #endif
306 rv = (feof(_file) != 0);
307 return rv;
311 file_case::get_c(void){
312 int rv;
313 #ifdef a_LAYER
314 if(_layer != NULL)
315 rv = gzgetc((gzFile)_layer);
316 else
317 #endif
318 rv = a_getc(_file);
319 return rv;
323 file_case::unget_c(int c){
324 int rv;
325 #ifdef a_LAYER
326 if(_layer != NULL)
327 rv = gzungetc(c, (gzFile)_layer);
328 else
329 #endif
330 rv = ungetc(c, _file);
331 return rv;
334 char *
335 file_case::get_line(char *buf, size_t buf_size){
336 #ifdef a_LAYER
337 if(_layer != NULL)
338 buf = gzgets((gzFile)_layer, buf, (int)buf_size);
339 else
340 #endif
341 buf = fgets(buf, (int)buf_size, _file);
342 return buf;
345 size_t
346 file_case::get_buf(void *buf, size_t buf_size){
347 size_t rv;
348 #ifdef a_LAYER
349 if(_layer != NULL){
350 int i = gzread((gzFile)_layer, buf, (unsigned int)buf_size);
351 rv = (i <= 0) ? 0 : (size_t)i;
352 }else
353 #endif
354 rv = fread(buf, 1, buf_size, _file);
355 return rv;
358 void
359 file_case::rewind(void){
360 #ifdef a_LAYER
361 if(_layer != NULL)
362 gzrewind((gzFile)_layer);
363 else
364 #endif
365 ::rewind(_file);
369 file_case::seek(long offset, seek_whence whence){
370 int x = (whence == seek_set ? SEEK_SET :
371 (whence == seek_cur ? SEEK_CUR : SEEK_END));
372 #ifdef a_LAYER
373 if(_layer != NULL)
374 x = gzseek((gzFile)_layer, (z_off_t)offset, x);
375 else
376 #endif
377 x = fseek(_file, offset, x);
378 return x;
381 /*static*/ file_case *
382 file_case::muxer(char const *path, uint32_t flags){
383 enum {tmpbit = 1<<(_mux_freebit+0)};
385 assert(!(flags & (fc_dont_close | fc_pipe)));
386 assert(!(flags & (fc_const_path | fc_take_path)) ||
387 !(flags & fc_const_path) != !(flags & fc_take_path));
388 assert(!(flags & (mux_unpack | mux_no_unpack)) ||
389 !(flags & mux_unpack) != !(flags & mux_no_unpack));
391 if(path == NULL || (path[0] == '-' && path[1] == '\0')){
392 path = "-";
393 flags &= ~fc_take_path;
394 flags |= fc_const_path | tmpbit;
395 }else if(!(flags & (fc_const_path | fc_take_path))){
396 path = strsave(path);
397 flags |= fc_take_path;
399 if(!(flags & (mux_unpack | mux_no_unpack)))
400 flags |= _mux_unpack_default;
402 file_case *fcp;
403 a_args a;
404 a.a_fp = NULL;
405 a.a_layer = NULL;
406 a.a_path_len = strlen(a.a_path = path);
407 a.a_mode = (flags & mux_need_binary) ? "rb" : "r";
408 a.a_flags = flags;
409 a.a_errno = 0;
411 // Shorthand: support "-" to mean stdin
412 if(flags & tmpbit){
413 clearerr(stdin);
414 if(flags & mux_need_binary)
415 SET_BINARY(fileno(stdin));
416 a.a_fp = stdin;
417 a.a_flags |= fc_dont_close | fc_const_path | fc_have_stdio;
418 goto jnew;
421 // If we support unpacking then check whether the path already includes
422 // a packer's extension, i.e., explicitly. Anyway unpack then, despite flags
423 #ifdef HAVE_UNPACK
424 if(!a_is_ext(&a)){
425 assert(a.a_fp == NULL && a.a_layer == NULL);
426 goto jerror;
428 if(a.a_fp != NULL || a.a_layer != NULL)
429 goto jnew;
430 #endif
432 // Try a plain open
433 errno = 0;
434 if((a.a_fp = fopen(a.a_path, a.a_mode)) != NULL){
435 a.a_flags |= fc_have_stdio;
436 jnew:
437 assert((a.a_fp != NULL && a.a_layer == NULL) ||
438 (a.a_fp == NULL && a.a_layer != NULL));
439 fcp = new file_case(a.a_fp, path, a.a_flags & fc_mask); // XXX real path?
440 fcp->_layer = a.a_layer;
441 goto jleave;
443 a.a_errno = errno;
445 // Then auto-expand the given path if so desired
446 #ifdef HAVE_UNPACK
447 if(a.a_errno == ENOENT && (a.a_flags & mux_unpack) && a_try_all_ext(&a))
448 goto jnew;
450 jerror:
451 #endif
452 if(!(a.a_flags & fc_const_path))
453 a_delete a.a_path;
454 errno = a.a_errno;
455 fcp = NULL;
456 jleave:
457 return fcp;
460 // s-it2-mode