1 /* $NetBSD: file.c,v 1.5 2011/02/16 18:35:39 joerg Exp $ */
3 /* $OpenBSD: file.c,v 1.11 2010/07/02 20:48:48 nicm Exp $ */
6 * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav
7 * Copyright (C) 2008-2010 Gabor Kovesdan <gabor@FreeBSD.org>
8 * Copyright (C) 2010 Dimitry Andric <dimitry@andric.com>
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 #include <sys/param.h>
36 #include <sys/types.h>
58 #include "freebsd-compat.h"
/*
 * Chunk size in bytes: the length passed to gzread(), BZ2_bzRead() and
 * read() in this file, and the size of lin_buf and of the malloc'ed read
 * buffer.
 */
60 #define MAXBUFSIZ (32 * 1024)
/* zlib stream handle; assigned from gzdopen() and consumed by gzread(). */
63 static gzFile gzbufdesc
;
/* liblzma decoder state, set up in grep_open() and driven by lzma_code(). */
65 static lzma_stream lstrm
= LZMA_STREAM_INIT
;
/* Action handed to lzma_code(); grep_refill() can switch it to LZMA_FINISH. */
66 static lzma_action laction
;
/* Staging area for compressed bytes read from the descriptor for liblzma. */
67 static uint8_t lin_buf
[MAXBUFSIZ
];
/* bzip2 stream handle; assigned from BZ2_bzdopen() and NULL-checked before use. */
70 static BZFILE
* bzbufdesc
;
/* Read buffer: either an mmap()ed region or MAXBUFSIZ bytes from grep_malloc(). */
73 static unsigned char *buffer
;
/* Position from which memchr()/memcpy() consume buffered data; presumably points into buffer -- TODO confirm. */
74 static unsigned char *bufpos
;
/* Line assembly buffer, grown through grep_realloc() in grep_lnbufgrow(). */
78 static unsigned char *lnbuf
;
/* Compared against the requested size in grep_lnbufgrow(); presumably the capacity of lnbuf -- TODO confirm. */
79 static size_t lnbuflen
;
/*
 * Refills the shared read buffer from f, picking the reader by filebehave:
 * gzread() for FILE_GZIP, BZ2_bzRead() for FILE_BZIP while a bzip2 handle
 * is open, lzma_code() for FILE_XZ/FILE_LZMA, and plain read() on f->fd
 * otherwise.
 *
 * NOTE(review): the return type, braces and return statements are on lines
 * missing from this listing.  Callers in this file treat a non-zero result
 * as failure (grep_refill(f) != 0) -- confirm against the full source.
 */
82 grep_refill(struct file
*f
)
/* mmap mode is tested before any reader runs; its branch body is not shown here. */
86 if (filebehave
== FILE_MMAP
)
92 if (filebehave
== FILE_GZIP
) {
/* zlib path: request up to MAXBUFSIZ bytes into buffer. */
93 nr
= gzread(gzbufdesc
, buffer
, MAXBUFSIZ
);
95 } else if (filebehave
== FILE_BZIP
&& bzbufdesc
!= NULL
) {
/* bzip2 path: request up to MAXBUFSIZ bytes; the status lands in bzerr. */
98 nr
= BZ2_bzRead(&bzerr
, bzbufdesc
, buffer
, MAXBUFSIZ
);
102 /* No problem, nr will be okay */
104 case BZ_DATA_ERROR_MAGIC
:
106 * As opposed to gzread(), which simply returns the
107 * plain file data, if it is not in the correct
108 * compressed format, BZ2_bzRead() instead aborts.
110 * So, just restart at the beginning of the file again,
111 * and use plain reads from now on.
/* Release the bzip2 handle, rewind the descriptor and read the raw bytes. */
113 BZ2_bzReadClose(&bzerr
, bzbufdesc
);
115 if (lseek(f
->fd
, 0, SEEK_SET
) == -1)
117 nr
= read(f
->fd
, buffer
, MAXBUFSIZ
);
120 /* Make sure we exit with an error */
125 } else if ((filebehave
== FILE_XZ
) || (filebehave
== FILE_LZMA
)) {
/* Decompressed output is written into the shared buffer. */
127 lstrm
.next_out
= buffer
;
/* Fetch more compressed input only once liblzma has consumed the previous chunk. */
130 if (lstrm
.avail_in
== 0) {
131 lstrm
.next_in
= lin_buf
;
132 nr
= read(f
->fd
, lin_buf
, MAXBUFSIZ
);
/* NOTE(review): the condition guarding LZMA_FINISH is not shown; presumably end of input -- confirm. */
137 laction
= LZMA_FINISH
;
142 ret
= lzma_code(&lstrm
, laction
);
/* Any status other than LZMA_OK or LZMA_STREAM_END takes a branch whose body is not shown. */
144 if (ret
!= LZMA_OK
&& ret
!= LZMA_STREAM_END
)
/* Output window full or stream finished: record the byte count and reset the window. */
147 if (lstrm
.avail_out
== 0 || ret
== LZMA_STREAM_END
) {
148 bufrem
= MAXBUFSIZ
- lstrm
.avail_out
;
149 lstrm
.next_out
= buffer
;
150 lstrm
.avail_out
= MAXBUFSIZ
;
/* Keep decoding until some output has been recorded or the stream has ended. */
152 } while (bufrem
== 0 && ret
!= LZMA_STREAM_END
);
155 #endif /* WITHOUT_LZMA */
/* Plain read() from the descriptor; presumably the fallback for the remaining modes -- confirm. */
157 nr
= read(f
->fd
, buffer
, MAXBUFSIZ
);
/*
 * Makes sure the line buffer can hold newlen bytes: when lnbuflen is smaller
 * than newlen, lnbuf is reallocated to newlen bytes through grep_realloc().
 *
 * NOTE(review): the return type, the update of lnbuflen and the return
 * statements are on lines missing from this listing.  grep_fgetln() tests
 * the result as a truth value before copying into lnbuf -- confirm the
 * exact convention against the full source.
 */
167 grep_lnbufgrow(size_t newlen
)
170 if (lnbuflen
< newlen
) {
171 lnbuf
= grep_realloc(lnbuf
, newlen
);
/*
 * Fetches the next line of f, with lenp as the out-parameter for its length
 * (the original comments mention returning a zero length at EOF).
 *
 * Visible flow: refill the read buffer when it is empty, look for the
 * fileeol byte in the buffered data, and when the line does not end inside
 * the current buffer, accumulate the pieces in lnbuf across refills.
 *
 * NOTE(review): the return type, local declarations, assignments to *lenp
 * and all return/goto statements are on lines missing from this listing.
 */
179 grep_fgetln(struct file
*f
, size_t *lenp
)
187 /* Fill the buffer, if necessary */
188 if (bufrem
== 0 && grep_refill(f
) != 0)
192 /* Return zero length to indicate EOF */
197 /* Look for a newline in the remaining part of the buffer */
198 if ((p
= memchr(bufpos
, fileeol
, bufrem
)) != NULL
) {
199 ++p
; /* advance over newline */
208 /* We have to copy the current buffered data to the line buffer */
/*
 * len is the total gathered so far and grows by bufrem on each pass; off is
 * the offset in lnbuf where the next piece is copied (its update is not
 * shown here).
 */
209 for (len
= bufrem
, off
= 0; ; len
+= bufrem
) {
210 /* Make sure there is room for more data */
211 if (grep_lnbufgrow(len
+ LNBUFBUMP
))
/* Append the bytes not yet copied: the len - off bytes starting at bufpos. */
213 memcpy(lnbuf
+ off
, bufpos
, len
- off
);
214 /* With FILE_MMAP, this is EOF; there's no more to refill */
215 if (filebehave
== FILE_MMAP
) {
220 /* Fetch more to try and find EOL/EOF */
221 if (grep_refill(f
) != 0)
224 /* EOF: return partial line */
/* No end-of-line byte in the fresh data either; the action taken is not shown (presumably another pass). */
226 if ((p
= memchr(bufpos
, fileeol
, bufrem
)) == NULL
)
228 /* got it: finish up the line (like code above) */
232 if (grep_lnbufgrow(len
))
/* Copy the final diff bytes; diff is computed on a line not shown here. */
234 memcpy(lnbuf
+ off
, bufpos
, diff
);
248 * Opens a file for processing.
/*
 * Opens path for processing and prepares the shared read state.
 *
 * Visible steps: allocate and zero a struct file; use STDIN_FILENO or
 * open(path, O_RDONLY) for the descriptor; in FILE_MMAP mode try to map the
 * file and switch filebehave to FILE_STDIO when that is not possible;
 * allocate a MAXBUFSIZ buffer when no mapping exists; attach the zlib,
 * bzip2 or liblzma decoder selected by filebehave; perform a first refill;
 * and scan the buffered data for a NUL byte unless binbehave is
 * BINFILE_TEXT.
 *
 * NOTE(review): the return type, local declarations and every error/return
 * path are on lines missing from this listing.
 */
251 grep_open(const char *path
)
255 f
= grep_malloc(sizeof *f
);
256 memset(f
, 0, sizeof *f
);
258 /* Processing stdin implies --line-buffered. */
/* NOTE(review): the condition selecting stdin is not shown here. */
260 f
->fd
= STDIN_FILENO
;
261 } else if ((f
->fd
= open(path
, O_RDONLY
)) == -1)
264 if (filebehave
== FILE_MMAP
) {
/* Give up on mmap when fstat() fails, the size exceeds OFF_MAX, or the file is not regular. */
267 if ((fstat(f
->fd
, &st
) == -1) || (st
.st_size
> OFF_MAX
) ||
268 (!S_ISREG(st
.st_mode
)))
269 filebehave
= FILE_STDIO
;
271 int flags
= MAP_PRIVATE
| MAP_NOCORE
| MAP_NOSYNC
;
/* MAP_PREFAULT_READ is added only where the platform defines it. */
272 #ifdef MAP_PREFAULT_READ
273 flags
|= MAP_PREFAULT_READ
;
/* Read-only mapping of fsiz bytes; the remaining mmap() arguments are on a line not shown. */
276 buffer
= mmap(NULL
, fsiz
, PROT_READ
, flags
,
278 if (buffer
== MAP_FAILED
)
279 filebehave
= FILE_STDIO
;
283 madvise(buffer
, st
.st_size
, MADV_SEQUENTIAL
);
/* No usable mapping: fall back to a heap buffer of MAXBUFSIZ bytes. */
288 if ((buffer
== NULL
) || (buffer
== MAP_FAILED
))
289 buffer
= grep_malloc(MAXBUFSIZ
);
/* Wrap the descriptor in a zlib stream; a NULL result takes a branch not shown here. */
291 if (filebehave
== FILE_GZIP
&&
292 (gzbufdesc
= gzdopen(f
->fd
, "r")) == NULL
)
295 #ifndef WITHOUT_BZIP2
/* Same for bzip2, compiled in only when WITHOUT_BZIP2 is not defined. */
296 if (filebehave
== FILE_BZIP
&&
297 (bzbufdesc
= BZ2_bzdopen(f
->fd
, "r")) == NULL
)
301 else if ((filebehave
== FILE_XZ
) || (filebehave
== FILE_LZMA
)) {
/* FILE_XZ uses lzma_stream_decoder(), FILE_LZMA uses lzma_alone_decoder(); both with a UINT64_MAX memory limit. */
304 ret
= (filebehave
== FILE_XZ
) ?
305 lzma_stream_decoder(&lstrm
, UINT64_MAX
,
307 lzma_alone_decoder(&lstrm
, UINT64_MAX
);
313 lstrm
.avail_out
= MAXBUFSIZ
;
318 /* Fill read buffer, also catches errors early */
319 if (bufrem
== 0 && grep_refill(f
) != 0)
322 /* Check for binary stuff, if necessary */
/* A NUL byte in the buffered data triggers a branch whose body is not shown here. */
323 if (binbehave
!= BINFILE_TEXT
&& fileeol
!= '\0' &&
324 memchr(bufpos
, '\0', bufrem
) != NULL
)
340 grep_close(struct file
*f
)
345 /* Reset read buffer and line buffer */
346 if (filebehave
== FILE_MMAP
) {
347 munmap(buffer
, fsiz
);