Merge branch 'master' of git://repo.or.cz/unleashed into uadmin
[unleashed.git] / bin / grep / file.c
blobf55a7712847303f77c2fdf85e6d8946d71f66441
1 /* $NetBSD: file.c,v 1.5 2011/02/16 18:35:39 joerg Exp $ */
2 /* $FreeBSD$ */
3 /* $OpenBSD: file.c,v 1.11 2010/07/02 20:48:48 nicm Exp $ */
5 /*-
6 * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav
7 * Copyright (C) 2008-2010 Gabor Kovesdan <gabor@FreeBSD.org>
8 * Copyright (C) 2010 Dimitry Andric <dimitry@andric.com>
9 * All rights reserved.
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
33 #include <sys/param.h>
34 #include <sys/mman.h>
35 #include <sys/stat.h>
36 #include <sys/types.h>
38 #include <err.h>
39 #include <errno.h>
40 #include <fcntl.h>
41 #include <stddef.h>
42 #include <stdlib.h>
43 #include <string.h>
44 #include <unistd.h>
45 #include <wchar.h>
46 #include <wctype.h>
47 #include <zlib.h>
49 #ifndef WITHOUT_LZMA
50 #include <lzma.h>
51 #endif
53 #ifndef WITHOUT_BZIP2
54 #include <bzlib.h>
55 #endif
57 #include "grep.h"
58 #include "freebsd-compat.h"
60 #define MAXBUFSIZ (32 * 1024)
61 #define LNBUFBUMP 80
63 static gzFile gzbufdesc;
64 #ifndef WITHOUT_LZMA
65 static lzma_stream lstrm = LZMA_STREAM_INIT;
66 static lzma_action laction;
67 static uint8_t lin_buf[MAXBUFSIZ];
68 #endif
69 #ifndef WITHOUT_BZIP2
70 static BZFILE* bzbufdesc;
71 #endif
73 static unsigned char *buffer;
74 static unsigned char *bufpos;
75 static size_t bufrem;
76 static size_t fsiz;
78 static unsigned char *lnbuf;
79 static size_t lnbuflen;
81 static inline int
82 grep_refill(struct file *f)
84 ssize_t nr;
86 if (filebehave == FILE_MMAP)
87 return (0);
89 bufpos = buffer;
90 bufrem = 0;
92 if (filebehave == FILE_GZIP) {
93 nr = gzread(gzbufdesc, buffer, MAXBUFSIZ);
94 #ifndef WITHOUT_BZIP2
95 } else if (filebehave == FILE_BZIP && bzbufdesc != NULL) {
96 int bzerr;
98 nr = BZ2_bzRead(&bzerr, bzbufdesc, buffer, MAXBUFSIZ);
99 switch (bzerr) {
100 case BZ_OK:
101 case BZ_STREAM_END:
102 /* No problem, nr will be okay */
103 break;
104 case BZ_DATA_ERROR_MAGIC:
106 * As opposed to gzread(), which simply returns the
107 * plain file data, if it is not in the correct
108 * compressed format, BZ2_bzRead() instead aborts.
110 * So, just restart at the beginning of the file again,
111 * and use plain reads from now on.
113 BZ2_bzReadClose(&bzerr, bzbufdesc);
114 bzbufdesc = NULL;
115 if (lseek(f->fd, 0, SEEK_SET) == -1)
116 return (-1);
117 nr = read(f->fd, buffer, MAXBUFSIZ);
118 break;
119 default:
120 /* Make sure we exit with an error */
121 nr = -1;
123 #endif
124 #ifndef WITHOUT_LZMA
125 } else if ((filebehave == FILE_XZ) || (filebehave == FILE_LZMA)) {
126 lzma_ret ret;
127 lstrm.next_out = buffer;
129 do {
130 if (lstrm.avail_in == 0) {
131 lstrm.next_in = lin_buf;
132 nr = read(f->fd, lin_buf, MAXBUFSIZ);
134 if (nr < 0)
135 return (-1);
136 else if (nr == 0)
137 laction = LZMA_FINISH;
139 lstrm.avail_in = nr;
142 ret = lzma_code(&lstrm, laction);
144 if (ret != LZMA_OK && ret != LZMA_STREAM_END)
145 return (-1);
147 if (lstrm.avail_out == 0 || ret == LZMA_STREAM_END) {
148 bufrem = MAXBUFSIZ - lstrm.avail_out;
149 lstrm.next_out = buffer;
150 lstrm.avail_out = MAXBUFSIZ;
152 } while (bufrem == 0 && ret != LZMA_STREAM_END);
154 return (0);
155 #endif /* WIHTOUT_LZMA */
156 } else
157 nr = read(f->fd, buffer, MAXBUFSIZ);
159 if (nr < 0)
160 return (-1);
162 bufrem = nr;
163 return (0);
166 static inline int
167 grep_lnbufgrow(size_t newlen)
170 if (lnbuflen < newlen) {
171 lnbuf = grep_realloc(lnbuf, newlen);
172 lnbuflen = newlen;
175 return (0);
178 char *
179 grep_fgetln(struct file *f, size_t *lenp)
181 unsigned char *p;
182 char *ret;
183 size_t len;
184 size_t off;
185 ptrdiff_t diff;
187 /* Fill the buffer, if necessary */
188 if (bufrem == 0 && grep_refill(f) != 0)
189 goto error;
191 if (bufrem == 0) {
192 /* Return zero length to indicate EOF */
193 *lenp = 0;
194 return (bufpos);
197 /* Look for a newline in the remaining part of the buffer */
198 if ((p = memchr(bufpos, fileeol, bufrem)) != NULL) {
199 ++p; /* advance over newline */
200 ret = bufpos;
201 len = p - bufpos;
202 bufrem -= len;
203 bufpos = p;
204 *lenp = len;
205 return (ret);
208 /* We have to copy the current buffered data to the line buffer */
209 for (len = bufrem, off = 0; ; len += bufrem) {
210 /* Make sure there is room for more data */
211 if (grep_lnbufgrow(len + LNBUFBUMP))
212 goto error;
213 memcpy(lnbuf + off, bufpos, len - off);
214 /* With FILE_MMAP, this is EOF; there's no more to refill */
215 if (filebehave == FILE_MMAP) {
216 bufrem -= len;
217 break;
219 off = len;
220 /* Fetch more to try and find EOL/EOF */
221 if (grep_refill(f) != 0)
222 goto error;
223 if (bufrem == 0)
224 /* EOF: return partial line */
225 break;
226 if ((p = memchr(bufpos, fileeol, bufrem)) == NULL)
227 continue;
228 /* got it: finish up the line (like code above) */
229 ++p;
230 diff = p - bufpos;
231 len += diff;
232 if (grep_lnbufgrow(len))
233 goto error;
234 memcpy(lnbuf + off, bufpos, diff);
235 bufrem -= diff;
236 bufpos = p;
237 break;
239 *lenp = len;
240 return (lnbuf);
242 error:
243 *lenp = 0;
244 return (NULL);
248 * Opens a file for processing.
250 struct file *
251 grep_open(const char *path)
253 struct file *f;
255 f = grep_malloc(sizeof *f);
256 memset(f, 0, sizeof *f);
257 if (path == NULL) {
258 /* Processing stdin implies --line-buffered. */
259 lbflag = true;
260 f->fd = STDIN_FILENO;
261 } else if ((f->fd = open(path, O_RDONLY)) == -1)
262 goto error1;
264 if (filebehave == FILE_MMAP) {
265 struct stat st;
267 if ((fstat(f->fd, &st) == -1) || (st.st_size > OFF_MAX) ||
268 (!S_ISREG(st.st_mode)))
269 filebehave = FILE_STDIO;
270 else {
271 int flags = MAP_PRIVATE | MAP_NOCORE | MAP_NOSYNC;
272 #ifdef MAP_PREFAULT_READ
273 flags |= MAP_PREFAULT_READ;
274 #endif
275 fsiz = st.st_size;
276 buffer = mmap(NULL, fsiz, PROT_READ, flags,
277 f->fd, (off_t)0);
278 if (buffer == MAP_FAILED)
279 filebehave = FILE_STDIO;
280 else {
281 bufrem = st.st_size;
282 bufpos = buffer;
283 madvise(buffer, st.st_size, MADV_SEQUENTIAL);
288 if ((buffer == NULL) || (buffer == MAP_FAILED))
289 buffer = grep_malloc(MAXBUFSIZ);
291 if (filebehave == FILE_GZIP &&
292 (gzbufdesc = gzdopen(f->fd, "r")) == NULL)
293 goto error2;
295 #ifndef WITHOUT_BZIP2
296 if (filebehave == FILE_BZIP &&
297 (bzbufdesc = BZ2_bzdopen(f->fd, "r")) == NULL)
298 goto error2;
299 #endif
300 #ifndef WITHOUT_LZMA
301 else if ((filebehave == FILE_XZ) || (filebehave == FILE_LZMA)) {
302 lzma_ret ret;
304 ret = (filebehave == FILE_XZ) ?
305 lzma_stream_decoder(&lstrm, UINT64_MAX,
306 LZMA_CONCATENATED) :
307 lzma_alone_decoder(&lstrm, UINT64_MAX);
309 if (ret != LZMA_OK)
310 goto error2;
312 lstrm.avail_in = 0;
313 lstrm.avail_out = MAXBUFSIZ;
314 laction = LZMA_RUN;
316 #endif
318 /* Fill read buffer, also catches errors early */
319 if (bufrem == 0 && grep_refill(f) != 0)
320 goto error2;
322 /* Check for binary stuff, if necessary */
323 if (binbehave != BINFILE_TEXT && fileeol != '\0' &&
324 memchr(bufpos, '\0', bufrem) != NULL)
325 f->binary = true;
327 return (f);
329 error2:
330 close(f->fd);
331 error1:
332 free(f);
333 return (NULL);
337 * Closes a file.
339 void
340 grep_close(struct file *f)
343 close(f->fd);
345 /* Reset read buffer and line buffer */
346 if (filebehave == FILE_MMAP) {
347 munmap(buffer, fsiz);
348 buffer = NULL;
350 bufpos = buffer;
351 bufrem = 0;
353 free(lnbuf);
354 lnbuf = NULL;
355 lnbuflen = 0;