drm/i915: Update to Linux 4.6
[dragonfly.git] / contrib / grep / src / dosbuf.c
blob0e8f6f7b04aee3a6ee9b22388c1d775a439676f0
1 /* dosbuf.c
2 Copyright (C) 1992, 1997-2002, 2004-2015 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 3, or (at your option)
7 any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
17 02110-1301, USA. */
/* Messy DOS-specific code for correctly treating binary, Unix text
   and DOS text files.

   This has several aspects:

     * Guessing the file type (unless the user tells us);
     * Stripping CR characters from DOS text files (otherwise regex
       functions won't work correctly);
     * Reporting correct byte count with -b for any kind of file.

*/
31 #include <config.h>
/* Classification of an input file.  UNKNOWN means the type has not
   been determined yet and must be guessed from the file contents.  */
typedef enum
{
  UNKNOWN,      /* not decided yet */
  DOS_BINARY,   /* contains a NUL byte, or forced to binary */
  DOS_TEXT,     /* text with CR-LF line endings */
  UNIX_TEXT     /* text without CR-LF line endings */
} File_type;
/* One entry of the table that maps positions in the CR-stripped
   buffer back to positions in the original file.  */
struct dos_map
{
  /* Position in the buffer passed to the matcher.  */
  off_t pos;

  /* How much to add when reporting a character position.  */
  off_t add;
};
42 static int dos_report_unix_offset = 0;
44 static File_type dos_file_type = UNKNOWN;
45 static File_type dos_use_file_type = UNKNOWN;
46 static off_t dos_stripped_crs = 0;
47 static struct dos_map *dos_pos_map;
48 static int dos_pos_map_size = 0;
49 static int dos_pos_map_used = 0;
50 static int inp_map_idx = 0, out_map_idx = 1;
52 /* Set default DOS file type to binary. */
53 static void
54 dos_binary (void)
56 if (O_BINARY)
57 dos_use_file_type = DOS_BINARY;
60 /* Tell DOS routines to report Unix offset. */
61 static void
62 dos_unix_byte_offsets (void)
64 if (O_BINARY)
65 dos_report_unix_offset = 1;
68 /* Guess DOS file type by looking at its contents. */
69 static File_type
70 guess_type (char *buf, size_t buflen)
72 int crlf_seen = 0;
73 char *bp = buf;
75 while (buflen--)
77 /* Treat a file as binary if it has a NUL character. */
78 if (!*bp)
79 return DOS_BINARY;
81 /* CR before LF means DOS text file (unless we later see
82 binary characters). */
83 else if (*bp == '\r' && buflen && bp[1] == '\n')
84 crlf_seen = 1;
86 bp++;
89 return crlf_seen ? DOS_TEXT : UNIX_TEXT;
92 /* Convert external DOS file representation to internal.
93 Return the count of bytes left in the buffer.
94 Build table to map character positions when reporting byte counts. */
95 static size_t
96 undossify_input (char *buf, size_t buflen)
98 if (! O_BINARY)
99 return buflen;
101 size_t bytes_left = 0;
103 if (totalcc == 0)
105 /* New file: forget everything we knew about character
106 position mapping table and file type. */
107 inp_map_idx = 0;
108 out_map_idx = 1;
109 dos_pos_map_used = 0;
110 dos_stripped_crs = 0;
111 dos_file_type = dos_use_file_type;
114 /* Guess if this file is binary, unless we already know that. */
115 if (dos_file_type == UNKNOWN)
116 dos_file_type = guess_type(buf, buflen);
118 /* If this file is to be treated as DOS Text, strip the CR characters
119 and maybe build the table for character position mapping on output. */
120 if (dos_file_type == DOS_TEXT)
122 char *destp = buf;
124 while (buflen--)
126 if (*buf != '\r')
128 *destp++ = *buf++;
129 bytes_left++;
131 else
133 buf++;
134 if (out_byte && !dos_report_unix_offset)
136 dos_stripped_crs++;
137 while (buflen && *buf == '\r')
139 dos_stripped_crs++;
140 buflen--;
141 buf++;
143 if (inp_map_idx >= dos_pos_map_size - 1)
145 dos_pos_map_size = inp_map_idx ? inp_map_idx * 2 : 1000;
146 dos_pos_map = xrealloc(dos_pos_map,
147 dos_pos_map_size *
148 sizeof(struct dos_map));
151 if (!inp_map_idx)
153 /* Add sentinel entry. */
154 dos_pos_map[inp_map_idx].pos = 0;
155 dos_pos_map[inp_map_idx++].add = 0;
157 /* Initialize first real entry. */
158 dos_pos_map[inp_map_idx].add = 0;
161 /* Put the new entry. If the stripped CR characters
162 precede a Newline (the usual case), pretend that
163 they were found *after* the Newline. This makes
164 displayed byte offsets more reasonable in some
165 cases, and fits better the intuitive notion that
166 the line ends *before* the CR, not *after* it. */
167 inp_map_idx++;
168 dos_pos_map[inp_map_idx-1].pos =
169 (*buf == '\n' ? destp + 1 : destp ) - bufbeg + totalcc;
170 dos_pos_map[inp_map_idx].add = dos_stripped_crs;
171 dos_pos_map_used = inp_map_idx;
173 /* The following will be updated on the next pass. */
174 dos_pos_map[inp_map_idx].pos = destp - bufbeg + totalcc + 1;
179 return bytes_left;
182 return buflen;
185 /* Convert internal byte count into external. */
186 static off_t
187 dossified_pos (off_t byteno)
189 if (! O_BINARY)
190 return byteno;
192 off_t pos_lo;
193 off_t pos_hi;
195 if (dos_file_type != DOS_TEXT || dos_report_unix_offset)
196 return byteno;
198 /* Optimization: usually the file will be scanned sequentially.
199 So in most cases, this byte position will be found in the
200 table near the previous one, as recorded in 'out_map_idx'. */
201 pos_lo = dos_pos_map[out_map_idx-1].pos;
202 pos_hi = dos_pos_map[out_map_idx].pos;
204 /* If the initial guess failed, search up or down, as
205 appropriate, beginning with the previous place. */
206 if (byteno >= pos_hi)
208 out_map_idx++;
209 while (out_map_idx < dos_pos_map_used
210 && byteno >= dos_pos_map[out_map_idx].pos)
211 out_map_idx++;
214 else if (byteno < pos_lo)
216 out_map_idx--;
217 while (out_map_idx > 1 && byteno < dos_pos_map[out_map_idx-1].pos)
218 out_map_idx--;
221 return byteno + dos_pos_map[out_map_idx].add;