Add support for the chgat(3) family. It is a useful extension from
[netbsd-mini2440.git] / usr.bin / sort / fields.c
blobf1a3cbad4567f26db3c5a961c3e78825ccf7f91e
1 /* $NetBSD: fields.c,v 1.19 2008/04/28 20:24:15 martin Exp $ */
3 /*-
4 * Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
5 * All rights reserved.
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Ben Harris and Jaromir Dolecek.
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
32 /*-
33 * Copyright (c) 1993
34 * The Regents of the University of California. All rights reserved.
36 * This code is derived from software contributed to Berkeley by
37 * Peter McIlroy.
39 * Redistribution and use in source and binary forms, with or without
40 * modification, are permitted provided that the following conditions
41 * are met:
42 * 1. Redistributions of source code must retain the above copyright
43 * notice, this list of conditions and the following disclaimer.
44 * 2. Redistributions in binary form must reproduce the above copyright
45 * notice, this list of conditions and the following disclaimer in the
46 * documentation and/or other materials provided with the distribution.
47 * 3. Neither the name of the University nor the names of its contributors
48 * may be used to endorse or promote products derived from this software
49 * without specific prior written permission.
51 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
52 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
53 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
54 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
55 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
56 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
57 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
58 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
59 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
60 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
61 * SUCH DAMAGE.
64 /* Subroutines to generate sort keys. */
66 #include "sort.h"
68 #ifndef lint
69 __RCSID("$NetBSD: fields.c,v 1.19 2008/04/28 20:24:15 martin Exp $");
70 __SCCSID("@(#)fields.c 8.1 (Berkeley) 6/6/93");
71 #endif /* not lint */
73 #define SKIP_BLANKS(ptr) { \
74 if (BLANK & d_mask[*(ptr)]) \
75 while (BLANK & d_mask[*(++(ptr))]); \
78 #define NEXTCOL(pos) { \
79 if (!SEP_FLAG) \
80 while (BLANK & l_d_mask[*(++pos)]); \
81 while ((*(pos+1) != '\0') && !((FLD_D | REC_D_F) & l_d_mask[*++pos]));\
84 static u_char *enterfield __P((u_char *, u_char *, struct field *, int));
85 static u_char *number __P((u_char *, u_char *, u_char *, u_char *, int));
87 #define DECIMAL '.'
88 #define OFFSET 128
90 u_char TENS[10]; /* TENS[0] = REC_D <= 128 ? 130 - '0' : 2 -'0'... */
91 u_char NEGTENS[10]; /* NEGTENS[0] = REC_D <= 128 ? 126 + '0' : 252 +'0' */
92 u_char *OFF_TENS, *OFF_NTENS; /* TENS - '0', NEGTENS - '0' */
93 u_char fnum[NBINS], rnum[NBINS];
96 * constructs sort key with leading recheader, followed by the key,
97 * followed by the original line.
99 length_t
100 enterkey(keybuf, line, size, fieldtable)
101 RECHEADER *keybuf; /* pointer to start of key */
102 DBT *line;
103 int size;
104 struct field fieldtable[];
106 int i;
107 u_char *l_d_mask;
108 u_char *lineend, *pos;
109 u_char *endkey, *keypos;
110 struct coldesc *clpos;
111 int col = 1;
112 struct field *ftpos;
113 l_d_mask = d_mask;
114 pos = (u_char *) line->data - 1;
115 lineend = (u_char *) line->data + line->size-1;
116 /* don't include rec_delimiter */
118 for (i = 0; i < ncols; i++) {
119 clpos = clist + i;
120 for (; (col < clpos->num) && (pos < lineend); col++) {
121 NEXTCOL(pos);
123 if (pos >= lineend)
124 break;
125 clpos->start = SEP_FLAG ? pos + 1 : pos;
126 NEXTCOL(pos);
127 clpos->end = pos;
128 col++;
129 if (pos >= lineend) {
130 clpos->end = lineend;
131 i++;
132 break;
135 for (; i <= ncols; i++)
136 clist[i].start = clist[i].end = lineend;
137 if (clist[0].start < (u_char *) line->data)
138 clist[0].start++;
140 keypos = keybuf->data;
141 endkey = (u_char *) keybuf + size - line->size;
142 for (ftpos = fieldtable + 1; ftpos->icol.num; ftpos++)
143 if ((keypos = enterfield(keypos, endkey, ftpos,
144 fieldtable->flags)) == NULL)
145 return (1);
147 keybuf->offset = keypos - keybuf->data;
148 keybuf->length = keybuf->offset + line->size;
149 if (keybuf->length + sizeof(TRECHEADER) > (length_t)size) {
150 /* line too long for buffer */
151 return (1);
155 * Make [s]radixsort() only sort by relevant part of key if:
156 * 1. we want to choose unique items by relevant field[s]
157 * 2. we want stable sort and so the items should be sorted only by
158 * the relevant field[s]
160 if (UNIQUE || (stable_sort && keybuf->offset < line->size))
161 keypos[-1] = REC_D;
163 memcpy(keybuf->data + keybuf->offset, line->data, line->size);
164 return (0);
168 * constructs a field (as defined by -k) within a key
170 static u_char *
171 enterfield(tablepos, endkey, cur_fld, gflags)
172 struct field *cur_fld;
173 u_char *tablepos, *endkey;
174 int gflags;
176 u_char *start, *end, *lineend, *mask, *lweight;
177 struct column icol, tcol;
178 u_int flags;
179 u_int Rflag;
181 icol = cur_fld->icol;
182 tcol = cur_fld->tcol;
183 flags = cur_fld->flags;
184 start = icol.p->start;
185 lineend = clist[ncols].end;
186 if (flags & BI)
187 SKIP_BLANKS(start);
188 start += icol.indent;
189 start = min(start, lineend);
191 if (!tcol.num)
192 end = lineend;
193 else {
194 if (tcol.indent) {
195 end = tcol.p->start;
196 if (flags & BT)
197 SKIP_BLANKS(end);
198 end += tcol.indent;
199 end = min(end, lineend);
200 } else
201 end = tcol.p->end;
204 if (flags & N) {
205 Rflag = (gflags & R ) ^ (flags & R) ? 1 : 0;
206 return number(tablepos, endkey, start, end, Rflag);
209 mask = cur_fld->mask;
210 lweight = cur_fld->weights;
211 for (; start < end; start++)
212 if (mask[*start]) {
213 if (*start <= 1) {
214 if (tablepos+2 >= endkey)
215 return (NULL);
216 *tablepos++ = lweight[1];
217 *tablepos++ = lweight[*start ? 2 : 1];
218 } else {
219 if (tablepos+1 >= endkey)
220 return (NULL);
221 *tablepos++ = lweight[*start];
224 *tablepos++ = lweight[0];
225 return (tablepos == endkey ? NULL : tablepos);
228 /* Uses the first bin to assign sign, expsign, 0, and the first
229 * 61 out of the exponent ( (254 - 3 origins - 4 over/underflows)/4 = 61 ).
230 * When sorting in forward order:
231 * use (0-99) -> (130->240) for sorting the mantissa if REC_D <=128;
232 * else use (0-99)->(2-102).
233 * If the exponent is >=61, use another byte for each additional 253
234 * in the exponent. Cutoff is at 567.
235 * To avoid confusing the exponent and the mantissa, use a field delimiter
236 * if the exponent is exactly 61, 61+252, etc--this is ok, since it's the
237 * only time a field delimiter can come in that position.
238 * Reverse order is done analagously.
241 static u_char *
242 number(pos, bufend, line, lineend, Rflag)
243 u_char *line, *pos, *bufend, *lineend;
244 int Rflag;
246 int or_sign, parity = 0;
247 int expincr = 1, exponent = -1;
248 int bite, expsign = 1, sign = 1, zeroskip = 0;
249 u_char lastvalue='0', *nonzero=NULL, *tline, *C_TENS;
250 u_char *nweights;
252 if (Rflag)
253 nweights = rnum;
254 else
255 nweights = fnum;
256 if (pos > bufend - 8)
257 return (NULL);
259 * or_sign sets the sort direction:
260 * (-r: +/-)(sign: +/-)(expsign: +/-)
262 or_sign = sign ^ expsign ^ Rflag;
263 SKIP_BLANKS(line);
264 if (*line == '-') { /* set the sign */
265 or_sign ^= 1;
266 sign = 0;
267 line++;
269 /* eat initial zeroes */
270 for (; *line == '0' && line < lineend; line++)
271 zeroskip = 1;
272 /* calculate exponents < 0 */
273 if (*line == DECIMAL) {
274 exponent = 1;
275 while (*++line == '0' && line < lineend)
276 exponent++;
277 expincr = 0;
278 expsign = 0;
280 /* next character better be a digit */
281 if (*line < '1' || *line > '9' || line >= lineend) {
282 /* only exit if we didn't skip any zero number */
283 if (!zeroskip) {
284 *pos++ = nweights[127];
285 return (pos);
288 if (expincr) {
289 for (tline = line-1; *++tline >= '0' &&
290 *tline <= '9' && tline < lineend;)
291 exponent++;
293 if (exponent > 567) {
294 *pos++ = nweights[sign ? (expsign ? 254 : 128)
295 : (expsign ? 0 : 126)];
296 warnx("exponent out of bounds");
297 return (pos);
299 bite = min(exponent, 61);
300 *pos++ = nweights[(sign) ? (expsign ? 189+bite : 189-bite)
301 : (expsign ? 64-bite : 64+bite)];
302 if (bite >= 61) {
303 do {
304 exponent -= bite;
305 bite = min(exponent, 254);
306 *pos++ = nweights[or_sign ? 254-bite : bite];
307 } while (bite == 254);
309 C_TENS = or_sign ? OFF_NTENS : OFF_TENS;
310 for (; line < lineend; line++) {
311 if (*line >= '0' && *line <= '9') {
312 if (parity) {
313 *pos++ = C_TENS[lastvalue] + (or_sign ? - *line
314 : *line);
315 if (pos == bufend)
316 return (NULL);
317 if (*line != '0' || lastvalue != '0')
318 nonzero = pos;
319 } else
320 lastvalue = *line;
321 parity ^= 1;
322 } else if (*line == DECIMAL) {
323 if (!expincr) /* a decimal already occurred once */
324 break;
325 expincr = 0;
326 } else
327 break;
329 if ((parity && lastvalue != '0') || !nonzero) {
330 *pos++ = or_sign ? OFF_NTENS[lastvalue] - '0' :
331 OFF_TENS[lastvalue] + '0';
332 } else
333 pos = nonzero;
334 if (pos > bufend-1)
335 return (NULL);
336 *pos++ = or_sign ? nweights[254] : nweights[0];
337 return (pos);
340 /* This forces a gap around the record delimiter
341 * Thus fnum has vaues over (0,254) -> ((0,REC_D-1),(REC_D+1,255));
342 * rnum over (0,254) -> (255,REC_D+1),(REC_D-1,0))
344 void
345 num_init()
347 int i;
348 TENS[0] = REC_D <=128 ? 130 - '0' : 2 - '0';
349 NEGTENS[0] = REC_D <=128 ? 126 + '0' : 254 + '0';
350 OFF_TENS = TENS - '0';
351 OFF_NTENS = NEGTENS - '0';
352 for (i = 1; i < 10; i++) {
353 TENS[i] = TENS[i - 1] + 10;
354 NEGTENS[i] = NEGTENS[i - 1] - 10;
356 for (i = 0; i < REC_D; i++) {
357 fnum[i] = i;
358 rnum[255 - i] = i;
360 for (i = REC_D; i <255; i++) {
361 fnum[i] = i + 1;
362 rnum[255 - i] = i - 1;