Retire __SCCSID. It has only archeological value now. Also retire lint
[netbsd-mini2440.git] / usr.bin / sort / init.c
blob45acba889ce87bc54e4ddce5773171aef3f417c1
1 /* $NetBSD: init.c,v 1.23 2009/09/10 22:02:40 dsl Exp $ */
3 /*-
4 * Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
5 * All rights reserved.
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Ben Harris and Jaromir Dolecek.
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
32 /*-
33 * Copyright (c) 1993
34 * The Regents of the University of California. All rights reserved.
36 * This code is derived from software contributed to Berkeley by
37 * Peter McIlroy.
39 * Redistribution and use in source and binary forms, with or without
40 * modification, are permitted provided that the following conditions
41 * are met:
42 * 1. Redistributions of source code must retain the above copyright
43 * notice, this list of conditions and the following disclaimer.
44 * 2. Redistributions in binary form must reproduce the above copyright
45 * notice, this list of conditions and the following disclaimer in the
46 * documentation and/or other materials provided with the distribution.
47 * 3. Neither the name of the University nor the names of its contributors
48 * may be used to endorse or promote products derived from this software
49 * without specific prior written permission.
51 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
52 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
53 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
54 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
55 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
56 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
57 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
58 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
59 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
60 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
61 * SUCH DAMAGE.
64 #include "sort.h"
66 __RCSID("$NetBSD: init.c,v 1.23 2009/09/10 22:02:40 dsl Exp $");
68 #include <ctype.h>
69 #include <string.h>
71 static void insertcol(struct field *);
72 static const char *setcolumn(const char *, struct field *);
75 * masks of ignored characters.
77 static u_char dtable[NBINS], itable[NBINS];
80 * parsed key options
82 struct coldesc *clist = NULL;
83 int ncols = 0;
86 * clist (list of columns which correspond to one or more icol or tcol)
87 * is in increasing order of columns.
88 * Fields are kept in increasing order of fields.
91 /*
92 * keep clist in order--inserts a column in a sorted array
94 static void
95 insertcol(struct field *field)
97 int i;
98 struct coldesc *p;
100 /* Make space for new item */
101 p = realloc(clist, (ncols + 2) * sizeof(*clist));
102 if (!p)
103 err(1, "realloc");
104 clist = p;
105 memset(&clist[ncols], 0, sizeof(clist[ncols]));
107 for (i = 0; i < ncols; i++)
108 if (field->icol.num <= clist[i].num)
109 break;
110 if (field->icol.num != clist[i].num) {
111 memmove(clist+i+1, clist+i, sizeof(COLDESC)*(ncols-i));
112 clist[i].num = field->icol.num;
113 ncols++;
115 if (field->tcol.num && field->tcol.num != field->icol.num) {
116 for (i = 0; i < ncols; i++)
117 if (field->tcol.num <= clist[i].num)
118 break;
119 if (field->tcol.num != clist[i].num) {
120 memmove(clist+i+1, clist+i,sizeof(COLDESC)*(ncols-i));
121 clist[i].num = field->tcol.num;
122 ncols++;
128 * matches fields with the appropriate columns--n^2 but who cares?
130 void
131 fldreset(struct field *fldtab)
133 int i;
135 fldtab[0].tcol.p = clist + ncols - 1;
136 for (++fldtab; fldtab->icol.num; ++fldtab) {
137 for (i = 0; fldtab->icol.num != clist[i].num; i++)
139 fldtab->icol.p = clist + i;
140 if (!fldtab->tcol.num)
141 continue;
142 for (i = 0; fldtab->tcol.num != clist[i].num; i++)
144 fldtab->tcol.p = clist + i;
149 * interprets a column in a -k field
151 static const char *
152 setcolumn(const char *pos, struct field *cur_fld)
154 struct column *col;
155 char *npos;
156 int tmp;
157 col = cur_fld->icol.num ? (&cur_fld->tcol) : (&cur_fld->icol);
158 col->num = (int) strtol(pos, &npos, 10);
159 pos = npos;
160 if (col->num <= 0 && !(col->num == 0 && col == &(cur_fld->tcol)))
161 errx(2, "field numbers must be positive");
162 if (*pos == '.') {
163 if (!col->num)
164 errx(2, "cannot indent end of line");
165 ++pos;
166 col->indent = (int) strtol(pos, &npos, 10);
167 pos = npos;
168 if (&cur_fld->icol == col)
169 col->indent--;
170 if (col->indent < 0)
171 errx(2, "illegal offset");
173 for(; (tmp = optval(*pos, cur_fld->tcol.num)); pos++)
174 cur_fld->flags |= tmp;
175 if (cur_fld->icol.num == 0)
176 cur_fld->icol.num = 1;
177 return (pos);
181 setfield(const char *pos, struct field *cur_fld, int gflag)
183 cur_fld->mask = NULL;
185 pos = setcolumn(pos, cur_fld);
186 if (*pos == '\0') /* key extends to EOL. */
187 cur_fld->tcol.num = 0;
188 else {
189 if (*pos != ',')
190 errx(2, "illegal field descriptor");
191 setcolumn((++pos), cur_fld);
193 if (!cur_fld->flags)
194 cur_fld->flags = gflag;
195 if (REVERSE)
196 /* A local 'r' doesn't invert the global one */
197 cur_fld->flags &= ~R;
199 /* Assign appropriate mask table and weight table. */
200 cur_fld->weights = weight_tables[cur_fld->flags & (R | F)];
201 if (cur_fld->flags & I)
202 cur_fld->mask = itable;
203 else if (cur_fld->flags & D)
204 cur_fld->mask = dtable;
206 cur_fld->flags |= (gflag & (BI | BT));
207 if (!cur_fld->tcol.indent) /* BT has no meaning at end of field */
208 cur_fld->flags &= ~BT;
210 if (cur_fld->tcol.num
211 && !(!(cur_fld->flags & BI) && cur_fld->flags & BT)
212 && (cur_fld->tcol.num <= cur_fld->icol.num
213 /* indent if 0 -> end of field, i.e. okay */
214 && cur_fld->tcol.indent != 0
215 && cur_fld->tcol.indent < cur_fld->icol.indent))
216 errx(2, "fields out of order");
218 insertcol(cur_fld);
219 return (cur_fld->tcol.num);
223 optval(int desc, int tcolflag)
225 switch(desc) {
226 case 'b':
227 if (!tcolflag)
228 return (BI);
229 else
230 return (BT);
231 case 'd': return (D);
232 case 'f': return (F);
233 case 'i': return (I);
234 case 'n': return (N);
235 case 'r': return (R);
236 default: return (0);
/*
 * Build a freshly malloc'ed string "<prefix><sep><col>.<indent><flags>".
 * The result is sized to fit, so it is never truncated.  Exits via err()
 * if formatting or allocation fails.
 */
static char *
fixit_spec(const char *prefix, const char *sep, int col, int indent,
    const char *flags)
{
	char *out;
	int len;

	/* First pass only measures the length. */
	len = snprintf(NULL, 0, "%s%s%d.%d%s", prefix, sep, col, indent,
	    flags);
	if (len < 0)
		err(1, "snprintf");
	out = malloc((size_t)len + 1);
	if (out == NULL)
		err(1, "malloc");
	snprintf(out, (size_t)len + 1, "%s%s%d.%d%s", prefix, sep, col,
	    indent, flags);
	return out;
}

/*
 * Replace historic +SPEC arguments with appropriate -kSPEC.
 *
 * Every argv element starting with '+' is replaced by a malloc'ed
 * "-kCOL.INDENT[flags]" string.  A "-POS" element (a '-' followed by a
 * digit) directly after it is folded into that string as ",COL.INDENT"
 * and removed from argv; *argc is decremented accordingly.  argv must be
 * terminated by a NULL element at argv[*argc].
 *
 * NOTE(review): every element is examined, including ones after "--" and
 * option arguments, so a file operand that starts with '+' is rewritten
 * too -- confirm whether callers need that guarded.
 */
void
fixit(int *argc, char **argv)
{
	int i, j, fplus = 0;
	char *vpos, *tpos;
	int col, indent;

	for (i = 1; i < *argc; i++) {
		if (argv[i][0] != '+' && !fplus)
			continue;

		if (fplus && (argv[i][0] != '-' ||
		    !isdigit((unsigned char)argv[i][1]))) {
			fplus = 0;
			if (argv[i][0] != '+') {
				/* not a -POS argument, skip */
				continue;
			}
		}

		/* parse spec */
		tpos = argv[i] + 1;
		col = (int)strtol(tpos, &tpos, 10);
		if (*tpos == '.') {
			++tpos;
			indent = (int)strtol(tpos, &tpos, 10);
		} else
			indent = 0;
		/* tpos points to optional flags now */

		/*
		 * For x.y, the obsolescent variant assumed 0 == beginning
		 * of line, while the new form uses 0 == end of line.
		 * Convert accordingly.
		 */
		if (!fplus) {
			/* +POS */
			col += 1;
			indent += 1;
		} else {
			/* -POS */
			if (indent > 0)
				col += 1;
		}

		if (!fplus) {
			/* Replace the +POS argument with new-style -kSPEC */
			argv[i] = fixit_spec("-k", "", col, indent, tpos);
			fplus = 1;
		} else {
			/*
			 * Append the spec to one previously generated from
			 * +POS argument, and remove the argv element.
			 */
			vpos = fixit_spec(argv[i - 1], ",", col, indent, tpos);
			free(argv[i - 1]);
			argv[i - 1] = vpos;
			/* j + 1 reaches argv[*argc], the NULL terminator */
			for (j = i; j < *argc; j++)
				argv[j] = argv[j + 1];
			*argc -= 1;
			i--;
			/*
			 * A +POS takes at most one -POS; a further "-digit"
			 * argument must not be glued onto this key.
			 */
			fplus = 0;
		}
	}
}
314 * ascii, Rascii, Ftable, and RFtable map
316 * Sorting 'weight' tables.
317 * Convert 'ascii' characters into their sort order.
318 * The 'F' variants fold lower case to upper equivalent
319 * The 'R' variants are for reverse sorting.
321 * The record separator (REC_D) never needs a weight, this frees one
322 * byte value as an 'end of key' marker. This must be 0 for normal
323 * weight tables, and 0xff for reverse weight tables - and is used
324 * to terminate keys so that short keys sort before (after if reverse)
325 * longer keys.
327 * The field separator has a normal weight - although it cannot occur
328 * within a key unless it is the default (space+tab).
330 * All other bytes map to the appropriate value for the sort order.
331 * Numeric sorts don't need any tables, they are reversed by negation.
333 * Global reverse sorts are done by writing the sorted keys in reverse
334 * order - the sort itself is still forwards.
335 * This means that weights are only ever used when generating keys, any
336 * sort of the original data bytes is always forwards and unweighted.
338 * Note: this is only good for ASCII sorting. For different LC 's,
339 * all bets are off.
341 * itable[] and dtable[] are the masks for -i (ignore non-printables)
342 * and -d (only sort blank and alphanumerics).
344 void
345 settables(void)
347 int i;
348 int next_weight = 1;
349 int rev_weight = 254;
351 ascii[REC_D] = 0;
352 Rascii[REC_D] = 255;
353 Ftable[REC_D] = 0;
354 RFtable[REC_D] = 255;
356 for (i = 0; i < 256; i++) {
357 if (i == REC_D)
358 continue;
359 ascii[i] = next_weight;
360 Rascii[i] = rev_weight;
361 if (Ftable[i] == 0) {
362 Ftable[i] = next_weight;
363 RFtable[i] = rev_weight;
364 Ftable[tolower(i)] = next_weight;
365 RFtable[tolower(i)] = rev_weight;
367 next_weight++;
368 rev_weight--;
370 if (i == '\n' || isprint(i))
371 itable[i] = 1;
373 if (i == '\n' || i == '\t' || i == ' ' || isalnum(i))
374 dtable[i] = 1;