Added lance entry to drivers.conf.
[minix3-old.git] / commands / simple / sort.c
blob09a7d348c5294fad6aa8392cdf93407a1029b1b5
1 /* sort - sort a file of lines Author: Michiel Huisjes */
3 /* SYNOPSIS:
4 * sort [-funbirdcmt'x'] [+beg_pos[opts] [-end_pos]] [-o outfile] [file]..
6 * [opts] can be any of
7 * -f : Fold upper case to lower.
8 * -n : Sort to numeric value (optional decimal point) implies -b
9 * -b : Skip leading blanks
10 * -i : Ignore chars outside ASCII range (040 - 0176)
11 * -r : Reverse the sense of comparisons.
12 * -d : Sort to dictionary order. Only letters, digits, comma's and points
13 * are compared.
14 * If any of these flags are used in [opts], then they override all global
15 * ordering for this field.
17 * I/O control flags are:
18 * -u : Print uniq lines only once.
19 * -c : Check if files are sorted in order.
20 * -m : Merge already sorted files.
21 * -o outfile : Name of output file. (Can be one of the input files).
22 * Default is stdout.
23 * - : Take stdin as input.
25 * Fields:
26 * -t'x' : Field separating character is 'x'
27 * +a.b : Start comparing at field 'a' with offset 'b'. A missing 'b' is
28 * taken to be 0.
29 * -a.b : Stop comparing at field 'a' with offset 'b'. A missing 'b' is
30 * taken to be 0.
31 * A missing -a.b means the rest of the line.
34 #include <sys/types.h>
35 #include <sys/stat.h>
36 #include <fcntl.h>
37 #include <signal.h>
38 #include <unistd.h>
39 #include <stdlib.h>
40 #include <string.h>
41 #include <stdio.h>
42 #include <limits.h>
44 #define OPEN_FILES (OPEN_MAX-4) /* Nr of open files per process */
45 #if __minix_vmd
46 #define MEMORY_SIZE (1024 * 1024)
47 #else
48 #define MEMORY_SIZE ((10 * sizeof(int)) * 1024)
49 #endif
50 /* Total mem_size */
51 #define LINE_SIZE (1024 >> 1) /* Max length of a line */
52 #define IO_SIZE (2 * 1024) /* Size of buffered output */
53 #define STD_OUT 1 /* Fd of terminal */
55 /* Return status of functions */
56 #define OK 0
57 #define ERROR -1
58 #define NIL_PTR ((char *) 0)
60 /* Compare return values */
61 #define LOWER -1
62 #define SAME 0
63 #define HIGHER 1
65 /* Table definitions. */
66 #define DICT 0x001 /* Alpha, numeric, letters and . */
67 #define ASCII 0x002 /* All between ' ' and '~' */
68 #define BLANK 0x004 /* ' ' and '\t' */
69 #define DIGIT 0x008 /* 0-9 */
70 #define UPPER 0x010 /* A-Z */
72 typedef int BOOL;
74 #define FALSE 0
75 #define TRUE 1
77 typedef struct {
78 int fd; /* Fd of file */
79 char *buffer; /* Buffer for reads */
80 int read_chars; /* Nr of chars actually read in buffer */
81 int cnt; /* Nr of chars taken out of buffer */
82 char *line; /* Contains line currently used */
83 } MERGE;
85 #define NIL_MERGE ((MERGE *) 0)
86 MERGE merge_f[OPEN_FILES]; /* Merge structs */
87 int buf_size; /* Size of core available for each struct */
89 #define FIELDS_LIMIT 10 /* 1 global + 9 user */
90 #define GLOBAL 0
92 typedef struct {
93 int beg_field, beg_pos; /* Begin field + offset */
94 int end_field, end_pos; /* End field + offset. ERROR == EOLN */
95 BOOL reverse; /* TRUE if rev. flag set on this field */
96 BOOL blanks;
97 BOOL dictionary;
98 BOOL fold_case;
99 BOOL ascii;
100 BOOL numeric;
101 } FIELD;
103 /* Field declarations. A total of FILEDS_LIMIT is allowed */
104 FIELD fields[FIELDS_LIMIT];
105 int field_cnt; /* Nr of field actually assigned */
107 /* Various output control flags */
108 BOOL check = FALSE;
109 BOOL only_merge = FALSE;
110 BOOL uniq = FALSE;
112 char *mem_top; /* Mem_top points to lowest pos of memory. */
113 char *cur_pos; /* First free position in mem */
114 char **line_table; /* Pointer to the internal line table */
115 BOOL in_core = TRUE; /* Set if input cannot all be sorted in core */
117 /* Place where temp_files should be made */
118 char temp_files[] = "/tmp/sort.XXXXX.XX";
119 char *output_file; /* Name of output file */
120 int out_fd; /* Fd to output file (could be STD_OUT) */
121 char out_buffer[IO_SIZE]; /* For buffered output */
123 char **argptr; /* Pointer to argv structure */
124 int args_offset; /* Nr of args spilled on options */
125 int args_limit; /* Nr of args given */
127 char separator; /* Char that separates fields */
128 int nr_of_files = 0; /* Nr_of_files to be merged */
129 int disabled; /* Nr of files done */
131 char USAGE[] = "Usage: sort [-funbirdcmt'x'] [+beg_pos [-end_pos]] [-o outfile] [file] ..";
133 /* Forward declarations */
134 _PROTOTYPE(int main, (int argc, char **argv));
135 _PROTOTYPE(void get_opts, (char *ptr, FIELD * field));
136 _PROTOTYPE(void new_field, (FIELD * field, int *offset, BOOL beg_fl));
137 _PROTOTYPE(void adjust_options, (FIELD * field));
138 _PROTOTYPE(void error, (BOOL quit, char *message, char *arg));
139 _PROTOTYPE(void open_outfile, (void));
140 _PROTOTYPE(void get_file, (int fd, off_t size));
141 _PROTOTYPE(int last_line, (void));
142 _PROTOTYPE(void print_table, (int fd));
143 _PROTOTYPE(char *file_name, (int nr));
144 _PROTOTYPE(void mread, (int fd, char *address, int bytes));
145 _PROTOTYPE(void mwrite, (int fd, char *address, int bytes));
146 _PROTOTYPE(void sort, (void));
147 _PROTOTYPE(void sort_table, (int nel));
148 _PROTOTYPE(void incr, (int si, int ei));
149 _PROTOTYPE(int cmp_fields, (char *el1, char *el2));
150 _PROTOTYPE(void build_field, (char *dest, FIELD * field, char *src));
151 _PROTOTYPE(char *skip_fields, (char *str, int nf));
152 _PROTOTYPE(int compare, (char *el1, char *el2));
153 _PROTOTYPE(int cmp, (unsigned char *el1, unsigned char *el2, FIELD * field));
154 _PROTOTYPE(int digits, (char *str1, char *str2, BOOL check_sign));
155 _PROTOTYPE(void files_merge, (int file_cnt));
156 _PROTOTYPE(void merge, (int start_file, int limit_file));
157 _PROTOTYPE(void put_line, (char *line));
158 _PROTOTYPE(MERGE * print, (MERGE * merg, int file_cnt));
159 _PROTOTYPE(int read_line, (MERGE * merg));
160 _PROTOTYPE(MERGE * skip_lines, (MERGE * smallest, int file_cnt));
161 _PROTOTYPE(void uniq_lines, (MERGE * merg));
162 _PROTOTYPE(void check_file, (int fd, char *file));
163 _PROTOTYPE(int length, (char *line));
164 _PROTOTYPE(void copy, (char *dest, char *src));
165 _PROTOTYPE(char *msbrk, (int size));
166 _PROTOTYPE(void mbrk, (char *address));
167 _PROTOTYPE(void catch, (int dummy));
169 /* Table of all chars. 0 means no special meaning. */
170 char table[256] = {
171 /* '^@' to space */
172 0, 0, 0, 0, 0, 0, 0, 0,
173 0, BLANK | DICT, 0, 0, 0, 0, 0, 0,
174 0, 0, 0, 0, 0, 0, 0, 0,
175 0, 0, 0, 0, 0, 0, 0, 0,
177 /* Space to '0' */
178 BLANK | DICT | ASCII, ASCII, ASCII, ASCII, ASCII, ASCII, ASCII,
179 ASCII, ASCII, ASCII, ASCII, ASCII, ASCII, ASCII,
180 ASCII, ASCII,
182 /* '0' until '9' */
183 DIGIT | DICT | ASCII, DIGIT | DICT | ASCII, DIGIT | DICT | ASCII,
184 DIGIT | DICT | ASCII, DIGIT | DICT | ASCII, DIGIT | DICT | ASCII,
185 DIGIT | DICT | ASCII, DIGIT | DICT | ASCII, DIGIT | DICT | ASCII,
186 DIGIT | DICT | ASCII,
188 /* ASCII from ':' to '@' */
189 ASCII, ASCII, ASCII, ASCII, ASCII, ASCII, ASCII,
191 /* Upper case letters 'A' to 'Z' */
192 UPPER | DICT | ASCII, UPPER | DICT | ASCII, UPPER | DICT | ASCII,
193 UPPER | DICT | ASCII, UPPER | DICT | ASCII, UPPER | DICT | ASCII,
194 UPPER | DICT | ASCII, UPPER | DICT | ASCII, UPPER | DICT | ASCII,
195 UPPER | DICT | ASCII, UPPER | DICT | ASCII, UPPER | DICT | ASCII,
196 UPPER | DICT | ASCII, UPPER | DICT | ASCII, UPPER | DICT | ASCII,
197 UPPER | DICT | ASCII, UPPER | DICT | ASCII, UPPER | DICT | ASCII,
198 UPPER | DICT | ASCII, UPPER | DICT | ASCII, UPPER | DICT | ASCII,
199 UPPER | DICT | ASCII, UPPER | DICT | ASCII, UPPER | DICT | ASCII,
200 UPPER | DICT | ASCII, UPPER | DICT | ASCII,
202 /* ASCII from '[' to '`' */
203 ASCII, ASCII, ASCII, ASCII, ASCII, ASCII,
205 /* Lower case letters from 'a' to 'z' */
206 DICT | ASCII, DICT | ASCII, DICT | ASCII, DICT | ASCII,
207 DICT | ASCII, DICT | ASCII, DICT | ASCII, DICT | ASCII,
208 DICT | ASCII, DICT | ASCII, DICT | ASCII, DICT | ASCII,
209 DICT | ASCII, DICT | ASCII, DICT | ASCII, DICT | ASCII,
210 DICT | ASCII, DICT | ASCII, DICT | ASCII, DICT | ASCII,
211 DICT | ASCII, DICT | ASCII, DICT | ASCII, DICT | ASCII,
212 DICT | ASCII, DICT | ASCII,
214 /* ASCII from '{' to '~' */
215 ASCII, ASCII, ASCII, ASCII,
217 /* Stuff from -1 to -177 */
218 0, 0, 0, 0, 0, 0, 0, 0, 0,
219 0, 0, 0, 0, 0, 0, 0, 0, 0,
220 0, 0, 0, 0, 0, 0, 0, 0, 0,
221 0, 0, 0, 0, 0, 0, 0, 0, 0,
222 0, 0, 0, 0, 0, 0, 0, 0, 0,
223 0, 0, 0, 0, 0, 0, 0, 0, 0,
224 0, 0, 0, 0, 0, 0, 0, 0, 0,
225 0, 0, 0, 0, 0, 0, 0, 0, 0,
226 0, 0, 0, 0, 0, 0, 0
231 * Get_opts () assigns the options into the field structure as described in ptr.
232 * This field structure could be the GLOBAL one.
234 void get_opts(ptr, field)
235 register char *ptr;
236 register FIELD *field;
238 switch (*ptr) {
239 case 'b': /* Skip leading blanks */
240 field->blanks = TRUE;
241 break;
242 case 'd': /* Dictionary order */
243 field->dictionary = TRUE;
244 break;
245 case 'f': /* Fold upper case to lower */
246 field->fold_case = TRUE;
247 break;
248 case 'i': /* Skip chars outside ' ' '~' */
249 field->ascii = TRUE;
250 break;
251 case 'n': /* Sort on numeric */
252 field->numeric = TRUE;
253 field->blanks = TRUE;
254 break;
255 case 'r': /* Reverse comparisons */
256 field->reverse = TRUE;
257 break;
258 default: /* Illegal options */
259 error(TRUE, USAGE, NIL_PTR);
263 /* New_field () assigns a new field as described by the arguments.
264 * A field description is of the form: +a.b[opts] -c.d, where b and d, as well
265 * as -c.d and [opts] are optional. Nr before digit is field nr. Nr after digit
266 * is offset from field.
268 void new_field(field, offset, beg_fl)
269 register FIELD *field; /* Field to assign */
270 int *offset; /* Offset in argv structure */
271 BOOL beg_fl; /* Assign beg or end of field */
273 register char *ptr;
275 ptr = argptr[*offset];
276 *offset += 1; /* Incr offset to next arg */
277 ptr++;
279 if (beg_fl)
280 field->beg_field = atoi(ptr); /* Assign int of first field */
281 else
282 field->end_field = atoi(ptr);
284 while (table[*ptr] & DIGIT) /* Skip all digits */
285 ptr++;
287 if (*ptr == '.') { /* Check for offset */
288 ptr++;
289 if (beg_fl)
290 field->beg_pos = atoi(ptr);
291 else
292 field->end_pos = atoi(ptr);
293 while (table[*ptr] & DIGIT) /* Skip digits */
294 ptr++;
296 if (beg_fl) {
297 while (*ptr != '\0') /* Check options after field */
298 get_opts(ptr++, field);
300 if (beg_fl) { /* Check for end pos */
301 ptr = argptr[*offset];
302 if (ptr && *ptr == '-' && ((table[*(ptr + 1)] & DIGIT) || *(ptr + 1) == '.')) {
303 new_field(field, offset, FALSE);
304 if (field->beg_field > field->end_field)
305 error(TRUE, "End field is before start field!", NIL_PTR);
306 } else /* No end pos. */
307 field->end_field = ERROR;
311 int main(argc, argv)
312 int argc;
313 char *argv[];
315 int arg_count = 1; /* Offset in argv */
316 struct stat st;
317 register char *ptr; /* Ptr to *argv in use */
318 register int fd;
319 int pid, pow;
321 argptr = argv;
322 cur_pos = mem_top = msbrk(MEMORY_SIZE); /* Find lowest mem. location */
324 while (arg_count < argc && ((ptr = argv[arg_count])[0] == '-' || *ptr == '+')) {
325 if (*ptr == '-' && *(ptr + 1) == '\0') /* "-" means stdin */
326 break;
327 if (*ptr == '+') { /* Assign field. */
328 if (++field_cnt == FIELDS_LIMIT)
329 error(TRUE, "Too many fields", NIL_PTR);
330 new_field(&fields[field_cnt], &arg_count, TRUE);
331 } else { /* Get output options */
332 while (*++ptr) {
333 switch (*ptr) {
334 case 'c': /* Only check file */
335 check = TRUE;
336 break;
337 case 'm': /* Merge (sorted) files */
338 only_merge = TRUE;
339 break;
340 case 'u': /* Only give uniq lines */
341 uniq = TRUE;
342 break;
343 case 'o': /* Name of output file */
344 output_file = argv[++arg_count];
345 break;
346 case 't': /* Field separator */
347 ptr++;
348 separator = *ptr;
349 break;
350 default: /* Sort options */
351 get_opts(ptr, &fields[GLOBAL]);
354 arg_count++;
358 for (fd = 1; fd <= field_cnt; fd++) adjust_options(&fields[fd]);
360 /* Create name of tem_files 'sort.pid.aa' */
361 ptr = &temp_files[10];
362 pid = getpid();
363 pow = 10000;
364 while (pow != 0) {
365 *ptr++ = pid / pow + '0';
366 pid %= pow;
367 pow /= 10;
370 signal(SIGINT, catch);
372 /* Only merge files. Set up */
373 if (only_merge) {
374 args_limit = args_offset = arg_count;
375 while (argv[args_limit] != NIL_PTR)
376 args_limit++; /* Find nr of args */
377 files_merge(args_limit - arg_count);
378 exit(0);
380 if (arg_count == argc) { /* No args left. Use stdin */
381 if (check)
382 check_file(0, NIL_PTR);
383 else
384 get_file(0, (off_t) 0);
385 } else
386 while (arg_count < argc) { /* Sort or check args */
387 if (strcmp(argv[arg_count], "-") == 0)
388 fd = 0;
389 else if (stat(argv[arg_count], &st) < 0) {
390 error(FALSE, "Cannot find ", argv[arg_count++]);
391 continue;
394 /* Open files */
395 else if ((fd = open(argv[arg_count], O_RDONLY)) < 0) {
396 error(FALSE, "Cannot open ", argv[arg_count++]);
397 continue;
399 if (check)
400 check_file(fd, argv[arg_count]);
401 else /* Get_file reads whole file */
402 get_file(fd, st.st_size);
403 arg_count++;
406 if (check) exit(0);
408 sort(); /* Sort whatever is left */
410 if (nr_of_files == 1) /* Only one file sorted -> don't merge */
411 exit(0);
413 files_merge(nr_of_files);
414 return(0);
417 /* Adjust_options() assigns all global variables set also in the fields
418 * assigned.
420 void adjust_options(field)
421 register FIELD *field;
423 register FIELD *gfield = &fields[GLOBAL];
425 if (gfield->reverse) field->reverse = TRUE;
426 if (gfield->blanks) field->blanks = TRUE;
427 if (gfield->dictionary) field->dictionary = TRUE;
428 if (gfield->fold_case) field->fold_case = TRUE;
429 if (gfield->ascii) field->ascii = TRUE;
430 if (gfield->numeric) field->numeric = TRUE;
433 /* Error () prints the error message on stderr and exits if quit == TRUE. */
434 void error(quit, message, arg)
435 register BOOL quit;
436 register char *message, *arg;
438 write(2, message, strlen(message));
439 if (arg != NIL_PTR) write(2, arg, strlen(arg));
440 perror(" ");
441 if (quit) exit(1);
444 /* Open_outfile () assigns to out_fd the fd where the output must go when all
445 * the sorting is done.
447 void open_outfile()
449 if (output_file == NIL_PTR)
450 out_fd = STD_OUT;
451 else if ((out_fd = creat(output_file, 0644)) < 0)
452 error(TRUE, "Cannot creat ", output_file);
455 /* Get_file reads the whole file of filedescriptor fd. If the file is too big
456 * to keep in core, a partial sort is done, and the output is stashed somewhere.
458 void get_file(fd, size)
459 int fd; /* Fd of file to read */
460 register off_t size; /* Size of file */
462 register int i;
463 int rest; /* Rest in memory */
464 char save_ch; /* Used in stdin readings */
466 rest = MEMORY_SIZE - (cur_pos - mem_top);
467 if (fd == 0) { /* We're reding stdin */
468 while ((i = read(0, cur_pos, rest)) > 0) {
469 if ((cur_pos - mem_top) + i == MEMORY_SIZE) {
470 in_core = FALSE;
471 i = last_line(); /* End of last line */
472 save_ch = mem_top[i];
473 mem_top[i] = '\0';
474 sort(); /* Sort core */
475 mem_top[i] = save_ch; /* Restore erased char */
476 /* Restore last (half read) line */
477 for (rest = 0; i + rest != MEMORY_SIZE; rest++)
478 mem_top[rest] = mem_top[i + rest];
479 /* Assign current pos. in memory */
480 cur_pos = &mem_top[rest];
481 } else { /* Fits, just assign position in mem. */
482 cur_pos = cur_pos + i;
483 *cur_pos = '\0';
486 /* Calculate rest of mem */
487 rest = MEMORY_SIZE - (cur_pos - mem_top);
491 /* Reading file. Check size */
492 else if (size > rest) { /* Won't fit */
493 mread(fd, cur_pos, rest);
494 in_core = FALSE;
495 i = last_line(); /* Get pos. of last line */
496 mem_top[i] = '\0'; /* Truncate */
497 (void) lseek(fd, (off_t) (i - MEMORY_SIZE), SEEK_CUR); /* Do this next time */
498 size = size - rest - i + MEMORY_SIZE; /* Calculate rest */
499 cur_pos = mem_top; /* Reset mem */
500 sort(); /* Sort core */
501 get_file(fd, size); /* Get rest of file */
502 } else { /* Fits. Just read in */
503 rest = size;
504 mread(fd, cur_pos, rest);
505 cur_pos = cur_pos + rest; /* Reassign cur_pos */
506 *cur_pos = '\0';
507 (void) close(fd); /* File completed */
511 /* Last_line () find the last line in core and retuns the offset from the top
512 * of the memory.
514 int last_line()
516 register int i;
518 for (i = MEMORY_SIZE - 2; i > 0; i--)
519 if (mem_top[i] == '\n') break;
520 return i + 1;
523 /* Print_table prints the line table in the given file_descriptor. If the fd
524 * equals ERROR, it opens a temp_file itself.
526 void print_table(fd)
527 int fd;
529 register char **line_ptr; /* Ptr in line_table */
530 register char *ptr; /* Ptr to line */
531 int index = 0; /* Index in output buffer */
533 if (fd == ERROR) {
534 if ((fd = creat(file_name(nr_of_files), 0644)) < 0)
535 error(TRUE, "Cannot creat ", file_name(nr_of_files));
537 for (line_ptr = line_table; *line_ptr != NIL_PTR; line_ptr++) {
538 ptr = *line_ptr;
539 /* Skip all same lines if uniq is set */
540 if (uniq && *(line_ptr + 1) != NIL_PTR) {
541 if (compare(ptr, *(line_ptr + 1)) == SAME) continue;
543 do { /* Print line in a buffered way */
544 out_buffer[index++] = *ptr;
545 if (index == IO_SIZE) {
546 mwrite(fd, out_buffer, IO_SIZE);
547 index = 0;
549 } while (*ptr++ != '\n');
551 mwrite(fd, out_buffer, index);/* Flush buffer */
552 (void) close(fd); /* Close file */
553 nr_of_files++; /* Increment nr_of_files to merge */
556 /* File_name () returns the nr argument from the argument list, or a uniq
557 * filename if the nr is too high, or the arguments were not merge files.
559 char *file_name(nr)
560 register int nr;
562 if (only_merge) {
563 if (args_offset + nr < args_limit) return argptr[args_offset + nr];
565 temp_files[16] = nr / 26 + 'a';
566 temp_files[17] = nr % 26 + 'a';
568 return temp_files;
571 /* Mread () performs a normal read (), but checks the return value. */
572 void mread(fd, address, bytes)
573 int fd;
574 char *address;
575 register int bytes;
577 if (read(fd, address, bytes) < 0 && bytes != 0)
578 error(TRUE, "Read error", NIL_PTR);
581 /* Mwrite () performs a normal write (), but checks the return value. */
582 void mwrite(fd, address, bytes)
583 int fd;
584 char *address;
585 register int bytes;
587 if (write(fd, address, bytes) != bytes && bytes != 0)
588 error(TRUE, "Write error", NIL_PTR);
591 /* Sort () sorts the input in memory starting at mem_top. */
592 void sort()
594 register char *ptr = mem_top;
595 register int count = 0;
597 /* Count number of lines in memory */
598 while (*ptr) {
599 if (*ptr++ == '\n') count++;
602 /* Set up the line table */
603 line_table = (char **) msbrk(count * sizeof(char *) + sizeof(char *));
605 count = 1;
606 ptr = line_table[0] = mem_top;
607 while (*ptr) {
608 if (*ptr++ == '\n') line_table[count++] = ptr;
611 line_table[count - 1] = NIL_PTR;
613 /* Sort the line table */
614 sort_table(count - 1);
616 /* Stash output somewhere */
617 if (in_core) {
618 open_outfile();
619 print_table(out_fd);
620 } else
621 print_table(ERROR);
623 /* Free line table */
624 mbrk((char *) line_table);
627 /* Sort_table () sorts the line table consisting of nel elements. */
628 void sort_table(nel)
629 register int nel;
631 char *tmp;
632 register int i;
634 /* Make heap */
635 for (i = (nel >> 1); i >= 1; i--) incr(i, nel);
637 /* Sort from heap */
638 for (i = nel; i > 1; i--) {
639 tmp = line_table[0];
640 line_table[0] = line_table[i - 1];
641 line_table[i - 1] = tmp;
642 incr(1, i - 1);
646 /* Incr () increments the heap. */
647 void incr(si, ei)
648 register int si, ei;
650 char *tmp;
652 while (si <= (ei >> 1)) {
653 si <<= 1;
654 if (si + 1 <= ei && compare(line_table[si - 1], line_table[si]) <= 0)
655 si++;
656 if (compare(line_table[(si >> 1) - 1], line_table[si - 1]) >= 0)
657 return;
658 tmp = line_table[(si >> 1) - 1];
659 line_table[(si >> 1) - 1] = line_table[si - 1];
660 line_table[si - 1] = tmp;
664 /* Cmp_fields builds new lines out of the lines pointed to by el1 and el2 and
665 * puts it into the line1 and line2 arrays. It then calls the cmp () routine
666 * with the field describing the arguments.
668 int cmp_fields(el1, el2)
669 register char *el1, *el2;
671 int i, ret;
672 char line1[LINE_SIZE], line2[LINE_SIZE];
674 for (i = 0; i < field_cnt; i++) { /* Setup line parts */
675 build_field(line1, &fields[i + 1], el1);
676 build_field(line2, &fields[i + 1], el2);
677 if ((ret = cmp((unsigned char *) line1, (unsigned char *) line2,
678 &fields[i + 1])) != SAME)
679 break; /* If equal, try next field */
682 /* Check for reverse flag */
683 if (i != field_cnt && fields[i + 1].reverse) return -ret;
685 /* Else return the last return value of cmp () */
686 return ret;
689 /* Build_field builds a new line from the src as described by the field.
690 * The result is put in dest.
692 void build_field(dest, field, src)
693 char *dest; /* Holds result */
694 register FIELD *field; /* Field description */
695 register char *src; /* Source line */
697 char *begin = src; /* Remember start location */
698 char *last; /* Pointer to end location */
699 int i;
701 /* Skip begin fields */
702 src = skip_fields(src, field->beg_field);
704 /* Skip begin positions */
705 for (i = 0; i < field->beg_pos && *src != '\n'; i++) src++;
707 /* Copy whatever is left */
708 copy(dest, src);
710 /* If end field is assigned truncate (perhaps) the part copied */
711 if (field->end_field != ERROR) { /* Find last field */
712 last = skip_fields(begin, field->end_field);
713 /* Skip positions as given by end fields description */
714 for (i = 0; i < field->end_pos && *last != '\n'; i++) last++;
715 dest[last - src] = '\n';/* Truncate line */
719 /* Skip_fields () skips nf fields of the line pointed to by str. */
720 char *skip_fields(str, nf)
721 register char *str;
722 int nf;
724 while (nf-- > 0) {
725 if (separator == '\0') {/* Means ' ' or '\t' */
726 while (*str != ' ' && *str != '\t' && *str != '\n') str++;
727 while (table[*str] & BLANK) str++;
728 } else {
729 while (*str != separator && *str != '\n') str++;
730 if (*str == separator) str++;
733 return str; /* Return pointer to indicated field */
736 /* Compare is called by all sorting routines. It checks if fields assignments
737 * has been made. if so, it calls cmp_fields (). If not, it calls cmp () and
738 * reversed the return value if the (global) reverse flag is set.
740 int compare(el1, el2)
741 register char *el1, *el2;
743 int ret;
745 if (field_cnt > GLOBAL) return cmp_fields(el1, el2);
747 ret = cmp((unsigned char *) el1, (unsigned char *) el2, &fields[GLOBAL]);
748 return(fields[GLOBAL].reverse) ? -ret : ret;
751 /* Cmp () is the actual compare routine. It compares according to the
752 * description given in the field pointer.
754 int cmp(el1, el2, field)
755 register unsigned char *el1, *el2;
756 FIELD *field;
758 int c1, c2;
760 if (field->blanks) { /* Skip leading blanks */
761 while (table[*el1] & BLANK) el1++;
762 while (table[*el2] & BLANK) el2++;
764 if (field->numeric) /* Compare numeric */
765 return digits((char *) el1, (char *) el2, TRUE);
767 for (;;) {
768 while (*el1 == *el2) {
769 if (*el1++ == '\n') /* EOLN on both strings */
770 return SAME;
771 el2++;
773 if (*el1 == '\n') /* EOLN on string one */
774 return LOWER;
775 if (*el2 == '\n') return HIGHER;
776 if (field->ascii) { /* Skip chars outside 040 - 0177 */
777 if ((table[*el1] & ASCII) == 0) {
778 do {
779 el1++;
780 } while ((table[*el1] & ASCII) == 0);
781 continue;
783 if ((table[*el2] & ASCII) == 0) {
784 do {
785 el2++;
786 } while ((table[*el2] & ASCII) == 0);
787 continue;
790 if (field->dictionary) {/* Skip non-dict chars */
791 if ((table[*el1] & DICT) == 0) {
792 do {
793 el1++;
794 } while ((table[*el1] & DICT) == 0);
795 continue;
797 if ((table[*el2] & DICT) == 0) {
798 do {
799 el2++;
800 } while ((table[*el2] & DICT) == 0);
801 continue;
804 if (field->fold_case) { /* Fold upper case to lower */
805 if (table[c1 = *el1++] & UPPER) c1 += 'a' - 'A';
806 if (table[c2 = *el2++] & UPPER) c2 += 'a' - 'A';
807 if (c1 == c2) continue;
808 return c1 - c2;
810 return *el1 - *el2;
813 /* NOTREACHED */
817 * Digits compares () the two strings that point to a number of digits followed
818 * by an optional decimal point.
820 int digits(str1, str2, check_sign)
821 register char *str1, *str2;
822 BOOL check_sign; /* True if sign must be checked */
824 BOOL negative = FALSE; /* True if negative numbers */
825 int diff, pow, ret;
827 /* Check for optional minus or plus sign */
828 if (check_sign) {
829 if (*str1 == '-') {
830 negative = TRUE;
831 str1++;
832 } else if (*str1 == '+')
833 str1++;
835 if (*str2 == '-') {
836 if (negative == FALSE) return HIGHER;
837 str2++;
838 } else if (negative)
839 return LOWER;
840 else if (*str2 == '+')
841 str2++;
844 /* Keep incrementing as long as digits are available and equal */
845 while ((table[*str1] & DIGIT) && table[*str2] & DIGIT) {
846 if (*str1 != *str2) break;
847 str1++;
848 str2++;
851 /* First check for the decimal point. */
852 if (*str1 == '.' || *str2 == '.') {
853 if (*str1 == '.') {
854 if (*str2 == '.') /* Both. Check decimal part */
855 ret = digits(str1 + 1, str2 + 1, FALSE);
856 else
857 ret = (table[*str2] & DIGIT) ? LOWER : HIGHER;
858 } else
859 ret = (table[*str1] & DIGIT) ? HIGHER : LOWER;
862 /* Now either two digits differ, or unknown char is seen (e.g. end of string) */
863 else if ((table[*str1] & DIGIT) && (table[*str2] & DIGIT)) {
864 diff = *str1 - *str2; /* Basic difference */
865 pow = 0; /* Check power of numbers */
866 while (table[*str1++] & DIGIT) pow++;
867 while (table[*str2++] & DIGIT) pow--;
868 ret = (pow == 0) ? diff : pow;
871 /* Unknown char. Check on which string it occurred */
872 else {
873 if ((table[*str1] & DIGIT) == 0)
874 ret = (table[*str2] & DIGIT) ? LOWER : SAME;
875 else
876 ret = HIGHER;
879 /* Reverse sense of comparisons if negative is true. (-1000 < -1) */
880 return(negative) ? -ret : ret;
883 /* Files_merge () merges all files as indicated by nr_of_files. Merging goes
884 * in numbers of files that can be opened at the same time. (OPEN_FILES)
886 void files_merge(file_cnt)
887 register int file_cnt; /* Nr_of_files to merge */
889 register int i;
890 int limit;
892 for (i = 0; i < file_cnt; i += OPEN_FILES) {
893 /* Merge last files and store in output file */
894 if ((limit = i + OPEN_FILES) >= file_cnt) {
895 open_outfile();
896 limit = file_cnt;
897 } else { /* Merge OPEN_FILES files and store in temp
898 * file */
899 temp_files[16] = file_cnt / 26 + 'a';
900 temp_files[17] = file_cnt % 26 + 'a';
901 if ((out_fd = creat(temp_files, 0644)) < 0)
902 error(TRUE, "Cannot creat ", temp_files);
903 file_cnt++;
905 merge(i, limit);
908 /* Cleanup mess */
909 i = (only_merge) ? args_limit - args_offset : 0;
910 while (i < file_cnt) (void) unlink(file_name(i++));
913 /* Merge () merges the files between start_file and limit_file. */
914 void merge(start_file, limit_file)
915 int start_file, limit_file;
917 register MERGE *smallest; /* Keeps track of smallest line */
918 register int i;
919 int file_cnt = limit_file - start_file; /* Nr of files to merge */
921 /* Calculate size in core available for file_cnt merge structs */
922 buf_size = MEMORY_SIZE / file_cnt - LINE_SIZE;
924 mbrk(mem_top); /* First reset mem to lowest loc. */
925 disabled = 0; /* All files not done yet */
927 /* Set up merge structures. */
928 for (i = start_file; i < limit_file; i++) {
929 smallest = &merge_f[i - start_file];
930 if (!strcmp(file_name(i), "-")) /* File is stdin */
931 smallest->fd = 0;
932 else if ((smallest->fd = open(file_name(i), O_RDONLY)) < 0) {
933 smallest->fd = ERROR;
934 error(FALSE, "Cannot open ", file_name(i));
935 disabled++; /* Done this file */
936 continue;
938 smallest->buffer = msbrk(buf_size);
939 smallest->line = msbrk(LINE_SIZE);
940 smallest->cnt = smallest->read_chars = 0;
941 (void) read_line(smallest); /* Read first line */
944 if (disabled == file_cnt) { /* Couldn't open files */
945 (void) close(out_fd);
946 return;
949 /* Find a merg struct to assign smallest. */
950 for (i = 0; i < file_cnt; i++) {
951 if (merge_f[i].fd != ERROR) {
952 smallest = &merge_f[i];
953 break;
957 /* Loop until all files minus one are done */
958 while (disabled < file_cnt - 1) {
959 if (uniq) /* Skip all same lines */
960 smallest = skip_lines(smallest, file_cnt);
961 else { /* Find smallest line */
962 for (i = 0; i < file_cnt; i++) {
963 if (merge_f[i].fd == ERROR)
964 continue; /* We've had this one */
965 if (compare(merge_f[i].line, smallest->line) < 0)
966 smallest = &merge_f[i];
968 } /* Print line and read next */
969 smallest = print(smallest, file_cnt);
972 if (only_merge && uniq)
973 uniq_lines(smallest); /* Print only uniq lines */
974 else /* Print rest of file */
975 while (print(smallest, file_cnt) != NIL_MERGE);
977 put_line(NIL_PTR); /* Flush output buffer */
980 /* Put_line () prints the line into the out_fd filedescriptor. If line equals
981 * NIL_PTR, the out_fd is flushed and closed.
983 void put_line(line)
984 register char *line;
986 static int index = 0; /* Index in out_buffer */
988 if (line == NIL_PTR) { /* Flush and close */
989 mwrite(out_fd, out_buffer, index);
990 index = 0;
991 (void) close(out_fd);
992 return;
994 do { /* Fill out_buffer with line */
995 out_buffer[index++] = *line;
996 if (index == IO_SIZE) {
997 mwrite(out_fd, out_buffer, IO_SIZE);
998 index = 0;
1000 } while (*line++ != '\n');
1004 * Print () prints the line of the merg structure and tries to read another one.
1005 * If this fails, it returns the next merg structure which file_descriptor is
1006 * still open. If none could be found, a NIL structure is returned.
1008 MERGE *print(merg, file_cnt)
1009 register MERGE *merg;
1010 int file_cnt; /* Nr of files that are being merged */
1012 register int i;
1014 put_line(merg->line); /* Print the line */
1016 if (read_line(merg) == ERROR) { /* Read next line */
1017 for (i = 0; i < file_cnt; i++) {
1018 if (merge_f[i].fd != ERROR) {
1019 merg = &merge_f[i];
1020 break;
1023 if (i == file_cnt) /* No more files left */
1024 return NIL_MERGE;
1026 return merg;
1029 /* Read_line () reads a line from the fd from the merg struct. If the read
1030 * failed, disabled is incremented and the file is closed. Readings are
1031 * done in buf_size bytes.
1032 * Lines longer than LINE_SIZE are silently truncated.
1034 int read_line(merg)
1035 register MERGE *merg;
1037 register char *ptr = merg->line - 1; /* Ptr buf that will hold line */
1039 do {
1040 ptr++;
1041 if (merg->cnt == merg->read_chars) { /* Read new buffer */
1042 if ((merg->read_chars =
1043 read(merg->fd, merg->buffer, buf_size)) <= 0) {
1044 (void) close(merg->fd); /* OOPS */
1045 merg->fd = ERROR;
1046 disabled++;
1047 return ERROR;
1049 merg->cnt = 0;
1051 *ptr = merg->buffer[merg->cnt++]; /* Assign next char of line */
1052 if (ptr - merg->line == LINE_SIZE - 1)
1053 *ptr = '\n'; /* Truncate very long lines */
1054 } while (*ptr != '\n' && *ptr != '\0');
1056 if (*ptr == '\0') /* Add '\n' to last line */
1057 *ptr = '\n';
1058 *++ptr = '\0'; /* Add '\0' */
1059 return OK;
1062 /* Skip_lines () skips all same lines in all the files currently being merged.
1063 * It returns a pointer to the merge struct containing the smallest line.
1065 MERGE *skip_lines(smallest, file_cnt)
1066 register MERGE *smallest;
1067 int file_cnt;
1069 register int i;
1070 int ret;
1072 if (disabled == file_cnt - 1) /* We've had all */
1073 return smallest;
1075 for (i = 0; i < file_cnt; i++) {
1076 if (merge_f[i].fd == ERROR || smallest == &merge_f[i])
1077 continue; /* Don't check same file */
1078 while ((ret = compare(merge_f[i].line, smallest->line)) == 0) {
1079 if (read_line(&merge_f[i]) == ERROR) break; /* EOF */
1081 if (ret < 0) /* Line wasn't smallest. Try again */
1082 return skip_lines(&merge_f[i], file_cnt);
1084 return smallest;
1087 /* Uniq_lines () prints only the uniq lines out of the fd of the merg struct. */
1088 void uniq_lines(merg)
1089 register MERGE *merg;
1091 char lastline[LINE_SIZE]; /* Buffer to hold last line */
1093 for (;;) {
1094 put_line(merg->line); /* Print this line */
1095 copy(lastline, merg->line); /* and save it */
1096 if (read_line(merg) == ERROR) /* Read the next */
1097 return;
1098 /* Keep reading until lines duffer */
1099 while (compare(lastline, merg->line) == SAME)
1100 if (read_line(merg) == ERROR) return;
1103 /* NOTREACHED */
1107 * Check_file () checks if a file is sorted in order according to the arguments
1108 * given in main ().
1110 void check_file(fd, file)
1111 int fd;
1112 char *file;
1114 register MERGE *merg; /* 1 file only */
1115 char lastline[LINE_SIZE]; /* Save last line */
1116 register int ret; /* ret status of compare */
1118 if (fd == 0) file = "stdin";
1119 merg = (MERGE *) mem_top; /* Assign MERGE structure */
1120 merg->buffer = mem_top + sizeof(MERGE);
1121 merg->line = msbrk(LINE_SIZE);
1122 merg->cnt = merg->read_chars = 0;
1123 merg->fd = fd;
1124 buf_size = MEMORY_SIZE - sizeof(MERGE);
1126 if (read_line(merg) == ERROR) /* Read first line */
1127 return;
1128 copy(lastline, merg->line); /* and save it */
1130 for (;;) {
1131 if (read_line(merg) == ERROR) /* EOF reached */
1132 break;
1133 if ((ret = compare(lastline, merg->line)) > 0) {
1134 error(FALSE, "Disorder in file ", file);
1135 write(2, merg->line, length(merg->line));
1136 break;
1137 } else if (ret < 0) /* Copy if lines not equal */
1138 copy(lastline, merg->line);
1139 else if (uniq) {
1140 error(FALSE, "Non uniq line in file ", file);
1141 write(2, merg->line, length(merg->line));
1142 break;
1146 mbrk(mem_top); /* Reset mem */
1149 /* Length () returns the length of the argument line including the linefeed. */
1150 int length(line)
1151 register char *line;
1153 register int i = 1; /* Add linefeed */
1155 while (*line++ != '\n') i++;
1156 return i;
1159 /* Copy () copies the src line into the dest line including linefeed. */
1160 void copy(dest, src)
1161 register char *dest, *src;
1163 while ((*dest++ = *src++) != '\n');
1166 /* Msbrk() does a sbrk() and checks the return value. */
1167 char *msbrk(size)
1168 register int size;
1170 register char *address;
1172 if ((address = sbrk(size)) == (char *) -1)
1173 error(TRUE, "Not enough memory. Use chmem to allocate more", NIL_PTR);
1174 return address;
1177 /* Mbrk() does a brk() and checks the return value. */
1178 void mbrk(address)
1179 char *address;
1181 if (brk(address) == -1) error(TRUE, "Cannot reset memory", NIL_PTR);
1184 void catch(dummy)
1185 int dummy; /* to satisfy the prototype */
1187 register int i;
1189 signal(SIGINT, SIG_IGN);
1190 only_merge = FALSE;
1191 for (i = 0; i < 26; i++) (void) unlink(file_name(i));
1192 exit(2);