1 /* sort - sort a file of lines Author: Michiel Huisjes */
4 * sort [-funbirdcmt'x'] [+beg_pos[opts] [-end_pos]] [-o outfile] [file]..
7 * -f : Fold upper case to lower.
8 * -n : Sort to numeric value (optional decimal point) implies -b
9 * -b : Skip leading blanks
10 * -i : Ignore chars outside ASCII range (040 - 0176)
11 * -r : Reverse the sense of comparisons.
12 * -d : Sort to dictionary order. Only letters, digits, comma's and points
14 * If any of these flags are used in [opts], then they override all global
15 * ordering for this field.
17 * I/O control flags are:
18 * -u : Print uniq lines only once.
19 * -c : Check if files are sorted in order.
20 * -m : Merge already sorted files.
21 * -o outfile : Name of output file. (Can be one of the input files).
23 * - : Take stdin as input.
26 * -t'x' : Field separating character is 'x'
27 * +a.b : Start comparing at field 'a' with offset 'b'. A missing 'b' is
29 * -a.b : Stop comparing at field 'a' with offset 'b'. A missing 'b' is
31 * A missing -a.b means the rest of the line.
34 #include <sys/types.h>
44 #define OPEN_FILES (OPEN_MAX-4) /* Nr of open files per process */
46 #define MEMORY_SIZE (1024 * 1024)
48 #define MEMORY_SIZE ((10 * sizeof(int)) * 1024)
51 #define LINE_SIZE (1024 >> 1) /* Max length of a line */
52 #define IO_SIZE (2 * 1024) /* Size of buffered output */
53 #define STD_OUT 1 /* Fd of terminal */
55 /* Return status of functions */
58 #define NIL_PTR ((char *) 0)
60 /* Compare return values */
65 /* Table definitions. */
66 #define DICT 0x001 /* Alpha, numeric, letters and . */
67 #define ASCII 0x002 /* All between ' ' and '~' */
68 #define BLANK 0x004 /* ' ' and '\t' */
69 #define DIGIT 0x008 /* 0-9 */
70 #define UPPER 0x010 /* A-Z */
78 int fd
; /* Fd of file */
79 char *buffer
; /* Buffer for reads */
80 int read_chars
; /* Nr of chars actually read in buffer */
81 int cnt
; /* Nr of chars taken out of buffer */
82 char *line
; /* Contains line currently used */
85 #define NIL_MERGE ((MERGE *) 0)
86 MERGE merge_f
[OPEN_FILES
]; /* Merge structs */
87 int buf_size
; /* Size of core available for each struct */
89 #define FIELDS_LIMIT 10 /* 1 global + 9 user */
93 int beg_field
, beg_pos
; /* Begin field + offset */
94 int end_field
, end_pos
; /* End field + offset. ERROR == EOLN */
95 BOOL reverse
; /* TRUE if rev. flag set on this field */
103 /* Field declarations. A total of FILEDS_LIMIT is allowed */
104 FIELD fields
[FIELDS_LIMIT
];
105 int field_cnt
; /* Nr of field actually assigned */
107 /* Various output control flags */
109 BOOL only_merge
= FALSE
;
112 char *mem_top
; /* Mem_top points to lowest pos of memory. */
113 char *cur_pos
; /* First free position in mem */
114 char **line_table
; /* Pointer to the internal line table */
115 BOOL in_core
= TRUE
; /* Set if input cannot all be sorted in core */
117 /* Place where temp_files should be made */
118 char temp_files
[] = "/tmp/sort.XXXXX.XX";
119 char *output_file
; /* Name of output file */
120 int out_fd
; /* Fd to output file (could be STD_OUT) */
121 char out_buffer
[IO_SIZE
]; /* For buffered output */
123 char **argptr
; /* Pointer to argv structure */
124 int args_offset
; /* Nr of args spilled on options */
125 int args_limit
; /* Nr of args given */
127 char separator
; /* Char that separates fields */
128 int nr_of_files
= 0; /* Nr_of_files to be merged */
129 int disabled
; /* Nr of files done */
131 char USAGE
[] = "Usage: sort [-funbirdcmt'x'] [+beg_pos [-end_pos]] [-o outfile] [file] ..";
133 /* Forward declarations */
134 _PROTOTYPE(int main
, (int argc
, char **argv
));
135 _PROTOTYPE(void get_opts
, (char *ptr
, FIELD
* field
));
136 _PROTOTYPE(void new_field
, (FIELD
* field
, int *offset
, BOOL beg_fl
));
137 _PROTOTYPE(void adjust_options
, (FIELD
* field
));
138 _PROTOTYPE(void error
, (BOOL quit
, char *message
, char *arg
));
139 _PROTOTYPE(void open_outfile
, (void));
140 _PROTOTYPE(void get_file
, (int fd
, off_t size
));
141 _PROTOTYPE(int last_line
, (void));
142 _PROTOTYPE(void print_table
, (int fd
));
143 _PROTOTYPE(char *file_name
, (int nr
));
144 _PROTOTYPE(void mread
, (int fd
, char *address
, int bytes
));
145 _PROTOTYPE(void mwrite
, (int fd
, char *address
, int bytes
));
146 _PROTOTYPE(void sort
, (void));
147 _PROTOTYPE(void sort_table
, (int nel
));
148 _PROTOTYPE(void incr
, (int si
, int ei
));
149 _PROTOTYPE(int cmp_fields
, (char *el1
, char *el2
));
150 _PROTOTYPE(void build_field
, (char *dest
, FIELD
* field
, char *src
));
151 _PROTOTYPE(char *skip_fields
, (char *str
, int nf
));
152 _PROTOTYPE(int compare
, (char *el1
, char *el2
));
153 _PROTOTYPE(int cmp
, (unsigned char *el1
, unsigned char *el2
, FIELD
* field
));
154 _PROTOTYPE(int digits
, (char *str1
, char *str2
, BOOL check_sign
));
155 _PROTOTYPE(void files_merge
, (int file_cnt
));
156 _PROTOTYPE(void merge
, (int start_file
, int limit_file
));
157 _PROTOTYPE(void put_line
, (char *line
));
158 _PROTOTYPE(MERGE
* print
, (MERGE
* merg
, int file_cnt
));
159 _PROTOTYPE(int read_line
, (MERGE
* merg
));
160 _PROTOTYPE(MERGE
* skip_lines
, (MERGE
* smallest
, int file_cnt
));
161 _PROTOTYPE(void uniq_lines
, (MERGE
* merg
));
162 _PROTOTYPE(void check_file
, (int fd
, char *file
));
163 _PROTOTYPE(int length
, (char *line
));
164 _PROTOTYPE(void copy
, (char *dest
, char *src
));
165 _PROTOTYPE(char *msbrk
, (int size
));
166 _PROTOTYPE(void mbrk
, (char *address
));
167 _PROTOTYPE(void catch, (int dummy
));
169 /* Table of all chars. 0 means no special meaning. */
172 0, 0, 0, 0, 0, 0, 0, 0,
173 0, BLANK
| DICT
, 0, 0, 0, 0, 0, 0,
174 0, 0, 0, 0, 0, 0, 0, 0,
175 0, 0, 0, 0, 0, 0, 0, 0,
178 BLANK
| DICT
| ASCII
, ASCII
, ASCII
, ASCII
, ASCII
, ASCII
, ASCII
,
179 ASCII
, ASCII
, ASCII
, ASCII
, ASCII
, ASCII
, ASCII
,
183 DIGIT
| DICT
| ASCII
, DIGIT
| DICT
| ASCII
, DIGIT
| DICT
| ASCII
,
184 DIGIT
| DICT
| ASCII
, DIGIT
| DICT
| ASCII
, DIGIT
| DICT
| ASCII
,
185 DIGIT
| DICT
| ASCII
, DIGIT
| DICT
| ASCII
, DIGIT
| DICT
| ASCII
,
186 DIGIT
| DICT
| ASCII
,
188 /* ASCII from ':' to '@' */
189 ASCII
, ASCII
, ASCII
, ASCII
, ASCII
, ASCII
, ASCII
,
191 /* Upper case letters 'A' to 'Z' */
192 UPPER
| DICT
| ASCII
, UPPER
| DICT
| ASCII
, UPPER
| DICT
| ASCII
,
193 UPPER
| DICT
| ASCII
, UPPER
| DICT
| ASCII
, UPPER
| DICT
| ASCII
,
194 UPPER
| DICT
| ASCII
, UPPER
| DICT
| ASCII
, UPPER
| DICT
| ASCII
,
195 UPPER
| DICT
| ASCII
, UPPER
| DICT
| ASCII
, UPPER
| DICT
| ASCII
,
196 UPPER
| DICT
| ASCII
, UPPER
| DICT
| ASCII
, UPPER
| DICT
| ASCII
,
197 UPPER
| DICT
| ASCII
, UPPER
| DICT
| ASCII
, UPPER
| DICT
| ASCII
,
198 UPPER
| DICT
| ASCII
, UPPER
| DICT
| ASCII
, UPPER
| DICT
| ASCII
,
199 UPPER
| DICT
| ASCII
, UPPER
| DICT
| ASCII
, UPPER
| DICT
| ASCII
,
200 UPPER
| DICT
| ASCII
, UPPER
| DICT
| ASCII
,
202 /* ASCII from '[' to '`' */
203 ASCII
, ASCII
, ASCII
, ASCII
, ASCII
, ASCII
,
205 /* Lower case letters from 'a' to 'z' */
206 DICT
| ASCII
, DICT
| ASCII
, DICT
| ASCII
, DICT
| ASCII
,
207 DICT
| ASCII
, DICT
| ASCII
, DICT
| ASCII
, DICT
| ASCII
,
208 DICT
| ASCII
, DICT
| ASCII
, DICT
| ASCII
, DICT
| ASCII
,
209 DICT
| ASCII
, DICT
| ASCII
, DICT
| ASCII
, DICT
| ASCII
,
210 DICT
| ASCII
, DICT
| ASCII
, DICT
| ASCII
, DICT
| ASCII
,
211 DICT
| ASCII
, DICT
| ASCII
, DICT
| ASCII
, DICT
| ASCII
,
212 DICT
| ASCII
, DICT
| ASCII
,
214 /* ASCII from '{' to '~' */
215 ASCII
, ASCII
, ASCII
, ASCII
,
217 /* Stuff from -1 to -177 */
218 0, 0, 0, 0, 0, 0, 0, 0, 0,
219 0, 0, 0, 0, 0, 0, 0, 0, 0,
220 0, 0, 0, 0, 0, 0, 0, 0, 0,
221 0, 0, 0, 0, 0, 0, 0, 0, 0,
222 0, 0, 0, 0, 0, 0, 0, 0, 0,
223 0, 0, 0, 0, 0, 0, 0, 0, 0,
224 0, 0, 0, 0, 0, 0, 0, 0, 0,
225 0, 0, 0, 0, 0, 0, 0, 0, 0,
231 * Get_opts () assigns the options into the field structure as described in ptr.
232 * This field structure could be the GLOBAL one.
234 void get_opts(ptr
, field
)
236 register FIELD
*field
;
239 case 'b': /* Skip leading blanks */
240 field
->blanks
= TRUE
;
242 case 'd': /* Dictionary order */
243 field
->dictionary
= TRUE
;
245 case 'f': /* Fold upper case to lower */
246 field
->fold_case
= TRUE
;
248 case 'i': /* Skip chars outside ' ' '~' */
251 case 'n': /* Sort on numeric */
252 field
->numeric
= TRUE
;
253 field
->blanks
= TRUE
;
255 case 'r': /* Reverse comparisons */
256 field
->reverse
= TRUE
;
258 default: /* Illegal options */
259 error(TRUE
, USAGE
, NIL_PTR
);
263 /* New_field () assigns a new field as described by the arguments.
264 * A field description is of the form: +a.b[opts] -c.d, where b and d, as well
265 * as -c.d and [opts] are optional. Nr before digit is field nr. Nr after digit
266 * is offset from field.
268 void new_field(field
, offset
, beg_fl
)
269 register FIELD
*field
; /* Field to assign */
270 int *offset
; /* Offset in argv structure */
271 BOOL beg_fl
; /* Assign beg or end of field */
275 ptr
= argptr
[*offset
];
276 *offset
+= 1; /* Incr offset to next arg */
280 field
->beg_field
= atoi(ptr
); /* Assign int of first field */
282 field
->end_field
= atoi(ptr
);
284 while (table
[*ptr
] & DIGIT
) /* Skip all digits */
287 if (*ptr
== '.') { /* Check for offset */
290 field
->beg_pos
= atoi(ptr
);
292 field
->end_pos
= atoi(ptr
);
293 while (table
[*ptr
] & DIGIT
) /* Skip digits */
297 while (*ptr
!= '\0') /* Check options after field */
298 get_opts(ptr
++, field
);
300 if (beg_fl
) { /* Check for end pos */
301 ptr
= argptr
[*offset
];
302 if (ptr
&& *ptr
== '-' && ((table
[*(ptr
+ 1)] & DIGIT
) || *(ptr
+ 1) == '.')) {
303 new_field(field
, offset
, FALSE
);
304 if (field
->beg_field
> field
->end_field
)
305 error(TRUE
, "End field is before start field!", NIL_PTR
);
306 } else /* No end pos. */
307 field
->end_field
= ERROR
;
315 int arg_count
= 1; /* Offset in argv */
317 register char *ptr
; /* Ptr to *argv in use */
322 cur_pos
= mem_top
= msbrk(MEMORY_SIZE
); /* Find lowest mem. location */
324 while (arg_count
< argc
&& ((ptr
= argv
[arg_count
])[0] == '-' || *ptr
== '+')) {
325 if (*ptr
== '-' && *(ptr
+ 1) == '\0') /* "-" means stdin */
327 if (*ptr
== '+') { /* Assign field. */
328 if (++field_cnt
== FIELDS_LIMIT
)
329 error(TRUE
, "Too many fields", NIL_PTR
);
330 new_field(&fields
[field_cnt
], &arg_count
, TRUE
);
331 } else { /* Get output options */
334 case 'c': /* Only check file */
337 case 'm': /* Merge (sorted) files */
340 case 'u': /* Only give uniq lines */
343 case 'o': /* Name of output file */
344 output_file
= argv
[++arg_count
];
346 case 't': /* Field separator */
350 default: /* Sort options */
351 get_opts(ptr
, &fields
[GLOBAL
]);
358 for (fd
= 1; fd
<= field_cnt
; fd
++) adjust_options(&fields
[fd
]);
360 /* Create name of tem_files 'sort.pid.aa' */
361 ptr
= &temp_files
[10];
365 *ptr
++ = pid
/ pow
+ '0';
370 signal(SIGINT
, catch);
372 /* Only merge files. Set up */
374 args_limit
= args_offset
= arg_count
;
375 while (argv
[args_limit
] != NIL_PTR
)
376 args_limit
++; /* Find nr of args */
377 files_merge(args_limit
- arg_count
);
380 if (arg_count
== argc
) { /* No args left. Use stdin */
382 check_file(0, NIL_PTR
);
384 get_file(0, (off_t
) 0);
386 while (arg_count
< argc
) { /* Sort or check args */
387 if (strcmp(argv
[arg_count
], "-") == 0)
389 else if (stat(argv
[arg_count
], &st
) < 0) {
390 error(FALSE
, "Cannot find ", argv
[arg_count
++]);
395 else if ((fd
= open(argv
[arg_count
], O_RDONLY
)) < 0) {
396 error(FALSE
, "Cannot open ", argv
[arg_count
++]);
400 check_file(fd
, argv
[arg_count
]);
401 else /* Get_file reads whole file */
402 get_file(fd
, st
.st_size
);
408 sort(); /* Sort whatever is left */
410 if (nr_of_files
== 1) /* Only one file sorted -> don't merge */
413 files_merge(nr_of_files
);
417 /* Adjust_options() assigns all global variables set also in the fields
420 void adjust_options(field
)
421 register FIELD
*field
;
423 register FIELD
*gfield
= &fields
[GLOBAL
];
425 if (gfield
->reverse
) field
->reverse
= TRUE
;
426 if (gfield
->blanks
) field
->blanks
= TRUE
;
427 if (gfield
->dictionary
) field
->dictionary
= TRUE
;
428 if (gfield
->fold_case
) field
->fold_case
= TRUE
;
429 if (gfield
->ascii
) field
->ascii
= TRUE
;
430 if (gfield
->numeric
) field
->numeric
= TRUE
;
433 /* Error () prints the error message on stderr and exits if quit == TRUE. */
434 void error(quit
, message
, arg
)
436 register char *message
, *arg
;
438 write(2, message
, strlen(message
));
439 if (arg
!= NIL_PTR
) write(2, arg
, strlen(arg
));
444 /* Open_outfile () assigns to out_fd the fd where the output must go when all
445 * the sorting is done.
449 if (output_file
== NIL_PTR
)
451 else if ((out_fd
= creat(output_file
, 0644)) < 0)
452 error(TRUE
, "Cannot creat ", output_file
);
455 /* Get_file reads the whole file of filedescriptor fd. If the file is too big
456 * to keep in core, a partial sort is done, and the output is stashed somewhere.
458 void get_file(fd
, size
)
459 int fd
; /* Fd of file to read */
460 register off_t size
; /* Size of file */
463 int rest
; /* Rest in memory */
464 char save_ch
; /* Used in stdin readings */
466 rest
= MEMORY_SIZE
- (cur_pos
- mem_top
);
467 if (fd
== 0) { /* We're reding stdin */
468 while ((i
= read(0, cur_pos
, rest
)) > 0) {
469 if ((cur_pos
- mem_top
) + i
== MEMORY_SIZE
) {
471 i
= last_line(); /* End of last line */
472 save_ch
= mem_top
[i
];
474 sort(); /* Sort core */
475 mem_top
[i
] = save_ch
; /* Restore erased char */
476 /* Restore last (half read) line */
477 for (rest
= 0; i
+ rest
!= MEMORY_SIZE
; rest
++)
478 mem_top
[rest
] = mem_top
[i
+ rest
];
479 /* Assign current pos. in memory */
480 cur_pos
= &mem_top
[rest
];
481 } else { /* Fits, just assign position in mem. */
482 cur_pos
= cur_pos
+ i
;
486 /* Calculate rest of mem */
487 rest
= MEMORY_SIZE
- (cur_pos
- mem_top
);
491 /* Reading file. Check size */
492 else if (size
> rest
) { /* Won't fit */
493 mread(fd
, cur_pos
, rest
);
495 i
= last_line(); /* Get pos. of last line */
496 mem_top
[i
] = '\0'; /* Truncate */
497 (void) lseek(fd
, (off_t
) (i
- MEMORY_SIZE
), SEEK_CUR
); /* Do this next time */
498 size
= size
- rest
- i
+ MEMORY_SIZE
; /* Calculate rest */
499 cur_pos
= mem_top
; /* Reset mem */
500 sort(); /* Sort core */
501 get_file(fd
, size
); /* Get rest of file */
502 } else { /* Fits. Just read in */
504 mread(fd
, cur_pos
, rest
);
505 cur_pos
= cur_pos
+ rest
; /* Reassign cur_pos */
507 (void) close(fd
); /* File completed */
511 /* Last_line () find the last line in core and retuns the offset from the top
518 for (i
= MEMORY_SIZE
- 2; i
> 0; i
--)
519 if (mem_top
[i
] == '\n') break;
523 /* Print_table prints the line table in the given file_descriptor. If the fd
524 * equals ERROR, it opens a temp_file itself.
529 register char **line_ptr
; /* Ptr in line_table */
530 register char *ptr
; /* Ptr to line */
531 int index
= 0; /* Index in output buffer */
534 if ((fd
= creat(file_name(nr_of_files
), 0644)) < 0)
535 error(TRUE
, "Cannot creat ", file_name(nr_of_files
));
537 for (line_ptr
= line_table
; *line_ptr
!= NIL_PTR
; line_ptr
++) {
539 /* Skip all same lines if uniq is set */
540 if (uniq
&& *(line_ptr
+ 1) != NIL_PTR
) {
541 if (compare(ptr
, *(line_ptr
+ 1)) == SAME
) continue;
543 do { /* Print line in a buffered way */
544 out_buffer
[index
++] = *ptr
;
545 if (index
== IO_SIZE
) {
546 mwrite(fd
, out_buffer
, IO_SIZE
);
549 } while (*ptr
++ != '\n');
551 mwrite(fd
, out_buffer
, index
);/* Flush buffer */
552 (void) close(fd
); /* Close file */
553 nr_of_files
++; /* Increment nr_of_files to merge */
556 /* File_name () returns the nr argument from the argument list, or a uniq
557 * filename if the nr is too high, or the arguments were not merge files.
563 if (args_offset
+ nr
< args_limit
) return argptr
[args_offset
+ nr
];
565 temp_files
[16] = nr
/ 26 + 'a';
566 temp_files
[17] = nr
% 26 + 'a';
571 /* Mread () performs a normal read (), but checks the return value. */
572 void mread(fd
, address
, bytes
)
577 if (read(fd
, address
, bytes
) < 0 && bytes
!= 0)
578 error(TRUE
, "Read error", NIL_PTR
);
581 /* Mwrite () performs a normal write (), but checks the return value. */
582 void mwrite(fd
, address
, bytes
)
587 if (write(fd
, address
, bytes
) != bytes
&& bytes
!= 0)
588 error(TRUE
, "Write error", NIL_PTR
);
591 /* Sort () sorts the input in memory starting at mem_top. */
594 register char *ptr
= mem_top
;
595 register int count
= 0;
597 /* Count number of lines in memory */
599 if (*ptr
++ == '\n') count
++;
602 /* Set up the line table */
603 line_table
= (char **) msbrk(count
* sizeof(char *) + sizeof(char *));
606 ptr
= line_table
[0] = mem_top
;
608 if (*ptr
++ == '\n') line_table
[count
++] = ptr
;
611 line_table
[count
- 1] = NIL_PTR
;
613 /* Sort the line table */
614 sort_table(count
- 1);
616 /* Stash output somewhere */
623 /* Free line table */
624 mbrk((char *) line_table
);
627 /* Sort_table () sorts the line table consisting of nel elements. */
635 for (i
= (nel
>> 1); i
>= 1; i
--) incr(i
, nel
);
638 for (i
= nel
; i
> 1; i
--) {
640 line_table
[0] = line_table
[i
- 1];
641 line_table
[i
- 1] = tmp
;
646 /* Incr () increments the heap. */
652 while (si
<= (ei
>> 1)) {
654 if (si
+ 1 <= ei
&& compare(line_table
[si
- 1], line_table
[si
]) <= 0)
656 if (compare(line_table
[(si
>> 1) - 1], line_table
[si
- 1]) >= 0)
658 tmp
= line_table
[(si
>> 1) - 1];
659 line_table
[(si
>> 1) - 1] = line_table
[si
- 1];
660 line_table
[si
- 1] = tmp
;
664 /* Cmp_fields builds new lines out of the lines pointed to by el1 and el2 and
665 * puts it into the line1 and line2 arrays. It then calls the cmp () routine
666 * with the field describing the arguments.
668 int cmp_fields(el1
, el2
)
669 register char *el1
, *el2
;
672 char line1
[LINE_SIZE
], line2
[LINE_SIZE
];
674 for (i
= 0; i
< field_cnt
; i
++) { /* Setup line parts */
675 build_field(line1
, &fields
[i
+ 1], el1
);
676 build_field(line2
, &fields
[i
+ 1], el2
);
677 if ((ret
= cmp((unsigned char *) line1
, (unsigned char *) line2
,
678 &fields
[i
+ 1])) != SAME
)
679 break; /* If equal, try next field */
682 /* Check for reverse flag */
683 if (i
!= field_cnt
&& fields
[i
+ 1].reverse
) return -ret
;
685 /* Else return the last return value of cmp () */
689 /* Build_field builds a new line from the src as described by the field.
690 * The result is put in dest.
692 void build_field(dest
, field
, src
)
693 char *dest
; /* Holds result */
694 register FIELD
*field
; /* Field description */
695 register char *src
; /* Source line */
697 char *begin
= src
; /* Remember start location */
698 char *last
; /* Pointer to end location */
701 /* Skip begin fields */
702 src
= skip_fields(src
, field
->beg_field
);
704 /* Skip begin positions */
705 for (i
= 0; i
< field
->beg_pos
&& *src
!= '\n'; i
++) src
++;
707 /* Copy whatever is left */
710 /* If end field is assigned truncate (perhaps) the part copied */
711 if (field
->end_field
!= ERROR
) { /* Find last field */
712 last
= skip_fields(begin
, field
->end_field
);
713 /* Skip positions as given by end fields description */
714 for (i
= 0; i
< field
->end_pos
&& *last
!= '\n'; i
++) last
++;
715 dest
[last
- src
] = '\n';/* Truncate line */
719 /* Skip_fields () skips nf fields of the line pointed to by str. */
720 char *skip_fields(str
, nf
)
725 if (separator
== '\0') {/* Means ' ' or '\t' */
726 while (*str
!= ' ' && *str
!= '\t' && *str
!= '\n') str
++;
727 while (table
[*str
] & BLANK
) str
++;
729 while (*str
!= separator
&& *str
!= '\n') str
++;
730 if (*str
== separator
) str
++;
733 return str
; /* Return pointer to indicated field */
736 /* Compare is called by all sorting routines. It checks if fields assignments
737 * has been made. if so, it calls cmp_fields (). If not, it calls cmp () and
738 * reversed the return value if the (global) reverse flag is set.
740 int compare(el1
, el2
)
741 register char *el1
, *el2
;
745 if (field_cnt
> GLOBAL
) return cmp_fields(el1
, el2
);
747 ret
= cmp((unsigned char *) el1
, (unsigned char *) el2
, &fields
[GLOBAL
]);
748 return(fields
[GLOBAL
].reverse
) ? -ret
: ret
;
751 /* Cmp () is the actual compare routine. It compares according to the
752 * description given in the field pointer.
754 int cmp(el1
, el2
, field
)
755 register unsigned char *el1
, *el2
;
760 if (field
->blanks
) { /* Skip leading blanks */
761 while (table
[*el1
] & BLANK
) el1
++;
762 while (table
[*el2
] & BLANK
) el2
++;
764 if (field
->numeric
) /* Compare numeric */
765 return digits((char *) el1
, (char *) el2
, TRUE
);
768 while (*el1
== *el2
) {
769 if (*el1
++ == '\n') /* EOLN on both strings */
773 if (*el1
== '\n') /* EOLN on string one */
775 if (*el2
== '\n') return HIGHER
;
776 if (field
->ascii
) { /* Skip chars outside 040 - 0177 */
777 if ((table
[*el1
] & ASCII
) == 0) {
780 } while ((table
[*el1
] & ASCII
) == 0);
783 if ((table
[*el2
] & ASCII
) == 0) {
786 } while ((table
[*el2
] & ASCII
) == 0);
790 if (field
->dictionary
) {/* Skip non-dict chars */
791 if ((table
[*el1
] & DICT
) == 0) {
794 } while ((table
[*el1
] & DICT
) == 0);
797 if ((table
[*el2
] & DICT
) == 0) {
800 } while ((table
[*el2
] & DICT
) == 0);
804 if (field
->fold_case
) { /* Fold upper case to lower */
805 if (table
[c1
= *el1
++] & UPPER
) c1
+= 'a' - 'A';
806 if (table
[c2
= *el2
++] & UPPER
) c2
+= 'a' - 'A';
807 if (c1
== c2
) continue;
817 * Digits compares () the two strings that point to a number of digits followed
818 * by an optional decimal point.
820 int digits(str1
, str2
, check_sign
)
821 register char *str1
, *str2
;
822 BOOL check_sign
; /* True if sign must be checked */
824 BOOL negative
= FALSE
; /* True if negative numbers */
827 /* Check for optional minus or plus sign */
832 } else if (*str1
== '+')
836 if (negative
== FALSE
) return HIGHER
;
840 else if (*str2
== '+')
844 /* Keep incrementing as long as digits are available and equal */
845 while ((table
[*str1
] & DIGIT
) && table
[*str2
] & DIGIT
) {
846 if (*str1
!= *str2
) break;
851 /* First check for the decimal point. */
852 if (*str1
== '.' || *str2
== '.') {
854 if (*str2
== '.') /* Both. Check decimal part */
855 ret
= digits(str1
+ 1, str2
+ 1, FALSE
);
857 ret
= (table
[*str2
] & DIGIT
) ? LOWER
: HIGHER
;
859 ret
= (table
[*str1
] & DIGIT
) ? HIGHER
: LOWER
;
862 /* Now either two digits differ, or unknown char is seen (e.g. end of string) */
863 else if ((table
[*str1
] & DIGIT
) && (table
[*str2
] & DIGIT
)) {
864 diff
= *str1
- *str2
; /* Basic difference */
865 pow
= 0; /* Check power of numbers */
866 while (table
[*str1
++] & DIGIT
) pow
++;
867 while (table
[*str2
++] & DIGIT
) pow
--;
868 ret
= (pow
== 0) ? diff
: pow
;
871 /* Unknown char. Check on which string it occurred */
873 if ((table
[*str1
] & DIGIT
) == 0)
874 ret
= (table
[*str2
] & DIGIT
) ? LOWER
: SAME
;
879 /* Reverse sense of comparisons if negative is true. (-1000 < -1) */
880 return(negative
) ? -ret
: ret
;
883 /* Files_merge () merges all files as indicated by nr_of_files. Merging goes
884 * in numbers of files that can be opened at the same time. (OPEN_FILES)
886 void files_merge(file_cnt
)
887 register int file_cnt
; /* Nr_of_files to merge */
892 for (i
= 0; i
< file_cnt
; i
+= OPEN_FILES
) {
893 /* Merge last files and store in output file */
894 if ((limit
= i
+ OPEN_FILES
) >= file_cnt
) {
897 } else { /* Merge OPEN_FILES files and store in temp
899 temp_files
[16] = file_cnt
/ 26 + 'a';
900 temp_files
[17] = file_cnt
% 26 + 'a';
901 if ((out_fd
= creat(temp_files
, 0644)) < 0)
902 error(TRUE
, "Cannot creat ", temp_files
);
909 i
= (only_merge
) ? args_limit
- args_offset
: 0;
910 while (i
< file_cnt
) (void) unlink(file_name(i
++));
913 /* Merge () merges the files between start_file and limit_file. */
914 void merge(start_file
, limit_file
)
915 int start_file
, limit_file
;
917 register MERGE
*smallest
; /* Keeps track of smallest line */
919 int file_cnt
= limit_file
- start_file
; /* Nr of files to merge */
921 /* Calculate size in core available for file_cnt merge structs */
922 buf_size
= MEMORY_SIZE
/ file_cnt
- LINE_SIZE
;
924 mbrk(mem_top
); /* First reset mem to lowest loc. */
925 disabled
= 0; /* All files not done yet */
927 /* Set up merge structures. */
928 for (i
= start_file
; i
< limit_file
; i
++) {
929 smallest
= &merge_f
[i
- start_file
];
930 if (!strcmp(file_name(i
), "-")) /* File is stdin */
932 else if ((smallest
->fd
= open(file_name(i
), O_RDONLY
)) < 0) {
933 smallest
->fd
= ERROR
;
934 error(FALSE
, "Cannot open ", file_name(i
));
935 disabled
++; /* Done this file */
938 smallest
->buffer
= msbrk(buf_size
);
939 smallest
->line
= msbrk(LINE_SIZE
);
940 smallest
->cnt
= smallest
->read_chars
= 0;
941 (void) read_line(smallest
); /* Read first line */
944 if (disabled
== file_cnt
) { /* Couldn't open files */
945 (void) close(out_fd
);
949 /* Find a merg struct to assign smallest. */
950 for (i
= 0; i
< file_cnt
; i
++) {
951 if (merge_f
[i
].fd
!= ERROR
) {
952 smallest
= &merge_f
[i
];
957 /* Loop until all files minus one are done */
958 while (disabled
< file_cnt
- 1) {
959 if (uniq
) /* Skip all same lines */
960 smallest
= skip_lines(smallest
, file_cnt
);
961 else { /* Find smallest line */
962 for (i
= 0; i
< file_cnt
; i
++) {
963 if (merge_f
[i
].fd
== ERROR
)
964 continue; /* We've had this one */
965 if (compare(merge_f
[i
].line
, smallest
->line
) < 0)
966 smallest
= &merge_f
[i
];
968 } /* Print line and read next */
969 smallest
= print(smallest
, file_cnt
);
972 if (only_merge
&& uniq
)
973 uniq_lines(smallest
); /* Print only uniq lines */
974 else /* Print rest of file */
975 while (print(smallest
, file_cnt
) != NIL_MERGE
);
977 put_line(NIL_PTR
); /* Flush output buffer */
980 /* Put_line () prints the line into the out_fd filedescriptor. If line equals
981 * NIL_PTR, the out_fd is flushed and closed.
986 static int index
= 0; /* Index in out_buffer */
988 if (line
== NIL_PTR
) { /* Flush and close */
989 mwrite(out_fd
, out_buffer
, index
);
991 (void) close(out_fd
);
994 do { /* Fill out_buffer with line */
995 out_buffer
[index
++] = *line
;
996 if (index
== IO_SIZE
) {
997 mwrite(out_fd
, out_buffer
, IO_SIZE
);
1000 } while (*line
++ != '\n');
1004 * Print () prints the line of the merg structure and tries to read another one.
1005 * If this fails, it returns the next merg structure which file_descriptor is
1006 * still open. If none could be found, a NIL structure is returned.
1008 MERGE
*print(merg
, file_cnt
)
1009 register MERGE
*merg
;
1010 int file_cnt
; /* Nr of files that are being merged */
1014 put_line(merg
->line
); /* Print the line */
1016 if (read_line(merg
) == ERROR
) { /* Read next line */
1017 for (i
= 0; i
< file_cnt
; i
++) {
1018 if (merge_f
[i
].fd
!= ERROR
) {
1023 if (i
== file_cnt
) /* No more files left */
1029 /* Read_line () reads a line from the fd from the merg struct. If the read
1030 * failed, disabled is incremented and the file is closed. Readings are
1031 * done in buf_size bytes.
1032 * Lines longer than LINE_SIZE are silently truncated.
1035 register MERGE
*merg
;
1037 register char *ptr
= merg
->line
- 1; /* Ptr buf that will hold line */
1041 if (merg
->cnt
== merg
->read_chars
) { /* Read new buffer */
1042 if ((merg
->read_chars
=
1043 read(merg
->fd
, merg
->buffer
, buf_size
)) <= 0) {
1044 (void) close(merg
->fd
); /* OOPS */
1051 *ptr
= merg
->buffer
[merg
->cnt
++]; /* Assign next char of line */
1052 if (ptr
- merg
->line
== LINE_SIZE
- 1)
1053 *ptr
= '\n'; /* Truncate very long lines */
1054 } while (*ptr
!= '\n' && *ptr
!= '\0');
1056 if (*ptr
== '\0') /* Add '\n' to last line */
1058 *++ptr
= '\0'; /* Add '\0' */
1062 /* Skip_lines () skips all same lines in all the files currently being merged.
1063 * It returns a pointer to the merge struct containing the smallest line.
1065 MERGE
*skip_lines(smallest
, file_cnt
)
1066 register MERGE
*smallest
;
1072 if (disabled
== file_cnt
- 1) /* We've had all */
1075 for (i
= 0; i
< file_cnt
; i
++) {
1076 if (merge_f
[i
].fd
== ERROR
|| smallest
== &merge_f
[i
])
1077 continue; /* Don't check same file */
1078 while ((ret
= compare(merge_f
[i
].line
, smallest
->line
)) == 0) {
1079 if (read_line(&merge_f
[i
]) == ERROR
) break; /* EOF */
1081 if (ret
< 0) /* Line wasn't smallest. Try again */
1082 return skip_lines(&merge_f
[i
], file_cnt
);
1087 /* Uniq_lines () prints only the uniq lines out of the fd of the merg struct. */
1088 void uniq_lines(merg
)
1089 register MERGE
*merg
;
1091 char lastline
[LINE_SIZE
]; /* Buffer to hold last line */
1094 put_line(merg
->line
); /* Print this line */
1095 copy(lastline
, merg
->line
); /* and save it */
1096 if (read_line(merg
) == ERROR
) /* Read the next */
1098 /* Keep reading until lines duffer */
1099 while (compare(lastline
, merg
->line
) == SAME
)
1100 if (read_line(merg
) == ERROR
) return;
1107 * Check_file () checks if a file is sorted in order according to the arguments
1110 void check_file(fd
, file
)
1114 register MERGE
*merg
; /* 1 file only */
1115 char lastline
[LINE_SIZE
]; /* Save last line */
1116 register int ret
; /* ret status of compare */
1118 if (fd
== 0) file
= "stdin";
1119 merg
= (MERGE
*) mem_top
; /* Assign MERGE structure */
1120 merg
->buffer
= mem_top
+ sizeof(MERGE
);
1121 merg
->line
= msbrk(LINE_SIZE
);
1122 merg
->cnt
= merg
->read_chars
= 0;
1124 buf_size
= MEMORY_SIZE
- sizeof(MERGE
);
1126 if (read_line(merg
) == ERROR
) /* Read first line */
1128 copy(lastline
, merg
->line
); /* and save it */
1131 if (read_line(merg
) == ERROR
) /* EOF reached */
1133 if ((ret
= compare(lastline
, merg
->line
)) > 0) {
1134 error(FALSE
, "Disorder in file ", file
);
1135 write(2, merg
->line
, length(merg
->line
));
1137 } else if (ret
< 0) /* Copy if lines not equal */
1138 copy(lastline
, merg
->line
);
1140 error(FALSE
, "Non uniq line in file ", file
);
1141 write(2, merg
->line
, length(merg
->line
));
1146 mbrk(mem_top
); /* Reset mem */
1149 /* Length () returns the length of the argument line including the linefeed. */
1151 register char *line
;
1153 register int i
= 1; /* Add linefeed */
1155 while (*line
++ != '\n') i
++;
1159 /* Copy () copies the src line into the dest line including linefeed. */
1160 void copy(dest
, src
)
1161 register char *dest
, *src
;
1163 while ((*dest
++ = *src
++) != '\n');
1166 /* Msbrk() does a sbrk() and checks the return value. */
1170 register char *address
;
1172 if ((address
= sbrk(size
)) == (char *) -1)
1173 error(TRUE
, "Not enough memory. Use chmem to allocate more", NIL_PTR
);
1177 /* Mbrk() does a brk() and checks the return value. */
1181 if (brk(address
) == -1) error(TRUE
, "Cannot reset memory", NIL_PTR
);
1185 int dummy
; /* to satisfy the prototype */
1189 signal(SIGINT
, SIG_IGN
);
1191 for (i
= 0; i
< 26; i
++) (void) unlink(file_name(i
));