kernel - Fix issue w/ buffer ortation when doing non-blocking read from bpf
[dragonfly.git] / contrib / texinfo-4 / util / texindex.c
blob569f877be0d98bfa0473e9c98f492da34bd4b503
1 /* texindex -- sort TeX index dribble output into an actual index.
2 $Id: texindex.c,v 1.11 2004/04/11 17:56:47 karl Exp $
4 Copyright (C) 1987, 1991, 1992, 1996, 1997, 1998, 1999, 2000, 2001,
5 2002, 2003, 2004 Free Software Foundation, Inc.
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307. */
21 #include "system.h"
22 #include <getopt.h>
24 static char *program_name = "texindex";
26 #if defined (emacs)
27 # include "../src/config.h"
28 /* Some s/os.h files redefine these. */
29 # undef read
30 # undef close
31 # undef write
32 # undef open
33 #endif
35 #if !defined (HAVE_MEMSET)
36 #undef memset
37 #define memset(ptr, ignore, count) bzero (ptr, count)
38 #endif
40 char *mktemp (char *);
42 #if !defined (SEEK_SET)
43 # define SEEK_SET 0
44 # define SEEK_CUR 1
45 # define SEEK_END 2
46 #endif /* !SEEK_SET */
48 struct linebuffer;
50 /* When sorting in core, this structure describes one line
51 and the position and length of its first keyfield. */
52 struct lineinfo
54 char *text; /* The actual text of the line. */
55 union {
56 char *text; /* The start of the key (for textual comparison). */
57 long number; /* The numeric value (for numeric comparison). */
58 } key;
59 long keylen; /* Length of KEY field. */
62 /* This structure describes a field to use as a sort key. */
63 struct keyfield
65 int startwords; /* Number of words to skip. */
66 int startchars; /* Number of additional chars to skip. */
67 int endwords; /* Number of words to ignore at end. */
68 int endchars; /* Ditto for characters of last word. */
69 char ignore_blanks; /* Non-zero means ignore spaces and tabs. */
70 char fold_case; /* Non-zero means case doesn't matter. */
71 char reverse; /* Non-zero means compare in reverse order. */
72 char numeric; /* Non-zeros means field is ASCII numeric. */
73 char positional; /* Sort according to file position. */
74 char braced; /* Count balanced-braced groupings as fields. */
77 /* Vector of keyfields to use. */
78 struct keyfield keyfields[3];
80 /* Number of keyfields stored in that vector. */
81 int num_keyfields = 3;
83 /* Vector of input file names, terminated with a null pointer. */
84 char **infiles;
86 /* Vector of corresponding output file names, or NULL, meaning default it
87 (add an `s' to the end). */
88 char **outfiles;
90 /* Length of `infiles'. */
91 int num_infiles;
93 /* Pointer to the array of pointers to lines being sorted. */
94 char **linearray;
96 /* The allocated length of `linearray'. */
97 long nlines;
99 /* Directory to use for temporary files. On Unix, it ends with a slash. */
100 char *tempdir;
102 /* Number of last temporary file. */
103 int tempcount;
105 /* Number of last temporary file already deleted.
106 Temporary files are deleted by `flush_tempfiles' in order of creation. */
107 int last_deleted_tempcount;
109 /* During in-core sort, this points to the base of the data block
110 which contains all the lines of data. */
111 char *text_base;
113 /* Initially 0; changed to 1 if we want initials in this index. */
114 int need_initials;
116 /* Remembers the first initial letter seen in this index, so we can
117 determine whether we need initials in the sorted form. */
118 char first_initial;
120 /* Additional command switches .*/
122 /* Nonzero means do not delete tempfiles -- for debugging. */
123 int keep_tempfiles;
125 /* Forward declarations of functions in this file. */
126 void decode_command (int argc, char **argv);
127 void sort_in_core (char *infile, int total, char *outfile);
128 void sort_offline (char *infile, off_t total, char *outfile);
129 char **parsefile (char *filename, char **nextline, char *data, long int size);
130 char *find_field (struct keyfield *keyfield, char *str, long int *lengthptr);
131 char *find_pos (char *str, int words, int chars, int ignore_blanks);
132 long find_value (char *start, long int length);
133 char *find_braced_pos (char *str, int words, int chars, int ignore_blanks);
134 char *find_braced_end (char *str);
135 void writelines (char **linearray, int nlines, FILE *ostream);
136 int compare_field (struct keyfield *keyfield, char *start1,
137 long int length1, long int pos1, char *start2,
138 long int length2, long int pos2);
139 int compare_full (const void *, const void *);
140 long readline (struct linebuffer *linebuffer, FILE *stream);
141 int merge_files (char **infiles, int nfiles, char *outfile);
142 int merge_direct (char **infiles, int nfiles, char *outfile);
143 void pfatal_with_name (const char *name);
144 void fatal (const char *format, const char *arg);
145 void error (const char *format, const char *arg);
146 void *xmalloc (), *xrealloc ();
147 char *concat (char *s1, char *s2);
148 void flush_tempfiles (int to_count);
150 #define MAX_IN_CORE_SORT 500000
153 main (int argc, char **argv)
155 int i;
157 tempcount = 0;
158 last_deleted_tempcount = 0;
160 #ifdef HAVE_SETLOCALE
161 /* Set locale via LC_ALL. */
162 setlocale (LC_ALL, "");
163 #endif
165 /* Set the text message domain. */
166 bindtextdomain (PACKAGE, LOCALEDIR);
167 textdomain (PACKAGE);
169 /* In case we write to a redirected stdout that fails. */
170 /* not ready atexit (close_stdout); */
172 /* Describe the kind of sorting to do. */
173 /* The first keyfield uses the first braced field and folds case. */
174 keyfields[0].braced = 1;
175 keyfields[0].fold_case = 1;
176 keyfields[0].endwords = -1;
177 keyfields[0].endchars = -1;
179 /* The second keyfield uses the second braced field, numerically. */
180 keyfields[1].braced = 1;
181 keyfields[1].numeric = 1;
182 keyfields[1].startwords = 1;
183 keyfields[1].endwords = -1;
184 keyfields[1].endchars = -1;
186 /* The third keyfield (which is ignored while discarding duplicates)
187 compares the whole line. */
188 keyfields[2].endwords = -1;
189 keyfields[2].endchars = -1;
191 decode_command (argc, argv);
193 /* Process input files completely, one by one. */
195 for (i = 0; i < num_infiles; i++)
197 int desc;
198 off_t ptr;
199 char *outfile;
200 struct stat instat;
202 desc = open (infiles[i], O_RDONLY, 0);
203 if (desc < 0)
204 pfatal_with_name (infiles[i]);
206 if (stat (infiles[i], &instat))
207 pfatal_with_name (infiles[i]);
208 if (S_ISDIR (instat.st_mode))
210 #ifdef EISDIR
211 errno = EISDIR;
212 #endif
213 pfatal_with_name (infiles[i]);
216 lseek (desc, (off_t) 0, SEEK_END);
217 ptr = (off_t) lseek (desc, (off_t) 0, SEEK_CUR);
219 close (desc);
221 outfile = outfiles[i];
222 if (!outfile)
223 outfile = concat (infiles[i], "s");
225 need_initials = 0;
226 first_initial = '\0';
228 if (ptr < MAX_IN_CORE_SORT)
229 /* Sort a small amount of data. */
230 sort_in_core (infiles[i], (int)ptr, outfile);
231 else
232 sort_offline (infiles[i], ptr, outfile);
235 flush_tempfiles (tempcount);
236 xexit (0);
237 return 0; /* Avoid bogus warnings. */
240 typedef struct
242 char *long_name;
243 char *short_name;
244 int *variable_ref;
245 int variable_value;
246 char *arg_name;
247 char *doc_string;
248 } TEXINDEX_OPTION;
250 TEXINDEX_OPTION texindex_options[] = {
251 { "--help", "-h", (int *)NULL, 0, (char *)NULL,
252 N_("display this help and exit") },
253 { "--keep", "-k", &keep_tempfiles, 1, (char *)NULL,
254 N_("keep temporary files around after processing") },
255 { "--no-keep", 0, &keep_tempfiles, 0, (char *)NULL,
256 N_("do not keep temporary files around after processing (default)") },
257 { "--output", "-o", (int *)NULL, 0, "FILE",
258 N_("send output to FILE") },
259 { "--version", (char *)NULL, (int *)NULL, 0, (char *)NULL,
260 N_("display version information and exit") },
261 { (char *)NULL, (char *)NULL, (int *)NULL, 0, (char *)NULL }
264 void
265 usage (int result_value)
267 register int i;
268 FILE *f = result_value ? stderr : stdout;
270 fprintf (f, _("Usage: %s [OPTION]... FILE...\n"), program_name);
271 fprintf (f, _("Generate a sorted index for each TeX output FILE.\n"));
272 /* Avoid trigraph nonsense. */
273 fprintf (f,
274 _("Usually FILE... is specified as `foo.%c%c\' for a document `foo.texi'.\n"),
275 '?', '?'); /* avoid trigraph in cat-id-tbl.c */
276 fprintf (f, _("\nOptions:\n"));
278 for (i = 0; texindex_options[i].long_name; i++)
280 putc (' ', f);
282 if (texindex_options[i].short_name)
283 fprintf (f, "%s, ", texindex_options[i].short_name);
285 fprintf (f, "%s %s",
286 texindex_options[i].long_name,
287 texindex_options[i].arg_name
288 ? texindex_options[i].arg_name : "");
290 fprintf (f, "\t%s\n", _(texindex_options[i].doc_string));
292 fputs (_("\n\
293 Email bug reports to bug-texinfo@gnu.org,\n\
294 general questions and discussion to help-texinfo@gnu.org.\n\
295 Texinfo home page: http://www.gnu.org/software/texinfo/"), f);
296 fputs ("\n", f);
298 xexit (result_value);
301 /* Decode the command line arguments to set the parameter variables
302 and set up the vector of keyfields and the vector of input files. */
304 void
305 decode_command (int argc, char **argv)
307 int arg_index = 1;
308 char **ip;
309 char **op;
311 /* Store default values into parameter variables. */
313 tempdir = getenv ("TMPDIR");
314 if (tempdir == NULL)
315 tempdir = getenv ("TEMP");
316 if (tempdir == NULL)
317 tempdir = getenv ("TMP");
318 if (tempdir == NULL)
319 tempdir = DEFAULT_TMPDIR;
320 else
321 tempdir = concat (tempdir, "/");
323 keep_tempfiles = 0;
325 /* Allocate ARGC input files, which must be enough. */
327 infiles = (char **) xmalloc (argc * sizeof (char *));
328 outfiles = (char **) xmalloc (argc * sizeof (char *));
329 ip = infiles;
330 op = outfiles;
332 while (arg_index < argc)
334 char *arg = argv[arg_index++];
336 if (*arg == '-')
338 if (strcmp (arg, "--version") == 0)
340 printf ("texindex (GNU %s) %s\n", PACKAGE, VERSION);
341 puts ("");
342 puts ("Copyright (C) 2004 Free Software Foundation, Inc.");
343 printf (_("There is NO warranty. You may redistribute this software\n\
344 under the terms of the GNU General Public License.\n\
345 For more information about these matters, see the files named COPYING.\n"));
346 xexit (0);
348 else if ((strcmp (arg, "--keep") == 0) ||
349 (strcmp (arg, "-k") == 0))
351 keep_tempfiles = 1;
353 else if ((strcmp (arg, "--help") == 0) ||
354 (strcmp (arg, "-h") == 0))
356 usage (0);
358 else if ((strcmp (arg, "--output") == 0) ||
359 (strcmp (arg, "-o") == 0))
361 if (argv[arg_index] != (char *)NULL)
363 arg_index++;
364 if (op > outfiles)
365 *(op - 1) = argv[arg_index];
367 else
368 usage (1);
370 else
371 usage (1);
373 else
375 *ip++ = arg;
376 *op++ = (char *)NULL;
380 /* Record number of keyfields and terminate list of filenames. */
381 num_infiles = ip - infiles;
382 *ip = (char *)NULL;
383 if (num_infiles == 0)
384 usage (1);
387 /* Return a name for temporary file COUNT. */
389 static char *
390 maketempname (int count)
392 static char *tempbase = NULL;
393 char tempsuffix[10];
395 if (!tempbase)
397 int fd;
398 tempbase = concat (tempdir, "txidxXXXXXX");
400 fd = mkstemp (tempbase);
401 if (fd == -1)
402 pfatal_with_name (tempbase);
405 sprintf (tempsuffix, ".%d", count);
406 return concat (tempbase, tempsuffix);
410 /* Delete all temporary files up to TO_COUNT. */
412 void
413 flush_tempfiles (int to_count)
415 if (keep_tempfiles)
416 return;
417 while (last_deleted_tempcount < to_count)
418 unlink (maketempname (++last_deleted_tempcount));
422 /* Compare LINE1 and LINE2 according to the specified set of keyfields. */
425 compare_full (const void *p1, const void *p2)
427 char **line1 = (char **) p1;
428 char **line2 = (char **) p2;
429 int i;
431 /* Compare using the first keyfield;
432 if that does not distinguish the lines, try the second keyfield;
433 and so on. */
435 for (i = 0; i < num_keyfields; i++)
437 long length1, length2;
438 char *start1 = find_field (&keyfields[i], *line1, &length1);
439 char *start2 = find_field (&keyfields[i], *line2, &length2);
440 int tem = compare_field (&keyfields[i], start1, length1,
441 *line1 - text_base,
442 start2, length2, *line2 - text_base);
443 if (tem)
445 if (keyfields[i].reverse)
446 return -tem;
447 return tem;
451 return 0; /* Lines match exactly. */
454 /* Compare LINE1 and LINE2, described by structures
455 in which the first keyfield is identified in advance.
456 For positional sorting, assumes that the order of the lines in core
457 reflects their nominal order. */
459 compare_prepared (const void *p1, const void *p2)
461 struct lineinfo *line1 = (struct lineinfo *) p1;
462 struct lineinfo *line2 = (struct lineinfo *) p2;
463 int i;
464 int tem;
465 char *text1, *text2;
467 /* Compare using the first keyfield, which has been found for us already. */
468 if (keyfields->positional)
470 if (line1->text - text_base > line2->text - text_base)
471 tem = 1;
472 else
473 tem = -1;
475 else if (keyfields->numeric)
476 tem = line1->key.number - line2->key.number;
477 else
478 tem = compare_field (keyfields, line1->key.text, line1->keylen, 0,
479 line2->key.text, line2->keylen, 0);
480 if (tem)
482 if (keyfields->reverse)
483 return -tem;
484 return tem;
487 text1 = line1->text;
488 text2 = line2->text;
490 /* Compare using the second keyfield;
491 if that does not distinguish the lines, try the third keyfield;
492 and so on. */
494 for (i = 1; i < num_keyfields; i++)
496 long length1, length2;
497 char *start1 = find_field (&keyfields[i], text1, &length1);
498 char *start2 = find_field (&keyfields[i], text2, &length2);
499 int tem = compare_field (&keyfields[i], start1, length1,
500 text1 - text_base,
501 start2, length2, text2 - text_base);
502 if (tem)
504 if (keyfields[i].reverse)
505 return -tem;
506 return tem;
510 return 0; /* Lines match exactly. */
513 /* Like compare_full but more general.
514 You can pass any strings, and you can say how many keyfields to use.
515 POS1 and POS2 should indicate the nominal positional ordering of
516 the two lines in the input. */
519 compare_general (char *str1, char *str2, long int pos1, long int pos2, int use_keyfields)
521 int i;
523 /* Compare using the first keyfield;
524 if that does not distinguish the lines, try the second keyfield;
525 and so on. */
527 for (i = 0; i < use_keyfields; i++)
529 long length1, length2;
530 char *start1 = find_field (&keyfields[i], str1, &length1);
531 char *start2 = find_field (&keyfields[i], str2, &length2);
532 int tem = compare_field (&keyfields[i], start1, length1, pos1,
533 start2, length2, pos2);
534 if (tem)
536 if (keyfields[i].reverse)
537 return -tem;
538 return tem;
542 return 0; /* Lines match exactly. */
545 /* Find the start and length of a field in STR according to KEYFIELD.
546 A pointer to the starting character is returned, and the length
547 is stored into the int that LENGTHPTR points to. */
549 char *
550 find_field (struct keyfield *keyfield, char *str, long int *lengthptr)
552 char *start;
553 char *end;
554 char *(*fun) ();
556 if (keyfield->braced)
557 fun = find_braced_pos;
558 else
559 fun = find_pos;
561 start = (*fun) (str, keyfield->startwords, keyfield->startchars,
562 keyfield->ignore_blanks);
563 if (keyfield->endwords < 0)
565 if (keyfield->braced)
566 end = find_braced_end (start);
567 else
569 end = start;
570 while (*end && *end != '\n')
571 end++;
574 else
576 end = (*fun) (str, keyfield->endwords, keyfield->endchars, 0);
577 if (end - str < start - str)
578 end = start;
580 *lengthptr = end - start;
581 return start;
584 /* Return a pointer to a specified place within STR,
585 skipping (from the beginning) WORDS words and then CHARS chars.
586 If IGNORE_BLANKS is nonzero, we skip all blanks
587 after finding the specified word. */
589 char *
590 find_pos (char *str, int words, int chars, int ignore_blanks)
592 int i;
593 char *p = str;
595 for (i = 0; i < words; i++)
597 char c;
598 /* Find next bunch of nonblanks and skip them. */
599 while ((c = *p) == ' ' || c == '\t')
600 p++;
601 while ((c = *p) && c != '\n' && !(c == ' ' || c == '\t'))
602 p++;
603 if (!*p || *p == '\n')
604 return p;
607 while (*p == ' ' || *p == '\t')
608 p++;
610 for (i = 0; i < chars; i++)
612 if (!*p || *p == '\n')
613 break;
614 p++;
616 return p;
619 /* Like find_pos but assumes that each field is surrounded by braces
620 and that braces within fields are balanced. */
622 char *
623 find_braced_pos (char *str, int words, int chars, int ignore_blanks)
625 int i;
626 int bracelevel;
627 char *p = str;
628 char c;
630 for (i = 0; i < words; i++)
632 bracelevel = 1;
633 while ((c = *p++) != '{' && c != '\n' && c)
634 /* Do nothing. */ ;
635 if (c != '{')
636 return p - 1;
637 while (bracelevel)
639 c = *p++;
640 if (c == '{')
641 bracelevel++;
642 if (c == '}')
643 bracelevel--;
644 if (c == 0 || c == '\n')
645 return p - 1;
649 while ((c = *p++) != '{' && c != '\n' && c)
650 /* Do nothing. */ ;
652 if (c != '{')
653 return p - 1;
655 if (ignore_blanks)
656 while ((c = *p) == ' ' || c == '\t')
657 p++;
659 for (i = 0; i < chars; i++)
661 if (!*p || *p == '\n')
662 break;
663 p++;
665 return p;
668 /* Find the end of the balanced-brace field which starts at STR.
669 The position returned is just before the closing brace. */
671 char *
672 find_braced_end (char *str)
674 int bracelevel;
675 char *p = str;
676 char c;
678 bracelevel = 1;
679 while (bracelevel)
681 c = *p++;
682 if (c == '{')
683 bracelevel++;
684 if (c == '}')
685 bracelevel--;
686 if (c == 0 || c == '\n')
687 return p - 1;
689 return p - 1;
692 long
693 find_value (char *start, long int length)
695 while (length != 0L)
697 if (isdigit (*start))
698 return atol (start);
699 length--;
700 start++;
702 return 0l;
705 /* Vector used to translate characters for comparison.
706 This is how we make all alphanumerics follow all else,
707 and ignore case in the first sorting. */
708 int char_order[256];
710 void
711 init_char_order (void)
713 int i;
714 for (i = 1; i < 256; i++)
715 char_order[i] = i;
717 for (i = '0'; i <= '9'; i++)
718 char_order[i] += 512;
720 for (i = 'a'; i <= 'z'; i++)
722 char_order[i] = 512 + i;
723 char_order[i + 'A' - 'a'] = 512 + i;
727 /* Compare two fields (each specified as a start pointer and a character count)
728 according to KEYFIELD.
729 The sign of the value reports the relation between the fields. */
732 compare_field (struct keyfield *keyfield, char *start1, long int length1,
733 long int pos1, char *start2, long int length2, long int pos2)
735 if (keyfields->positional)
737 if (pos1 > pos2)
738 return 1;
739 else
740 return -1;
742 if (keyfield->numeric)
744 long value = find_value (start1, length1) - find_value (start2, length2);
745 if (value > 0)
746 return 1;
747 if (value < 0)
748 return -1;
749 return 0;
751 else
753 char *p1 = start1;
754 char *p2 = start2;
755 char *e1 = start1 + length1;
756 char *e2 = start2 + length2;
758 while (1)
760 int c1, c2;
762 if (p1 == e1)
763 c1 = 0;
764 else
765 c1 = *p1++;
766 if (p2 == e2)
767 c2 = 0;
768 else
769 c2 = *p2++;
771 if (char_order[c1] != char_order[c2])
772 return char_order[c1] - char_order[c2];
773 if (!c1)
774 break;
777 /* Strings are equal except possibly for case. */
778 p1 = start1;
779 p2 = start2;
780 while (1)
782 int c1, c2;
784 if (p1 == e1)
785 c1 = 0;
786 else
787 c1 = *p1++;
788 if (p2 == e2)
789 c2 = 0;
790 else
791 c2 = *p2++;
793 if (c1 != c2)
794 /* Reverse sign here so upper case comes out last. */
795 return c2 - c1;
796 if (!c1)
797 break;
800 return 0;
804 /* A `struct linebuffer' is a structure which holds a line of text.
805 `readline' reads a line from a stream into a linebuffer
806 and works regardless of the length of the line. */
808 struct linebuffer
810 long size;
811 char *buffer;
814 /* Initialize LINEBUFFER for use. */
816 void
817 initbuffer (struct linebuffer *linebuffer)
819 linebuffer->size = 200;
820 linebuffer->buffer = (char *) xmalloc (200);
823 /* Read a line of text from STREAM into LINEBUFFER.
824 Return the length of the line. */
826 long
827 readline (struct linebuffer *linebuffer, FILE *stream)
829 char *buffer = linebuffer->buffer;
830 char *p = linebuffer->buffer;
831 char *end = p + linebuffer->size;
833 while (1)
835 int c = getc (stream);
836 if (p == end)
838 buffer = (char *) xrealloc (buffer, linebuffer->size *= 2);
839 p += buffer - linebuffer->buffer;
840 end += buffer - linebuffer->buffer;
841 linebuffer->buffer = buffer;
843 if (c < 0 || c == '\n')
845 *p = 0;
846 break;
848 *p++ = c;
851 return p - buffer;
854 /* Sort an input file too big to sort in core. */
856 void
857 sort_offline (char *infile, off_t total, char *outfile)
859 /* More than enough. */
860 int ntemps = 2 * (total + MAX_IN_CORE_SORT - 1) / MAX_IN_CORE_SORT;
861 char **tempfiles = (char **) xmalloc (ntemps * sizeof (char *));
862 FILE *istream = fopen (infile, "r");
863 int i;
864 struct linebuffer lb;
865 long linelength;
866 int failure = 0;
868 initbuffer (&lb);
870 /* Read in one line of input data. */
872 linelength = readline (&lb, istream);
874 if (lb.buffer[0] != '\\' && lb.buffer[0] != '@')
876 error (_("%s: not a texinfo index file"), infile);
877 return;
880 /* Split up the input into `ntemps' temporary files, or maybe fewer,
881 and put the new files' names into `tempfiles' */
883 for (i = 0; i < ntemps; i++)
885 char *outname = maketempname (++tempcount);
886 FILE *ostream = fopen (outname, "w");
887 long tempsize = 0;
889 if (!ostream)
890 pfatal_with_name (outname);
891 tempfiles[i] = outname;
893 /* Copy lines into this temp file as long as it does not make file
894 "too big" or until there are no more lines. */
896 while (tempsize + linelength + 1 <= MAX_IN_CORE_SORT)
898 tempsize += linelength + 1;
899 fputs (lb.buffer, ostream);
900 putc ('\n', ostream);
902 /* Read another line of input data. */
904 linelength = readline (&lb, istream);
905 if (!linelength && feof (istream))
906 break;
908 if (lb.buffer[0] != '\\' && lb.buffer[0] != '@')
910 error (_("%s: not a texinfo index file"), infile);
911 failure = 1;
912 goto fail;
915 fclose (ostream);
916 if (feof (istream))
917 break;
920 free (lb.buffer);
922 fail:
923 /* Record number of temp files we actually needed. */
925 ntemps = i;
927 /* Sort each tempfile into another tempfile.
928 Delete the first set of tempfiles and put the names of the second
929 into `tempfiles'. */
931 for (i = 0; i < ntemps; i++)
933 char *newtemp = maketempname (++tempcount);
934 sort_in_core (tempfiles[i], MAX_IN_CORE_SORT, newtemp);
935 if (!keep_tempfiles)
936 unlink (tempfiles[i]);
937 tempfiles[i] = newtemp;
940 if (failure)
941 return;
943 /* Merge the tempfiles together and indexify. */
945 merge_files (tempfiles, ntemps, outfile);
948 /* Sort INFILE, whose size is TOTAL,
949 assuming that is small enough to be done in-core,
950 then indexify it and send the output to OUTFILE (or to stdout). */
952 void
953 sort_in_core (char *infile, int total, char *outfile)
955 char **nextline;
956 char *data = (char *) xmalloc (total + 1);
957 char *file_data;
958 long file_size;
959 int i;
960 FILE *ostream = stdout;
961 struct lineinfo *lineinfo;
963 /* Read the contents of the file into the moby array `data'. */
965 int desc = open (infile, O_RDONLY, 0);
967 if (desc < 0)
968 fatal (_("failure reopening %s"), infile);
969 for (file_size = 0;;)
971 i = read (desc, data + file_size, total - file_size);
972 if (i <= 0)
973 break;
974 file_size += i;
976 file_data = data;
977 data[file_size] = 0;
979 close (desc);
981 if (file_size > 0 && data[0] != '\\' && data[0] != '@')
983 error (_("%s: not a texinfo index file"), infile);
984 return;
987 init_char_order ();
989 /* Sort routines want to know this address. */
991 text_base = data;
993 /* Create the array of pointers to lines, with a default size
994 frequently enough. */
996 nlines = total / 50;
997 if (!nlines)
998 nlines = 2;
999 linearray = (char **) xmalloc (nlines * sizeof (char *));
1001 /* `nextline' points to the next free slot in this array.
1002 `nlines' is the allocated size. */
1004 nextline = linearray;
1006 /* Parse the input file's data, and make entries for the lines. */
1008 nextline = parsefile (infile, nextline, file_data, file_size);
1009 if (nextline == 0)
1011 error (_("%s: not a texinfo index file"), infile);
1012 return;
1015 /* Sort the lines. */
1017 /* If we have enough space, find the first keyfield of each line in advance.
1018 Make a `struct lineinfo' for each line, which records the keyfield
1019 as well as the line, and sort them. */
1021 lineinfo = malloc ((nextline - linearray) * sizeof (struct lineinfo));
1023 if (lineinfo)
1025 struct lineinfo *lp;
1026 char **p;
1028 for (lp = lineinfo, p = linearray; p != nextline; lp++, p++)
1030 lp->text = *p;
1031 lp->key.text = find_field (keyfields, *p, &lp->keylen);
1032 if (keyfields->numeric)
1033 lp->key.number = find_value (lp->key.text, lp->keylen);
1036 qsort (lineinfo, nextline - linearray, sizeof (struct lineinfo),
1037 compare_prepared);
1039 for (lp = lineinfo, p = linearray; p != nextline; lp++, p++)
1040 *p = lp->text;
1042 free (lineinfo);
1044 else
1045 qsort (linearray, nextline - linearray, sizeof (char *), compare_full);
1047 /* Open the output file. */
1049 if (outfile)
1051 ostream = fopen (outfile, "w");
1052 if (!ostream)
1053 pfatal_with_name (outfile);
1056 writelines (linearray, nextline - linearray, ostream);
1057 if (outfile)
1058 fclose (ostream);
1060 free (linearray);
1061 free (data);
1064 /* Parse an input string in core into lines.
1065 DATA is the input string, and SIZE is its length.
1066 Data goes in LINEARRAY starting at NEXTLINE.
1067 The value returned is the first entry in LINEARRAY still unused.
1068 Value 0 means input file contents are invalid. */
1070 char **
1071 parsefile (char *filename, char **nextline, char *data, long int size)
1073 char *p, *end;
1074 char **line = nextline;
1076 p = data;
1077 end = p + size;
1078 *end = 0;
1080 while (p != end)
1082 if (p[0] != '\\' && p[0] != '@')
1083 return 0;
1085 *line = p;
1087 /* Find the first letter of the first field of this line. If it
1088 is different from the first letter of the first field of the
1089 first line, we need initial headers in the output index. */
1090 while (*p && *p != '{')
1091 p++;
1092 if (p == end)
1093 return 0;
1094 p++;
1095 if (first_initial)
1097 if (first_initial != toupper (*p))
1098 need_initials = 1;
1100 else
1101 first_initial = toupper (*p);
1103 while (*p && *p != '\n')
1104 p++;
1105 if (p != end)
1106 p++;
1108 line++;
1109 if (line == linearray + nlines)
1111 char **old = linearray;
1112 linearray = xrealloc (linearray, sizeof (char *) * (nlines *= 4));
1113 line += linearray - old;
1117 return line;
1120 /* Indexification is a filter applied to the sorted lines
1121 as they are being written to the output file.
1122 Multiple entries for the same name, with different page numbers,
1123 get combined into a single entry with multiple page numbers.
1124 The first braced field, which is used for sorting, is discarded.
1125 However, its first character is examined, folded to lower case,
1126 and if it is different from that in the previous line fed to us
1127 a \initial line is written with one argument, the new initial.
1129 If an entry has four braced fields, then the second and third
1130 constitute primary and secondary names.
1131 In this case, each change of primary name
1132 generates a \primary line which contains only the primary name,
1133 and in between these are \secondary lines which contain
1134 just a secondary name and page numbers. */
1136 /* The last primary name we wrote a \primary entry for.
1137 If only one level of indexing is being done, this is the last name seen. */
1138 char *lastprimary;
1139 /* Length of storage allocated for lastprimary. */
1140 int lastprimarylength;
1142 /* Similar, for the secondary name. */
1143 char *lastsecondary;
1144 int lastsecondarylength;
1146 /* Zero if we are not in the middle of writing an entry.
1147 One if we have written the beginning of an entry but have not
1148 yet written any page numbers into it.
1149 Greater than one if we have written the beginning of an entry
1150 plus at least one page number. */
1151 int pending;
1153 /* The initial (for sorting purposes) of the last primary entry written.
1154 When this changes, a \initial {c} line is written */
1156 char *lastinitial;
1158 int lastinitiallength;
1160 /* When we need a string of length 1 for the value of lastinitial,
1161 store it here. */
1163 char lastinitial1[2];
1165 /* Initialize static storage for writing an index. */
1167 void
1168 init_index (void)
1170 pending = 0;
1171 lastinitial = lastinitial1;
1172 lastinitial1[0] = 0;
1173 lastinitial1[1] = 0;
1174 lastinitiallength = 0;
1175 lastprimarylength = 100;
1176 lastprimary = (char *) xmalloc (lastprimarylength + 1);
1177 memset (lastprimary, '\0', lastprimarylength + 1);
1178 lastsecondarylength = 100;
1179 lastsecondary = (char *) xmalloc (lastsecondarylength + 1);
1180 memset (lastsecondary, '\0', lastsecondarylength + 1);
1183 /* Indexify. Merge entries for the same name,
1184 insert headers for each initial character, etc. */
1186 void
1187 indexify (char *line, FILE *ostream)
1189 char *primary, *secondary, *pagenumber;
1190 int primarylength, secondarylength = 0, pagelength;
1191 int nosecondary;
1192 int initiallength;
1193 char *initial;
1194 char initial1[2];
1195 register char *p;
1197 /* First, analyze the parts of the entry fed to us this time. */
1199 p = find_braced_pos (line, 0, 0, 0);
1200 if (*p == '{')
1202 initial = p;
1203 /* Get length of inner pair of braces starting at `p',
1204 including that inner pair of braces. */
1205 initiallength = find_braced_end (p + 1) + 1 - p;
1207 else
1209 initial = initial1;
1210 initial1[0] = toupper (*p);
1211 initial1[1] = 0;
1212 initiallength = 1;
1215 pagenumber = find_braced_pos (line, 1, 0, 0);
1216 pagelength = find_braced_end (pagenumber) - pagenumber;
1217 if (pagelength == 0)
1218 fatal (_("No page number in %s"), line);
1220 primary = find_braced_pos (line, 2, 0, 0);
1221 primarylength = find_braced_end (primary) - primary;
1223 secondary = find_braced_pos (line, 3, 0, 0);
1224 nosecondary = !*secondary;
1225 if (!nosecondary)
1226 secondarylength = find_braced_end (secondary) - secondary;
1228 /* If the primary is different from before, make a new primary entry. */
1229 if (strncmp (primary, lastprimary, primarylength))
1231 /* Close off current secondary entry first, if one is open. */
1232 if (pending)
1234 fputs ("}\n", ostream);
1235 pending = 0;
1238 /* If this primary has a different initial, include an entry for
1239 the initial. */
1240 if (need_initials &&
1241 (initiallength != lastinitiallength ||
1242 strncmp (initial, lastinitial, initiallength)))
1244 fprintf (ostream, "\\initial {");
1245 fwrite (initial, 1, initiallength, ostream);
1246 fputs ("}\n", ostream);
1247 if (initial == initial1)
1249 lastinitial = lastinitial1;
1250 *lastinitial1 = *initial1;
1252 else
1254 lastinitial = initial;
1256 lastinitiallength = initiallength;
1259 /* Make the entry for the primary. */
1260 if (nosecondary)
1261 fputs ("\\entry {", ostream);
1262 else
1263 fputs ("\\primary {", ostream);
1264 fwrite (primary, primarylength, 1, ostream);
1265 if (nosecondary)
1267 fputs ("}{", ostream);
1268 pending = 1;
1270 else
1271 fputs ("}\n", ostream);
1273 /* Record name of most recent primary. */
1274 if (lastprimarylength < primarylength)
1276 lastprimarylength = primarylength + 100;
1277 lastprimary = (char *) xrealloc (lastprimary,
1278 1 + lastprimarylength);
1280 strncpy (lastprimary, primary, primarylength);
1281 lastprimary[primarylength] = 0;
1283 /* There is no current secondary within this primary, now. */
1284 lastsecondary[0] = 0;
1287 /* Should not have an entry with no subtopic following one with a
1288 subtopic. */
1290 if (nosecondary && *lastsecondary)
1291 error (_("entry %s follows an entry with a secondary name"), line);
1293 /* Start a new secondary entry if necessary. */
1294 if (!nosecondary && strncmp (secondary, lastsecondary, secondarylength))
1296 if (pending)
1298 fputs ("}\n", ostream);
1299 pending = 0;
1302 /* Write the entry for the secondary. */
1303 fputs ("\\secondary {", ostream);
1304 fwrite (secondary, secondarylength, 1, ostream);
1305 fputs ("}{", ostream);
1306 pending = 1;
1308 /* Record name of most recent secondary. */
1309 if (lastsecondarylength < secondarylength)
1311 lastsecondarylength = secondarylength + 100;
1312 lastsecondary = (char *) xrealloc (lastsecondary,
1313 1 + lastsecondarylength);
1315 strncpy (lastsecondary, secondary, secondarylength);
1316 lastsecondary[secondarylength] = 0;
1319 /* Here to add one more page number to the current entry. */
1320 if (pending++ != 1)
1321 fputs (", ", ostream); /* Punctuate first, if this is not the first. */
1322 fwrite (pagenumber, pagelength, 1, ostream);
1325 /* Close out any unfinished output entry. */
1327 void
1328 finish_index (FILE *ostream)
1330 if (pending)
1331 fputs ("}\n", ostream);
1332 free (lastprimary);
1333 free (lastsecondary);
1336 /* Copy the lines in the sorted order.
1337 Each line is copied out of the input file it was found in. */
1339 void
1340 writelines (char **linearray, int nlines, FILE *ostream)
1342 char **stop_line = linearray + nlines;
1343 char **next_line;
1345 init_index ();
1347 /* Output the text of the lines, and free the buffer space. */
1349 for (next_line = linearray; next_line != stop_line; next_line++)
1351 /* If -u was specified, output the line only if distinct from
1352 previous one. */
1353 if (next_line == linearray
1354 /* Compare previous line with this one, using only the
1355 explicitly specd keyfields. */
1356 || compare_general (*(next_line - 1), *next_line, 0L, 0L,
1357 num_keyfields - 1))
1359 char *p = *next_line;
1360 char c;
1362 while ((c = *p++) && c != '\n')
1363 /* Do nothing. */ ;
1364 *(p - 1) = 0;
1365 indexify (*next_line, ostream);
1369 finish_index (ostream);
1372 /* Assume (and optionally verify) that each input file is sorted;
1373 merge them and output the result.
1374 Returns nonzero if any input file fails to be sorted.
1376 This is the high-level interface that can handle an unlimited
1377 number of files. */
1379 #define MAX_DIRECT_MERGE 10
1382 merge_files (char **infiles, int nfiles, char *outfile)
1384 char **tempfiles;
1385 int ntemps;
1386 int i;
1387 int value = 0;
1388 int start_tempcount = tempcount;
1390 if (nfiles <= MAX_DIRECT_MERGE)
1391 return merge_direct (infiles, nfiles, outfile);
1393 /* Merge groups of MAX_DIRECT_MERGE input files at a time,
1394 making a temporary file to hold each group's result. */
1396 ntemps = (nfiles + MAX_DIRECT_MERGE - 1) / MAX_DIRECT_MERGE;
1397 tempfiles = (char **) xmalloc (ntemps * sizeof (char *));
1398 for (i = 0; i < ntemps; i++)
1400 int nf = MAX_DIRECT_MERGE;
1401 if (i + 1 == ntemps)
1402 nf = nfiles - i * MAX_DIRECT_MERGE;
1403 tempfiles[i] = maketempname (++tempcount);
1404 value |= merge_direct (&infiles[i * MAX_DIRECT_MERGE], nf, tempfiles[i]);
1407 /* All temporary files that existed before are no longer needed
1408 since their contents have been merged into our new tempfiles.
1409 So delete them. */
1410 flush_tempfiles (start_tempcount);
1412 /* Now merge the temporary files we created. */
1414 merge_files (tempfiles, ntemps, outfile);
1416 free (tempfiles);
1418 return value;
1421 /* Assume (and optionally verify) that each input file is sorted;
1422 merge them and output the result.
1423 Returns nonzero if any input file fails to be sorted.
1425 This version of merging will not work if the number of
1426 input files gets too high. Higher level functions
1427 use it only with a bounded number of input files. */
1430 merge_direct (char **infiles, int nfiles, char *outfile)
1432 struct linebuffer *lb1, *lb2;
1433 struct linebuffer **thisline, **prevline;
1434 FILE **streams;
1435 int i;
1436 int nleft;
1437 int lossage = 0;
1438 int *file_lossage;
1439 struct linebuffer *prev_out = 0;
1440 FILE *ostream = stdout;
1442 if (outfile)
1444 ostream = fopen (outfile, "w");
1446 if (!ostream)
1447 pfatal_with_name (outfile);
1449 init_index ();
1451 if (nfiles == 0)
1453 if (outfile)
1454 fclose (ostream);
1455 return 0;
1458 /* For each file, make two line buffers. Also, for each file, there
1459 is an element of `thisline' which points at any time to one of the
1460 file's two buffers, and an element of `prevline' which points to
1461 the other buffer. `thisline' is supposed to point to the next
1462 available line from the file, while `prevline' holds the last file
1463 line used, which is remembered so that we can verify that the file
1464 is properly sorted. */
1466 /* lb1 and lb2 contain one buffer each per file. */
1467 lb1 = (struct linebuffer *) xmalloc (nfiles * sizeof (struct linebuffer));
1468 lb2 = (struct linebuffer *) xmalloc (nfiles * sizeof (struct linebuffer));
1470 /* thisline[i] points to the linebuffer holding the next available
1471 line in file i, or is zero if there are no lines left in that file. */
1472 thisline = (struct linebuffer **)
1473 xmalloc (nfiles * sizeof (struct linebuffer *));
1474 /* prevline[i] points to the linebuffer holding the last used line
1475 from file i. This is just for verifying that file i is properly
1476 sorted. */
1477 prevline = (struct linebuffer **)
1478 xmalloc (nfiles * sizeof (struct linebuffer *));
1479 /* streams[i] holds the input stream for file i. */
1480 streams = (FILE **) xmalloc (nfiles * sizeof (FILE *));
1481 /* file_lossage[i] is nonzero if we already know file i is not
1482 properly sorted. */
1483 file_lossage = (int *) xmalloc (nfiles * sizeof (int));
1485 /* Allocate and initialize all that storage. */
1487 for (i = 0; i < nfiles; i++)
1489 initbuffer (&lb1[i]);
1490 initbuffer (&lb2[i]);
1491 thisline[i] = &lb1[i];
1492 prevline[i] = &lb2[i];
1493 file_lossage[i] = 0;
1494 streams[i] = fopen (infiles[i], "r");
1495 if (!streams[i])
1496 pfatal_with_name (infiles[i]);
1498 readline (thisline[i], streams[i]);
1501 /* Keep count of number of files not at eof. */
1502 nleft = nfiles;
1504 while (nleft)
1506 struct linebuffer *best = 0;
1507 struct linebuffer *exch;
1508 int bestfile = -1;
1509 int i;
1511 /* Look at the next avail line of each file; choose the least one. */
1513 for (i = 0; i < nfiles; i++)
1515 if (thisline[i] &&
1516 (!best ||
1517 0 < compare_general (best->buffer, thisline[i]->buffer,
1518 (long) bestfile, (long) i, num_keyfields)))
1520 best = thisline[i];
1521 bestfile = i;
1525 /* Output that line, unless it matches the previous one and we
1526 don't want duplicates. */
1528 if (!(prev_out &&
1529 !compare_general (prev_out->buffer,
1530 best->buffer, 0L, 1L, num_keyfields - 1)))
1531 indexify (best->buffer, ostream);
1532 prev_out = best;
1534 /* Now make the line the previous of its file, and fetch a new
1535 line from that file. */
1537 exch = prevline[bestfile];
1538 prevline[bestfile] = thisline[bestfile];
1539 thisline[bestfile] = exch;
1541 while (1)
1543 /* If the file has no more, mark it empty. */
1545 if (feof (streams[bestfile]))
1547 thisline[bestfile] = 0;
1548 /* Update the number of files still not empty. */
1549 nleft--;
1550 break;
1552 readline (thisline[bestfile], streams[bestfile]);
1553 if (thisline[bestfile]->buffer[0] || !feof (streams[bestfile]))
1554 break;
1558 finish_index (ostream);
1560 /* Free all storage and close all input streams. */
1562 for (i = 0; i < nfiles; i++)
1564 fclose (streams[i]);
1565 free (lb1[i].buffer);
1566 free (lb2[i].buffer);
1568 free (file_lossage);
1569 free (lb1);
1570 free (lb2);
1571 free (thisline);
1572 free (prevline);
1573 free (streams);
1575 if (outfile)
1576 fclose (ostream);
1578 return lossage;
1581 /* Print error message and exit. */
1583 void
1584 fatal (const char *format, const char *arg)
1586 error (format, arg);
1587 xexit (1);
1590 /* Print error message. FORMAT is printf control string, ARG is arg for it. */
1591 void
1592 error (const char *format, const char *arg)
1594 printf ("%s: ", program_name);
1595 printf (format, arg);
1596 if (format[strlen (format) -1] != '\n')
1597 printf ("\n");
1600 void
1601 perror_with_name (const char *name)
1603 fprintf (stderr, "%s: ", program_name);
1604 perror (name);
1607 void
1608 pfatal_with_name (const char *name)
1610 perror_with_name (name);
1611 xexit (1);
1615 /* Return a newly-allocated string concatenating S1 and S2. */
1617 char *
1618 concat (char *s1, char *s2)
1620 int len1 = strlen (s1), len2 = strlen (s2);
1621 char *result = (char *) xmalloc (len1 + len2 + 1);
1623 strcpy (result, s1);
1624 strcpy (result + len1, s2);
1625 *(result + len1 + len2) = 0;
1627 return result;