* MAINTAINERS: Added myself for write-after-approval.
[official-gcc.git] / texinfo / util / texindex.c
blob347cccbd6124128333ab3b80cea17128ee21543f
1 /* Prepare TeX index dribble output into an actual index.
2 $Id: texindex.c,v 1.1.1.3 1998/03/24 18:20:31 law Exp $
4 Copyright (C) 1987, 91, 92, 96, 97, 98 Free Software Foundation, Inc.
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307. */
20 #include "system.h"
21 #include <getopt.h>
23 #if defined (emacs)
24 # include "../src/config.h"
25 /* Some s/os.h files redefine these. */
26 # undef read
27 # undef close
28 # undef write
29 # undef open
30 #endif
32 #if !defined (HAVE_MEMSET)
33 #undef memset
34 #define memset(ptr, ignore, count) bzero (ptr, count)
35 #endif
38 char *mktemp ();
40 #if defined (VMS)
41 # include <file.h>
42 # define TI_NO_ERROR ((1 << 28) | 1)
43 # define TI_FATAL_ERROR ((1 << 28) | 4)
44 # define unlink delete
45 #else /* !VMS */
46 # define TI_NO_ERROR 0
47 # define TI_FATAL_ERROR 1
48 #endif /* !VMS */
50 #if !defined (SEEK_SET)
51 # define SEEK_SET 0
52 # define SEEK_CUR 1
53 # define SEEK_END 2
54 #endif /* !SEEK_SET */
56 /* When sorting in core, this structure describes one line
57 and the position and length of its first keyfield. */
58 struct lineinfo
60 char *text; /* The actual text of the line. */
61 union {
62 char *text; /* The start of the key (for textual comparison). */
63 long number; /* The numeric value (for numeric comparison). */
64 } key;
65 long keylen; /* Length of KEY field. */
68 /* This structure describes a field to use as a sort key. */
69 struct keyfield
71 int startwords; /* Number of words to skip. */
72 int startchars; /* Number of additional chars to skip. */
73 int endwords; /* Number of words to ignore at end. */
74 int endchars; /* Ditto for characters of last word. */
75 char ignore_blanks; /* Non-zero means ignore spaces and tabs. */
76 char fold_case; /* Non-zero means case doesn't matter. */
77 char reverse; /* Non-zero means compare in reverse order. */
78 char numeric; /* Non-zeros means field is ASCII numeric. */
79 char positional; /* Sort according to file position. */
80 char braced; /* Count balanced-braced groupings as fields. */
83 /* Vector of keyfields to use. */
84 struct keyfield keyfields[3];
86 /* Number of keyfields stored in that vector. */
87 int num_keyfields = 3;
89 /* Vector of input file names, terminated with a null pointer. */
90 char **infiles;
92 /* Vector of corresponding output file names, or NULL, meaning default it
93 (add an `s' to the end). */
94 char **outfiles;
96 /* Length of `infiles'. */
97 int num_infiles;
99 /* Pointer to the array of pointers to lines being sorted. */
100 char **linearray;
102 /* The allocated length of `linearray'. */
103 long nlines;
105 /* Directory to use for temporary files. On Unix, it ends with a slash. */
106 char *tempdir;
108 /* Start of filename to use for temporary files. */
109 char *tempbase;
111 /* Number of last temporary file. */
112 int tempcount;
114 /* Number of last temporary file already deleted.
115 Temporary files are deleted by `flush_tempfiles' in order of creation. */
116 int last_deleted_tempcount;
118 /* During in-core sort, this points to the base of the data block
119 which contains all the lines of data. */
120 char *text_base;
122 /* Additional command switches .*/
124 /* Nonzero means do not delete tempfiles -- for debugging. */
125 int keep_tempfiles;
127 /* The name this program was run with. */
128 char *program_name;
130 /* Forward declarations of functions in this file. */
132 void decode_command ();
133 void sort_in_core ();
134 void sort_offline ();
135 char **parsefile ();
136 char *find_field ();
137 char *find_pos ();
138 long find_value ();
139 char *find_braced_pos ();
140 char *find_braced_end ();
141 void writelines ();
142 int compare_field ();
143 int compare_full ();
144 long readline ();
145 int merge_files ();
146 int merge_direct ();
147 void pfatal_with_name ();
148 void fatal ();
149 void error ();
150 void *xmalloc (), *xrealloc ();
151 char *concat ();
152 char *maketempname ();
153 void flush_tempfiles ();
154 char *tempcopy ();
156 #define MAX_IN_CORE_SORT 500000
159 main (argc, argv)
160 int argc;
161 char **argv;
163 int i;
165 tempcount = 0;
166 last_deleted_tempcount = 0;
168 program_name = strrchr (argv[0], '/');
169 if (program_name != (char *)NULL)
170 program_name++;
171 else
172 program_name = argv[0];
174 #ifdef HAVE_SETLOCALE
175 /* Set locale via LC_ALL. */
176 setlocale (LC_ALL, "");
177 #endif
179 /* Set the text message domain. */
180 bindtextdomain (PACKAGE, LOCALEDIR);
181 textdomain (PACKAGE);
183 /* Describe the kind of sorting to do. */
184 /* The first keyfield uses the first braced field and folds case. */
185 keyfields[0].braced = 1;
186 keyfields[0].fold_case = 1;
187 keyfields[0].endwords = -1;
188 keyfields[0].endchars = -1;
190 /* The second keyfield uses the second braced field, numerically. */
191 keyfields[1].braced = 1;
192 keyfields[1].numeric = 1;
193 keyfields[1].startwords = 1;
194 keyfields[1].endwords = -1;
195 keyfields[1].endchars = -1;
197 /* The third keyfield (which is ignored while discarding duplicates)
198 compares the whole line. */
199 keyfields[2].endwords = -1;
200 keyfields[2].endchars = -1;
202 decode_command (argc, argv);
204 tempbase = mktemp (concat ("txiXXXXXX", "", ""));
206 /* Process input files completely, one by one. */
208 for (i = 0; i < num_infiles; i++)
210 int desc;
211 long ptr;
212 char *outfile;
214 desc = open (infiles[i], O_RDONLY, 0);
215 if (desc < 0)
216 pfatal_with_name (infiles[i]);
217 lseek (desc, (off_t) 0, SEEK_END);
218 ptr = (long) lseek (desc, (off_t) 0, SEEK_CUR);
220 close (desc);
222 outfile = outfiles[i];
223 if (!outfile)
225 outfile = concat (infiles[i], "s", "");
228 if (ptr < MAX_IN_CORE_SORT)
229 /* Sort a small amount of data. */
230 sort_in_core (infiles[i], ptr, outfile);
231 else
232 sort_offline (infiles[i], ptr, outfile);
235 flush_tempfiles (tempcount);
236 exit (TI_NO_ERROR);
238 return 0; /* Avoid bogus warnings. */
241 typedef struct
243 char *long_name;
244 char *short_name;
245 int *variable_ref;
246 int variable_value;
247 char *arg_name;
248 char *doc_string;
249 } TEXINDEX_OPTION;
251 TEXINDEX_OPTION texindex_options[] = {
252 { "--keep", "-k", &keep_tempfiles, 1, (char *)NULL,
253 N_("keep temporary files around after processing") },
254 { "--no-keep", 0, &keep_tempfiles, 0, (char *)NULL,
255 N_("do not keep temporary files around after processing (default)") },
256 { "--output", "-o", (int *)NULL, 0, "FILE",
257 N_("send output to FILE") },
258 { "--version", (char *)NULL, (int *)NULL, 0, (char *)NULL,
259 N_("display version information and exit") },
260 { "--help", "-h", (int *)NULL, 0, (char *)NULL,
261 N_("display this help and exit") },
262 { (char *)NULL, (char *)NULL, (int *)NULL, 0, (char *)NULL }
265 void
266 usage (result_value)
267 int result_value;
269 register int i;
270 FILE *f = result_value ? stderr : stdout;
272 fprintf (f, _("Usage: %s [OPTION]... FILE...\n"), program_name);
273 fprintf (f, _("Generate a sorted index for each TeX output FILE.\n"));
274 /* Avoid trigraph nonsense. */
275 fprintf (f, _("Usually FILE... is `foo.??\' for a document `foo.texi'.\n"));
276 fprintf (f, _("\nOptions:\n"));
278 for (i = 0; texindex_options[i].long_name; i++)
280 if (texindex_options[i].short_name)
281 fprintf (f, "%s, ", texindex_options[i].short_name);
283 fprintf (f, "%s %s",
284 texindex_options[i].long_name,
285 texindex_options[i].arg_name
286 ? texindex_options[i].arg_name : "");
288 fprintf (f, "\t%s\n", _(texindex_options[i].doc_string));
290 puts (_("\nEmail bug reports to bug-texinfo@gnu.org."));
292 exit (result_value);
295 /* Decode the command line arguments to set the parameter variables
296 and set up the vector of keyfields and the vector of input files. */
298 void
299 decode_command (argc, argv)
300 int argc;
301 char **argv;
303 int arg_index = 1;
304 char **ip;
305 char **op;
307 /* Store default values into parameter variables. */
309 tempdir = getenv ("TMPDIR");
310 #ifdef VMS
311 if (tempdir == NULL)
312 tempdir = "sys$scratch:";
313 #else
314 if (tempdir == NULL)
315 tempdir = "/tmp/";
316 else
317 tempdir = concat (tempdir, "/", "");
318 #endif
320 keep_tempfiles = 0;
322 /* Allocate ARGC input files, which must be enough. */
324 infiles = (char **) xmalloc (argc * sizeof (char *));
325 outfiles = (char **) xmalloc (argc * sizeof (char *));
326 ip = infiles;
327 op = outfiles;
329 while (arg_index < argc)
331 char *arg = argv[arg_index++];
333 if (*arg == '-')
335 if (strcmp (arg, "--version") == 0)
337 printf ("texindex (GNU %s) %s\n", PACKAGE, VERSION);
338 printf (_("Copyright (C) %s Free Software Foundation, Inc.\n\
339 There is NO warranty. You may redistribute this software\n\
340 under the terms of the GNU General Public License.\n\
341 For more information about these matters, see the files named COPYING.\n"),
342 "1998");
343 exit (0);
345 else if ((strcmp (arg, "--keep") == 0) ||
346 (strcmp (arg, "-k") == 0))
348 keep_tempfiles = 1;
350 else if ((strcmp (arg, "--help") == 0) ||
351 (strcmp (arg, "-h") == 0))
353 usage (0);
355 else if ((strcmp (arg, "--output") == 0) ||
356 (strcmp (arg, "-o") == 0))
358 if (argv[arg_index] != (char *)NULL)
360 arg_index++;
361 if (op > outfiles)
362 *(op - 1) = argv[arg_index];
364 else
365 usage (1);
367 else
368 usage (1);
370 else
372 *ip++ = arg;
373 *op++ = (char *)NULL;
377 /* Record number of keyfields and terminate list of filenames. */
378 num_infiles = ip - infiles;
379 *ip = (char *)NULL;
380 if (num_infiles == 0)
381 usage (1);
384 /* Return a name for a temporary file. */
386 char *
387 maketempname (count)
388 int count;
390 char tempsuffix[10];
391 sprintf (tempsuffix, "%d", count);
392 return concat (tempdir, tempbase, tempsuffix);
395 /* Delete all temporary files up to TO_COUNT. */
397 void
398 flush_tempfiles (to_count)
399 int to_count;
401 if (keep_tempfiles)
402 return;
403 while (last_deleted_tempcount < to_count)
404 unlink (maketempname (++last_deleted_tempcount));
407 /* Copy the input file open on IDESC into a temporary file
408 and return the temporary file name. */
410 #define BUFSIZE 1024
412 char *
413 tempcopy (idesc)
414 int idesc;
416 char *outfile = maketempname (++tempcount);
417 int odesc;
418 char buffer[BUFSIZE];
420 odesc = open (outfile, O_WRONLY | O_CREAT, 0666);
422 if (odesc < 0)
423 pfatal_with_name (outfile);
425 while (1)
427 int nread = read (idesc, buffer, BUFSIZE);
428 write (odesc, buffer, nread);
429 if (!nread)
430 break;
433 close (odesc);
435 return outfile;
438 /* Compare LINE1 and LINE2 according to the specified set of keyfields. */
441 compare_full (line1, line2)
442 char **line1, **line2;
444 int i;
446 /* Compare using the first keyfield;
447 if that does not distinguish the lines, try the second keyfield;
448 and so on. */
450 for (i = 0; i < num_keyfields; i++)
452 long length1, length2;
453 char *start1 = find_field (&keyfields[i], *line1, &length1);
454 char *start2 = find_field (&keyfields[i], *line2, &length2);
455 int tem = compare_field (&keyfields[i], start1, length1, *line1 - text_base,
456 start2, length2, *line2 - text_base);
457 if (tem)
459 if (keyfields[i].reverse)
460 return -tem;
461 return tem;
465 return 0; /* Lines match exactly. */
468 /* Compare LINE1 and LINE2, described by structures
469 in which the first keyfield is identified in advance.
470 For positional sorting, assumes that the order of the lines in core
471 reflects their nominal order. */
474 compare_prepared (line1, line2)
475 struct lineinfo *line1, *line2;
477 int i;
478 int tem;
479 char *text1, *text2;
481 /* Compare using the first keyfield, which has been found for us already. */
482 if (keyfields->positional)
484 if (line1->text - text_base > line2->text - text_base)
485 tem = 1;
486 else
487 tem = -1;
489 else if (keyfields->numeric)
490 tem = line1->key.number - line2->key.number;
491 else
492 tem = compare_field (keyfields, line1->key.text, line1->keylen, 0,
493 line2->key.text, line2->keylen, 0);
494 if (tem)
496 if (keyfields->reverse)
497 return -tem;
498 return tem;
501 text1 = line1->text;
502 text2 = line2->text;
504 /* Compare using the second keyfield;
505 if that does not distinguish the lines, try the third keyfield;
506 and so on. */
508 for (i = 1; i < num_keyfields; i++)
510 long length1, length2;
511 char *start1 = find_field (&keyfields[i], text1, &length1);
512 char *start2 = find_field (&keyfields[i], text2, &length2);
513 int tem = compare_field (&keyfields[i], start1, length1, text1 - text_base,
514 start2, length2, text2 - text_base);
515 if (tem)
517 if (keyfields[i].reverse)
518 return -tem;
519 return tem;
523 return 0; /* Lines match exactly. */
526 /* Like compare_full but more general.
527 You can pass any strings, and you can say how many keyfields to use.
528 POS1 and POS2 should indicate the nominal positional ordering of
529 the two lines in the input. */
532 compare_general (str1, str2, pos1, pos2, use_keyfields)
533 char *str1, *str2;
534 long pos1, pos2;
535 int use_keyfields;
537 int i;
539 /* Compare using the first keyfield;
540 if that does not distinguish the lines, try the second keyfield;
541 and so on. */
543 for (i = 0; i < use_keyfields; i++)
545 long length1, length2;
546 char *start1 = find_field (&keyfields[i], str1, &length1);
547 char *start2 = find_field (&keyfields[i], str2, &length2);
548 int tem = compare_field (&keyfields[i], start1, length1, pos1,
549 start2, length2, pos2);
550 if (tem)
552 if (keyfields[i].reverse)
553 return -tem;
554 return tem;
558 return 0; /* Lines match exactly. */
561 /* Find the start and length of a field in STR according to KEYFIELD.
562 A pointer to the starting character is returned, and the length
563 is stored into the int that LENGTHPTR points to. */
565 char *
566 find_field (keyfield, str, lengthptr)
567 struct keyfield *keyfield;
568 char *str;
569 long *lengthptr;
571 char *start;
572 char *end;
573 char *(*fun) ();
575 if (keyfield->braced)
576 fun = find_braced_pos;
577 else
578 fun = find_pos;
580 start = (*fun) (str, keyfield->startwords, keyfield->startchars,
581 keyfield->ignore_blanks);
582 if (keyfield->endwords < 0)
584 if (keyfield->braced)
585 end = find_braced_end (start);
586 else
588 end = start;
589 while (*end && *end != '\n')
590 end++;
593 else
595 end = (*fun) (str, keyfield->endwords, keyfield->endchars, 0);
596 if (end - str < start - str)
597 end = start;
599 *lengthptr = end - start;
600 return start;
603 /* Return a pointer to a specified place within STR,
604 skipping (from the beginning) WORDS words and then CHARS chars.
605 If IGNORE_BLANKS is nonzero, we skip all blanks
606 after finding the specified word. */
608 char *
609 find_pos (str, words, chars, ignore_blanks)
610 char *str;
611 int words, chars;
612 int ignore_blanks;
614 int i;
615 char *p = str;
617 for (i = 0; i < words; i++)
619 char c;
620 /* Find next bunch of nonblanks and skip them. */
621 while ((c = *p) == ' ' || c == '\t')
622 p++;
623 while ((c = *p) && c != '\n' && !(c == ' ' || c == '\t'))
624 p++;
625 if (!*p || *p == '\n')
626 return p;
629 while (*p == ' ' || *p == '\t')
630 p++;
632 for (i = 0; i < chars; i++)
634 if (!*p || *p == '\n')
635 break;
636 p++;
638 return p;
641 /* Like find_pos but assumes that each field is surrounded by braces
642 and that braces within fields are balanced. */
644 char *
645 find_braced_pos (str, words, chars, ignore_blanks)
646 char *str;
647 int words, chars;
648 int ignore_blanks;
650 int i;
651 int bracelevel;
652 char *p = str;
653 char c;
655 for (i = 0; i < words; i++)
657 bracelevel = 1;
658 while ((c = *p++) != '{' && c != '\n' && c)
659 /* Do nothing. */ ;
660 if (c != '{')
661 return p - 1;
662 while (bracelevel)
664 c = *p++;
665 if (c == '{')
666 bracelevel++;
667 if (c == '}')
668 bracelevel--;
669 if (c == 0 || c == '\n')
670 return p - 1;
674 while ((c = *p++) != '{' && c != '\n' && c)
675 /* Do nothing. */ ;
677 if (c != '{')
678 return p - 1;
680 if (ignore_blanks)
681 while ((c = *p) == ' ' || c == '\t')
682 p++;
684 for (i = 0; i < chars; i++)
686 if (!*p || *p == '\n')
687 break;
688 p++;
690 return p;
693 /* Find the end of the balanced-brace field which starts at STR.
694 The position returned is just before the closing brace. */
696 char *
697 find_braced_end (str)
698 char *str;
700 int bracelevel;
701 char *p = str;
702 char c;
704 bracelevel = 1;
705 while (bracelevel)
707 c = *p++;
708 if (c == '{')
709 bracelevel++;
710 if (c == '}')
711 bracelevel--;
712 if (c == 0 || c == '\n')
713 return p - 1;
715 return p - 1;
718 long
719 find_value (start, length)
720 char *start;
721 long length;
723 while (length != 0L)
725 if (isdigit (*start))
726 return atol (start);
727 length--;
728 start++;
730 return 0l;
733 /* Vector used to translate characters for comparison.
734 This is how we make all alphanumerics follow all else,
735 and ignore case in the first sorting. */
736 int char_order[256];
738 void
739 init_char_order ()
741 int i;
742 for (i = 1; i < 256; i++)
743 char_order[i] = i;
745 for (i = '0'; i <= '9'; i++)
746 char_order[i] += 512;
748 for (i = 'a'; i <= 'z'; i++)
750 char_order[i] = 512 + i;
751 char_order[i + 'A' - 'a'] = 512 + i;
755 /* Compare two fields (each specified as a start pointer and a character count)
756 according to KEYFIELD.
757 The sign of the value reports the relation between the fields. */
760 compare_field (keyfield, start1, length1, pos1, start2, length2, pos2)
761 struct keyfield *keyfield;
762 char *start1;
763 long length1;
764 long pos1;
765 char *start2;
766 long length2;
767 long pos2;
769 if (keyfields->positional)
771 if (pos1 > pos2)
772 return 1;
773 else
774 return -1;
776 if (keyfield->numeric)
778 long value = find_value (start1, length1) - find_value (start2, length2);
779 if (value > 0)
780 return 1;
781 if (value < 0)
782 return -1;
783 return 0;
785 else
787 char *p1 = start1;
788 char *p2 = start2;
789 char *e1 = start1 + length1;
790 char *e2 = start2 + length2;
792 while (1)
794 int c1, c2;
796 if (p1 == e1)
797 c1 = 0;
798 else
799 c1 = *p1++;
800 if (p2 == e2)
801 c2 = 0;
802 else
803 c2 = *p2++;
805 if (char_order[c1] != char_order[c2])
806 return char_order[c1] - char_order[c2];
807 if (!c1)
808 break;
811 /* Strings are equal except possibly for case. */
812 p1 = start1;
813 p2 = start2;
814 while (1)
816 int c1, c2;
818 if (p1 == e1)
819 c1 = 0;
820 else
821 c1 = *p1++;
822 if (p2 == e2)
823 c2 = 0;
824 else
825 c2 = *p2++;
827 if (c1 != c2)
828 /* Reverse sign here so upper case comes out last. */
829 return c2 - c1;
830 if (!c1)
831 break;
834 return 0;
838 /* A `struct linebuffer' is a structure which holds a line of text.
839 `readline' reads a line from a stream into a linebuffer
840 and works regardless of the length of the line. */
842 struct linebuffer
844 long size;
845 char *buffer;
848 /* Initialize LINEBUFFER for use. */
850 void
851 initbuffer (linebuffer)
852 struct linebuffer *linebuffer;
854 linebuffer->size = 200;
855 linebuffer->buffer = (char *) xmalloc (200);
858 /* Read a line of text from STREAM into LINEBUFFER.
859 Return the length of the line. */
861 long
862 readline (linebuffer, stream)
863 struct linebuffer *linebuffer;
864 FILE *stream;
866 char *buffer = linebuffer->buffer;
867 char *p = linebuffer->buffer;
868 char *end = p + linebuffer->size;
870 while (1)
872 int c = getc (stream);
873 if (p == end)
875 buffer = (char *) xrealloc (buffer, linebuffer->size *= 2);
876 p += buffer - linebuffer->buffer;
877 end += buffer - linebuffer->buffer;
878 linebuffer->buffer = buffer;
880 if (c < 0 || c == '\n')
882 *p = 0;
883 break;
885 *p++ = c;
888 return p - buffer;
891 /* Sort an input file too big to sort in core. */
893 void
894 sort_offline (infile, nfiles, total, outfile)
895 char *infile;
896 int nfiles;
897 long total;
898 char *outfile;
900 /* More than enough. */
901 int ntemps = 2 * (total + MAX_IN_CORE_SORT - 1) / MAX_IN_CORE_SORT;
902 char **tempfiles = (char **) xmalloc (ntemps * sizeof (char *));
903 FILE *istream = fopen (infile, "r");
904 int i;
905 struct linebuffer lb;
906 long linelength;
907 int failure = 0;
909 initbuffer (&lb);
911 /* Read in one line of input data. */
913 linelength = readline (&lb, istream);
915 if (lb.buffer[0] != '\\' && lb.buffer[0] != '@')
917 error (_("%s: not a texinfo index file"), infile);
918 return;
921 /* Split up the input into `ntemps' temporary files, or maybe fewer,
922 and put the new files' names into `tempfiles' */
924 for (i = 0; i < ntemps; i++)
926 char *outname = maketempname (++tempcount);
927 FILE *ostream = fopen (outname, "w");
928 long tempsize = 0;
930 if (!ostream)
931 pfatal_with_name (outname);
932 tempfiles[i] = outname;
934 /* Copy lines into this temp file as long as it does not make file
935 "too big" or until there are no more lines. */
937 while (tempsize + linelength + 1 <= MAX_IN_CORE_SORT)
939 tempsize += linelength + 1;
940 fputs (lb.buffer, ostream);
941 putc ('\n', ostream);
943 /* Read another line of input data. */
945 linelength = readline (&lb, istream);
946 if (!linelength && feof (istream))
947 break;
949 if (lb.buffer[0] != '\\' && lb.buffer[0] != '@')
951 error (_("%s: not a texinfo index file"), infile);
952 failure = 1;
953 goto fail;
956 fclose (ostream);
957 if (feof (istream))
958 break;
961 free (lb.buffer);
963 fail:
964 /* Record number of temp files we actually needed. */
966 ntemps = i;
968 /* Sort each tempfile into another tempfile.
969 Delete the first set of tempfiles and put the names of the second
970 into `tempfiles'. */
972 for (i = 0; i < ntemps; i++)
974 char *newtemp = maketempname (++tempcount);
975 sort_in_core (&tempfiles[i], MAX_IN_CORE_SORT, newtemp);
976 if (!keep_tempfiles)
977 unlink (tempfiles[i]);
978 tempfiles[i] = newtemp;
981 if (failure)
982 return;
984 /* Merge the tempfiles together and indexify. */
986 merge_files (tempfiles, ntemps, outfile);
989 /* Sort INFILE, whose size is TOTAL,
990 assuming that is small enough to be done in-core,
991 then indexify it and send the output to OUTFILE (or to stdout). */
993 void
994 sort_in_core (infile, total, outfile)
995 char *infile;
996 long total;
997 char *outfile;
999 char **nextline;
1000 char *data = (char *) xmalloc (total + 1);
1001 char *file_data;
1002 long file_size;
1003 int i;
1004 FILE *ostream = stdout;
1005 struct lineinfo *lineinfo;
1007 /* Read the contents of the file into the moby array `data'. */
1009 int desc = open (infile, O_RDONLY, 0);
1011 if (desc < 0)
1012 fatal (_("failure reopening %s"), infile);
1013 for (file_size = 0;;)
1015 i = read (desc, data + file_size, total - file_size);
1016 if (i <= 0)
1017 break;
1018 file_size += i;
1020 file_data = data;
1021 data[file_size] = 0;
1023 close (desc);
1025 if (file_size > 0 && data[0] != '\\' && data[0] != '@')
1027 error (_("%s: not a texinfo index file"), infile);
1028 return;
1031 init_char_order ();
1033 /* Sort routines want to know this address. */
1035 text_base = data;
1037 /* Create the array of pointers to lines, with a default size
1038 frequently enough. */
1040 nlines = total / 50;
1041 if (!nlines)
1042 nlines = 2;
1043 linearray = (char **) xmalloc (nlines * sizeof (char *));
1045 /* `nextline' points to the next free slot in this array.
1046 `nlines' is the allocated size. */
1048 nextline = linearray;
1050 /* Parse the input file's data, and make entries for the lines. */
1052 nextline = parsefile (infile, nextline, file_data, file_size);
1053 if (nextline == 0)
1055 error (_("%s: not a texinfo index file"), infile);
1056 return;
1059 /* Sort the lines. */
1061 /* If we have enough space, find the first keyfield of each line in advance.
1062 Make a `struct lineinfo' for each line, which records the keyfield
1063 as well as the line, and sort them. */
1065 lineinfo = (struct lineinfo *) malloc ((nextline - linearray) * sizeof (struct lineinfo));
1067 if (lineinfo)
1069 struct lineinfo *lp;
1070 char **p;
1072 for (lp = lineinfo, p = linearray; p != nextline; lp++, p++)
1074 lp->text = *p;
1075 lp->key.text = find_field (keyfields, *p, &lp->keylen);
1076 if (keyfields->numeric)
1077 lp->key.number = find_value (lp->key.text, lp->keylen);
1080 qsort (lineinfo, nextline - linearray, sizeof (struct lineinfo),
1081 compare_prepared);
1083 for (lp = lineinfo, p = linearray; p != nextline; lp++, p++)
1084 *p = lp->text;
1086 free (lineinfo);
1088 else
1089 qsort (linearray, nextline - linearray, sizeof (char *), compare_full);
1091 /* Open the output file. */
1093 if (outfile)
1095 ostream = fopen (outfile, "w");
1096 if (!ostream)
1097 pfatal_with_name (outfile);
1100 writelines (linearray, nextline - linearray, ostream);
1101 if (outfile)
1102 fclose (ostream);
1104 free (linearray);
1105 free (data);
1108 /* Parse an input string in core into lines.
1109 DATA is the input string, and SIZE is its length.
1110 Data goes in LINEARRAY starting at NEXTLINE.
1111 The value returned is the first entry in LINEARRAY still unused.
1112 Value 0 means input file contents are invalid. */
1114 char **
1115 parsefile (filename, nextline, data, size)
1116 char *filename;
1117 char **nextline;
1118 char *data;
1119 long size;
1121 char *p, *end;
1122 char **line = nextline;
1124 p = data;
1125 end = p + size;
1126 *end = 0;
1128 while (p != end)
1130 if (p[0] != '\\' && p[0] != '@')
1131 return 0;
1133 *line = p;
1134 while (*p && *p != '\n')
1135 p++;
1136 if (p != end)
1137 p++;
1139 line++;
1140 if (line == linearray + nlines)
1142 char **old = linearray;
1143 linearray = (char **) xrealloc (linearray, sizeof (char *) * (nlines *= 4));
1144 line += linearray - old;
1148 return line;
1151 /* Indexification is a filter applied to the sorted lines
1152 as they are being written to the output file.
1153 Multiple entries for the same name, with different page numbers,
1154 get combined into a single entry with multiple page numbers.
1155 The first braced field, which is used for sorting, is discarded.
1156 However, its first character is examined, folded to lower case,
1157 and if it is different from that in the previous line fed to us
1158 a \initial line is written with one argument, the new initial.
1160 If an entry has four braced fields, then the second and third
1161 constitute primary and secondary names.
1162 In this case, each change of primary name
1163 generates a \primary line which contains only the primary name,
1164 and in between these are \secondary lines which contain
1165 just a secondary name and page numbers. */
1167 /* The last primary name we wrote a \primary entry for.
1168 If only one level of indexing is being done, this is the last name seen. */
1169 char *lastprimary;
1170 /* Length of storage allocated for lastprimary. */
1171 int lastprimarylength;
1173 /* Similar, for the secondary name. */
1174 char *lastsecondary;
1175 int lastsecondarylength;
1177 /* Zero if we are not in the middle of writing an entry.
1178 One if we have written the beginning of an entry but have not
1179 yet written any page numbers into it.
1180 Greater than one if we have written the beginning of an entry
1181 plus at least one page number. */
1182 int pending;
1184 /* The initial (for sorting purposes) of the last primary entry written.
1185 When this changes, a \initial {c} line is written */
1187 char *lastinitial;
1189 int lastinitiallength;
1191 /* When we need a string of length 1 for the value of lastinitial,
1192 store it here. */
1194 char lastinitial1[2];
1196 /* Initialize static storage for writing an index. */
1198 void
1199 init_index ()
1201 pending = 0;
1202 lastinitial = lastinitial1;
1203 lastinitial1[0] = 0;
1204 lastinitial1[1] = 0;
1205 lastinitiallength = 0;
1206 lastprimarylength = 100;
1207 lastprimary = (char *) xmalloc (lastprimarylength + 1);
1208 memset (lastprimary, '\0', lastprimarylength + 1);
1209 lastsecondarylength = 100;
1210 lastsecondary = (char *) xmalloc (lastsecondarylength + 1);
1211 memset (lastsecondary, '\0', lastsecondarylength + 1);
1214 /* Indexify. Merge entries for the same name,
1215 insert headers for each initial character, etc. */
1217 void
1218 indexify (line, ostream)
1219 char *line;
1220 FILE *ostream;
1222 char *primary, *secondary, *pagenumber;
1223 int primarylength, secondarylength = 0, pagelength;
1224 int nosecondary;
1225 int initiallength;
1226 char *initial;
1227 char initial1[2];
1228 register char *p;
1230 /* First, analyze the parts of the entry fed to us this time. */
1232 p = find_braced_pos (line, 0, 0, 0);
1233 if (*p == '{')
1235 initial = p;
1236 /* Get length of inner pair of braces starting at `p',
1237 including that inner pair of braces. */
1238 initiallength = find_braced_end (p + 1) + 1 - p;
1240 else
1242 initial = initial1;
1243 initial1[0] = *p;
1244 initial1[1] = 0;
1245 initiallength = 1;
1247 if (initial1[0] >= 'a' && initial1[0] <= 'z')
1248 initial1[0] -= 040;
1251 pagenumber = find_braced_pos (line, 1, 0, 0);
1252 pagelength = find_braced_end (pagenumber) - pagenumber;
1253 if (pagelength == 0)
1254 abort ();
1256 primary = find_braced_pos (line, 2, 0, 0);
1257 primarylength = find_braced_end (primary) - primary;
1259 secondary = find_braced_pos (line, 3, 0, 0);
1260 nosecondary = !*secondary;
1261 if (!nosecondary)
1262 secondarylength = find_braced_end (secondary) - secondary;
1264 /* If the primary is different from before, make a new primary entry. */
1265 if (strncmp (primary, lastprimary, primarylength))
1267 /* Close off current secondary entry first, if one is open. */
1268 if (pending)
1270 fputs ("}\n", ostream);
1271 pending = 0;
1274 /* If this primary has a different initial, include an entry for
1275 the initial. */
1276 if (initiallength != lastinitiallength ||
1277 strncmp (initial, lastinitial, initiallength))
1279 fprintf (ostream, "\\initial {");
1280 fwrite (initial, 1, initiallength, ostream);
1281 fputs ("}\n", ostream);
1282 if (initial == initial1)
1284 lastinitial = lastinitial1;
1285 *lastinitial1 = *initial1;
1287 else
1289 lastinitial = initial;
1291 lastinitiallength = initiallength;
1294 /* Make the entry for the primary. */
1295 if (nosecondary)
1296 fputs ("\\entry {", ostream);
1297 else
1298 fputs ("\\primary {", ostream);
1299 fwrite (primary, primarylength, 1, ostream);
1300 if (nosecondary)
1302 fputs ("}{", ostream);
1303 pending = 1;
1305 else
1306 fputs ("}\n", ostream);
1308 /* Record name of most recent primary. */
1309 if (lastprimarylength < primarylength)
1311 lastprimarylength = primarylength + 100;
1312 lastprimary = (char *) xrealloc (lastprimary,
1313 1 + lastprimarylength);
1315 strncpy (lastprimary, primary, primarylength);
1316 lastprimary[primarylength] = 0;
1318 /* There is no current secondary within this primary, now. */
1319 lastsecondary[0] = 0;
1322 /* Should not have an entry with no subtopic following one with a subtopic. */
1324 if (nosecondary && *lastsecondary)
1325 error (_("entry %s follows an entry with a secondary name"), line);
1327 /* Start a new secondary entry if necessary. */
1328 if (!nosecondary && strncmp (secondary, lastsecondary, secondarylength))
1330 if (pending)
1332 fputs ("}\n", ostream);
1333 pending = 0;
1336 /* Write the entry for the secondary. */
1337 fputs ("\\secondary {", ostream);
1338 fwrite (secondary, secondarylength, 1, ostream);
1339 fputs ("}{", ostream);
1340 pending = 1;
1342 /* Record name of most recent secondary. */
1343 if (lastsecondarylength < secondarylength)
1345 lastsecondarylength = secondarylength + 100;
1346 lastsecondary = (char *) xrealloc (lastsecondary,
1347 1 + lastsecondarylength);
1349 strncpy (lastsecondary, secondary, secondarylength);
1350 lastsecondary[secondarylength] = 0;
1353 /* Here to add one more page number to the current entry. */
1354 if (pending++ != 1)
1355 fputs (", ", ostream); /* Punctuate first, if this is not the first. */
1356 fwrite (pagenumber, pagelength, 1, ostream);
1359 /* Close out any unfinished output entry. */
1361 void
1362 finish_index (ostream)
1363 FILE *ostream;
1365 if (pending)
1366 fputs ("}\n", ostream);
1367 free (lastprimary);
1368 free (lastsecondary);
1371 /* Copy the lines in the sorted order.
1372 Each line is copied out of the input file it was found in. */
1374 void
1375 writelines (linearray, nlines, ostream)
1376 char **linearray;
1377 int nlines;
1378 FILE *ostream;
1380 char **stop_line = linearray + nlines;
1381 char **next_line;
1383 init_index ();
1385 /* Output the text of the lines, and free the buffer space. */
1387 for (next_line = linearray; next_line != stop_line; next_line++)
1389 /* If -u was specified, output the line only if distinct from previous one. */
1390 if (next_line == linearray
1391 /* Compare previous line with this one, using only the
1392 explicitly specd keyfields. */
1393 || compare_general (*(next_line - 1), *next_line, 0L, 0L, num_keyfields - 1))
1395 char *p = *next_line;
1396 char c;
1398 while ((c = *p++) && c != '\n')
1399 /* Do nothing. */ ;
1400 *(p - 1) = 0;
1401 indexify (*next_line, ostream);
1405 finish_index (ostream);
1408 /* Assume (and optionally verify) that each input file is sorted;
1409 merge them and output the result.
1410 Returns nonzero if any input file fails to be sorted.
1412 This is the high-level interface that can handle an unlimited
1413 number of files. */
1415 #define MAX_DIRECT_MERGE 10
1418 merge_files (infiles, nfiles, outfile)
1419 char **infiles;
1420 int nfiles;
1421 char *outfile;
1423 char **tempfiles;
1424 int ntemps;
1425 int i;
1426 int value = 0;
1427 int start_tempcount = tempcount;
1429 if (nfiles <= MAX_DIRECT_MERGE)
1430 return merge_direct (infiles, nfiles, outfile);
1432 /* Merge groups of MAX_DIRECT_MERGE input files at a time,
1433 making a temporary file to hold each group's result. */
1435 ntemps = (nfiles + MAX_DIRECT_MERGE - 1) / MAX_DIRECT_MERGE;
1436 tempfiles = (char **) xmalloc (ntemps * sizeof (char *));
1437 for (i = 0; i < ntemps; i++)
1439 int nf = MAX_DIRECT_MERGE;
1440 if (i + 1 == ntemps)
1441 nf = nfiles - i * MAX_DIRECT_MERGE;
1442 tempfiles[i] = maketempname (++tempcount);
1443 value |= merge_direct (&infiles[i * MAX_DIRECT_MERGE], nf, tempfiles[i]);
1446 /* All temporary files that existed before are no longer needed
1447 since their contents have been merged into our new tempfiles.
1448 So delete them. */
1449 flush_tempfiles (start_tempcount);
1451 /* Now merge the temporary files we created. */
1453 merge_files (tempfiles, ntemps, outfile);
1455 free (tempfiles);
1457 return value;
1460 /* Assume (and optionally verify) that each input file is sorted;
1461 merge them and output the result.
1462 Returns nonzero if any input file fails to be sorted.
1464 This version of merging will not work if the number of
1465 input files gets too high. Higher level functions
1466 use it only with a bounded number of input files. */
1469 merge_direct (infiles, nfiles, outfile)
1470 char **infiles;
1471 int nfiles;
1472 char *outfile;
1474 struct linebuffer *lb1, *lb2;
1475 struct linebuffer **thisline, **prevline;
1476 FILE **streams;
1477 int i;
1478 int nleft;
1479 int lossage = 0;
1480 int *file_lossage;
1481 struct linebuffer *prev_out = 0;
1482 FILE *ostream = stdout;
1484 if (outfile)
1486 ostream = fopen (outfile, "w");
1488 if (!ostream)
1489 pfatal_with_name (outfile);
1491 init_index ();
1493 if (nfiles == 0)
1495 if (outfile)
1496 fclose (ostream);
1497 return 0;
1500 /* For each file, make two line buffers.
1501 Also, for each file, there is an element of `thisline'
1502 which points at any time to one of the file's two buffers,
1503 and an element of `prevline' which points to the other buffer.
1504 `thisline' is supposed to point to the next available line from the file,
1505 while `prevline' holds the last file line used,
1506 which is remembered so that we can verify that the file is properly sorted. */
1508 /* lb1 and lb2 contain one buffer each per file. */
1509 lb1 = (struct linebuffer *) xmalloc (nfiles * sizeof (struct linebuffer));
1510 lb2 = (struct linebuffer *) xmalloc (nfiles * sizeof (struct linebuffer));
1512 /* thisline[i] points to the linebuffer holding the next available line in file i,
1513 or is zero if there are no lines left in that file. */
1514 thisline = (struct linebuffer **)
1515 xmalloc (nfiles * sizeof (struct linebuffer *));
1516 /* prevline[i] points to the linebuffer holding the last used line
1517 from file i. This is just for verifying that file i is properly
1518 sorted. */
1519 prevline = (struct linebuffer **)
1520 xmalloc (nfiles * sizeof (struct linebuffer *));
1521 /* streams[i] holds the input stream for file i. */
1522 streams = (FILE **) xmalloc (nfiles * sizeof (FILE *));
1523 /* file_lossage[i] is nonzero if we already know file i is not
1524 properly sorted. */
1525 file_lossage = (int *) xmalloc (nfiles * sizeof (int));
1527 /* Allocate and initialize all that storage. */
1529 for (i = 0; i < nfiles; i++)
1531 initbuffer (&lb1[i]);
1532 initbuffer (&lb2[i]);
1533 thisline[i] = &lb1[i];
1534 prevline[i] = &lb2[i];
1535 file_lossage[i] = 0;
1536 streams[i] = fopen (infiles[i], "r");
1537 if (!streams[i])
1538 pfatal_with_name (infiles[i]);
1540 readline (thisline[i], streams[i]);
1543 /* Keep count of number of files not at eof. */
1544 nleft = nfiles;
1546 while (nleft)
1548 struct linebuffer *best = 0;
1549 struct linebuffer *exch;
1550 int bestfile = -1;
1551 int i;
1553 /* Look at the next avail line of each file; choose the least one. */
1555 for (i = 0; i < nfiles; i++)
1557 if (thisline[i] &&
1558 (!best ||
1559 0 < compare_general (best->buffer, thisline[i]->buffer,
1560 (long) bestfile, (long) i, num_keyfields)))
1562 best = thisline[i];
1563 bestfile = i;
1567 /* Output that line, unless it matches the previous one and we
1568 don't want duplicates. */
1570 if (!(prev_out &&
1571 !compare_general (prev_out->buffer,
1572 best->buffer, 0L, 1L, num_keyfields - 1)))
1573 indexify (best->buffer, ostream);
1574 prev_out = best;
1576 /* Now make the line the previous of its file, and fetch a new
1577 line from that file. */
1579 exch = prevline[bestfile];
1580 prevline[bestfile] = thisline[bestfile];
1581 thisline[bestfile] = exch;
1583 while (1)
1585 /* If the file has no more, mark it empty. */
1587 if (feof (streams[bestfile]))
1589 thisline[bestfile] = 0;
1590 /* Update the number of files still not empty. */
1591 nleft--;
1592 break;
1594 readline (thisline[bestfile], streams[bestfile]);
1595 if (thisline[bestfile]->buffer[0] || !feof (streams[bestfile]))
1596 break;
1600 finish_index (ostream);
1602 /* Free all storage and close all input streams. */
1604 for (i = 0; i < nfiles; i++)
1606 fclose (streams[i]);
1607 free (lb1[i].buffer);
1608 free (lb2[i].buffer);
1610 free (file_lossage);
1611 free (lb1);
1612 free (lb2);
1613 free (thisline);
1614 free (prevline);
1615 free (streams);
1617 if (outfile)
1618 fclose (ostream);
1620 return lossage;
1623 /* Print error message and exit. */
1625 void
1626 fatal (format, arg)
1627 char *format, *arg;
1629 error (format, arg);
1630 exit (TI_FATAL_ERROR);
1633 /* Print error message. FORMAT is printf control string, ARG is arg for it. */
1634 void
1635 error (format, arg)
1636 char *format, *arg;
1638 printf ("%s: ", program_name);
1639 printf (format, arg);
1640 if (format[strlen (format) -1] != '\n')
1641 printf ("\n");
1644 void
1645 perror_with_name (name)
1646 char *name;
1648 char *s;
1650 s = strerror (errno);
1651 printf ("%s: ", program_name);
1652 printf ("%s; for file `%s'.\n", s, name);
1655 void
1656 pfatal_with_name (name)
1657 char *name;
1659 char *s;
1661 s = strerror (errno);
1662 printf ("%s: ", program_name);
1663 printf (_("%s; for file `%s'.\n"), s, name);
1664 exit (TI_FATAL_ERROR);
1667 /* Return a newly-allocated string whose contents concatenate those of
1668 S1, S2, S3. */
1670 char *
1671 concat (s1, s2, s3)
1672 char *s1, *s2, *s3;
1674 int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
1675 char *result = (char *) xmalloc (len1 + len2 + len3 + 1);
1677 strcpy (result, s1);
1678 strcpy (result + len1, s2);
1679 strcpy (result + len1 + len2, s3);
1680 *(result + len1 + len2 + len3) = 0;
1682 return result;
1685 #if !defined (HAVE_STRERROR)
1686 extern char *sys_errlist[];
1687 extern int sys_nerr;
1689 char *
1690 strerror (num)
1691 int num;
1693 if (num >= sys_nerr)
1694 return ("");
1695 else
1696 return (sys_errlist[num]);
1698 #endif /* !HAVE_STRERROR */
1700 #if !defined (HAVE_STRCHR)
1701 char *
1702 strrchr (string, character)
1703 char *string;
1704 int character;
1706 register int i;
1708 for (i = strlen (string) - 1; i > -1; i--)
1709 if (string[i] == character)
1710 return (string + i);
1712 return ((char *)NULL);
1714 #endif /* HAVE_STRCHR */
1716 void
1717 memory_error (callers_name, bytes_wanted)
1718 char *callers_name;
1719 int bytes_wanted;
1721 char printable_string[80];
1723 sprintf (printable_string,
1724 _("Virtual memory exhausted in %s ()! Needed %d bytes."),
1725 callers_name, bytes_wanted);
1727 error (printable_string);
1728 abort ();
1731 /* Just like malloc, but kills the program in case of fatal error. */
1732 void *
1733 xmalloc (nbytes)
1734 int nbytes;
1736 void *temp = (void *) malloc (nbytes);
1738 if (nbytes && temp == (void *)NULL)
1739 memory_error ("xmalloc", nbytes);
1741 return (temp);
1744 /* Like realloc (), but barfs if there isn't enough memory. */
1745 void *
1746 xrealloc (pointer, nbytes)
1747 void *pointer;
1748 int nbytes;
1750 void *temp;
1752 if (!pointer)
1753 temp = (void *)xmalloc (nbytes);
1754 else
1755 temp = (void *)realloc (pointer, nbytes);
1757 if (nbytes && !temp)
1758 memory_error ("xrealloc", nbytes);
1760 return (temp);