Release 18.59
[emacs.git] / man / texindex.c
blobc6696e77ba835ff122abe40a65b9de0f972913a7
1 /* Prepare Tex index dribble output into an actual index.
2 Copyright (C) 1987 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 1, or (at your option)
7 any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 In other words, you are welcome to use, share and improve this program.
19 You are forbidden to forbid anyone else to use, share and improve
20 what you give them. Help stamp out software-hoarding! */
23 #include <stdio.h>
24 #include <ctype.h>
26 #ifdef VMS
27 #include <file.h>
29 #define EXIT_SUCCESS ((1 << 28) | 1)
30 #define EXIT_FATAL ((1 << 28) | 4)
31 #define unlink delete
32 #define tell(fd) lseek(fd, 0L, 1)
33 #else
34 #include <sys/types.h>
35 #include <sys/fcntl.h>
36 #include <sys/file.h>
38 #define EXIT_SUCCESS 0
39 #define EXIT_FATAL 1
40 #endif
43 #ifndef L_XTND
44 #define L_XTND 2
45 #endif
47 /* When sorting in core, this structure describes one line
48 and the position and length of its first keyfield. */
50 struct lineinfo
52 char *text; /* The actual text of the line */
53 union
54 { /* The start of the key (for textual comparison) */
55 char *text;
56 long number; /* or the numeric value (for numeric comparison) */
57 } key;
58 long keylen; /* Length of key field */
61 /* This structure describes a field to use as a sort key */
63 struct keyfield
65 int startwords; /* # words to skip */
66 int startchars; /* and # additional chars to skip, to start of field */
67 int endwords; /* similar, from beg (or end) of line, to find end of field */
68 int endchars;
69 char ignore_blanks; /* Ignore spaces and tabs within the field */
70 char fold_case; /* Convert upper case to lower before comparing */
71 char reverse; /* Compare in reverse order */
72 char numeric; /* Parse text as an integer and compare the integers */
73 char positional; /* Sort according to position within the file */
74 char braced; /* Count balanced-braced groupings as fields */
77 /* Vector of keyfields to use */
79 struct keyfield keyfields[3];
81 /* Number of keyfields stored in that vector. */
83 int num_keyfields = 3;
85 /* Vector of input file names, terminated with a zero (null pointer) */
87 char **infiles;
89 /* Vector of corresponding output file names, or zero meaning default it */
91 char **outfiles;
93 /* Length of `infiles' */
95 int num_infiles;
97 /* Pointer to the array of pointers to lines being sorted */
99 char **linearray;
101 /* The allocated length of `linearray'. */
103 long lines;
105 /* Directory to use for temporary files. On Unix, it ends with a slash. */
107 char *tempdir;
109 /* Start of filename to use for temporary files. */
111 char *tempbase;
113 /* Number of last temporary file. */
115 int tempcount;
117 /* Number of last temporary file already deleted.
118 Temporary files are deleted by `flush_tempfiles' in order of creation. */
120 int last_deleted_tempcount;
122 /* During in-core sort, this points to the base of the data block
123 which contains all the lines of data. */
125 char *text_base;
127 /* Additional command switches */
129 int keep_tempfiles; /* Nonzero means do not delete tempfiles -- for debugging */
131 /* Forward declarations of functions in this file */
133 void decode_command ();
134 void sort_in_core ();
135 void sort_offline ();
136 char **parsefile ();
137 char *find_field ();
138 char *find_pos ();
139 long find_value ();
140 char *find_braced_pos ();
141 char *find_braced_end ();
142 void writelines ();
143 int compare_full ();
144 long readline ();
145 int merge_files ();
146 int merge_direct ();
147 char *concat ();
148 char *maketempname ();
149 void flush_tempfiles ();
150 char *tempcopy ();
152 extern char *mktemp ();
154 #define MAX_IN_CORE_SORT 500000
157 main (argc, argv)
158 int argc;
159 char **argv;
161 int i;
163 tempcount = 0;
164 last_deleted_tempcount = 0;
166 /* Describe the kind of sorting to do. */
167 /* The first keyfield uses the first braced field and folds case */
168 keyfields[0].braced = 1;
169 keyfields[0].fold_case = 1;
170 keyfields[0].endwords = -1;
171 keyfields[0].endchars = -1;
172 /* The second keyfield uses the second braced field, numerically */
173 keyfields[1].braced = 1;
174 keyfields[1].numeric = 1;
175 keyfields[1].startwords = 1;
176 keyfields[1].endwords = -1;
177 keyfields[1].endchars = -1;
178 /* The third keyfield (which is ignored while discarding duplicates)
179 compares the whole line */
180 keyfields[2].endwords = -1;
181 keyfields[2].endchars = -1;
183 decode_command (argc, argv);
185 tempbase = mktemp (concat ("txiXXXXXX", "", ""));
187 /* Process input files completely, one by one. */
189 for (i = 0; i < num_infiles; i++)
191 int desc;
192 long ptr;
193 char *outfile;
194 char *p;
196 desc = open (infiles[i], 0, 0);
197 if (desc < 0) pfatal_with_name (infiles[i]);
198 lseek (desc, 0, L_XTND);
199 ptr = tell (desc);
200 close (desc);
202 outfile = outfiles[i];
203 if (!outfile)
205 outfile = concat (infiles[i], "s", "");
208 if (ptr < MAX_IN_CORE_SORT)
209 /* Sort a small amount of data */
210 sort_in_core (infiles[i], ptr, outfile);
211 else
212 sort_offline (infiles[i], ptr, outfile);
215 flush_tempfiles (tempcount);
216 exit (EXIT_SUCCESS);
219 /* This page decodes the command line arguments to set the parameter variables
220 and set up the vector of keyfields and the vector of input files */
222 void
223 decode_command (argc, argv)
224 int argc;
225 char **argv;
227 int i;
228 char **ip;
229 char **op;
231 /* Store default values into parameter variables */
233 #ifdef VMS
234 tempdir = "sys$scratch:";
235 #else
236 tempdir = "/tmp/";
237 #endif
239 keep_tempfiles = 0;
241 /* Allocate argc input files, which must be enough. */
243 infiles = (char **) xmalloc (argc * sizeof (char *));
244 outfiles = (char **) xmalloc (argc * sizeof (char *));
245 ip = infiles;
246 op = outfiles;
248 /* First find all switches that control the default kind-of-sort */
250 for (i = 1; i < argc; i++)
252 int tem = classify_arg (argv[i]);
253 char c;
254 char *p;
256 if (tem <= 0)
258 *ip++ = argv[i];
259 *op++ = 0;
260 continue;
262 if (tem > 1)
264 if (i + 1 == argc)
265 fatal ("switch %s given with no argument following it", argv[i]);
266 else if (!strcmp (argv[i], "-T"))
267 tempdir = argv[i + 1];
268 else if (!strcmp (argv[i], "-o"))
269 *(op - 1) = argv[i + 1];
270 i += tem - 1;
271 continue;
274 p = &argv[i][1];
275 while (c = *p++)
276 switch (c)
278 case 'k':
279 keep_tempfiles = 1;
280 break;
282 default:
283 fatal ("invalid command switch %c", c);
285 switchdone: ;
288 /* Record number of keyfields, terminate list of filenames */
290 num_infiles = ip - infiles;
291 *ip = 0;
294 /* Return 0 for an argument that is not a switch;
295 for a switch, return 1 plus the number of following arguments that the switch swallows.
299 classify_arg (arg)
300 char *arg;
302 if (!strcmp (arg, "-T") || !strcmp (arg, "-o"))
303 return 2;
304 if (arg[0] == '-')
305 return 1;
306 return 0;
309 /* Create a name for a temporary file */
311 char *
312 maketempname (count)
313 int count;
315 char tempsuffix[10];
316 sprintf (tempsuffix, "%d", count);
317 return concat (tempdir, tempbase, tempsuffix);
320 /* Delete all temporary files up to the specified count */
322 void
323 flush_tempfiles (to_count)
324 int to_count;
326 if (keep_tempfiles) return;
327 while (last_deleted_tempcount < to_count)
328 unlink (maketempname (++last_deleted_tempcount));
331 /* Copy an input file into a temporary file, and return the temporary file name */
333 #define BUFSIZE 1024
335 char *
336 tempcopy (idesc)
337 int idesc;
339 char *outfile = maketempname (++tempcount);
340 int odesc;
341 char buffer[BUFSIZE];
343 odesc = open (outfile, O_WRONLY | O_CREAT, 0666);
345 if (odesc < 0) pfatal_with_name (outfile);
347 while (1)
349 int nread = read (idesc, buffer, BUFSIZE);
350 write (odesc, buffer, nread);
351 if (!nread) break;
354 close (odesc);
356 return outfile;
359 /* Compare two lines, provided as pointers to pointers to text,
360 according to the specified set of keyfields */
363 compare_full (line1, line2)
364 char **line1, **line2;
366 int i;
368 /* Compare using the first keyfield;
369 if that does not distinguish the lines, try the second keyfield; and so on. */
371 for (i = 0; i < num_keyfields; i++)
373 long length1, length2;
374 char *start1 = find_field (&keyfields[i], *line1, &length1);
375 char *start2 = find_field (&keyfields[i], *line2, &length2);
376 int tem = compare_field (&keyfields[i], start1, length1, *line1 - text_base,
377 start2, length2, *line2 - text_base);
378 if (tem)
380 if (keyfields[i].reverse)
381 return - tem;
382 return tem;
386 return 0; /* Lines match exactly */
389 /* Compare two lines described by structures
390 in which the first keyfield is identified in advance.
391 For positional sorting, assumes that the order of the lines in core
392 reflects their nominal order. */
395 compare_prepared (line1, line2)
396 struct lineinfo *line1, *line2;
398 int i;
399 int tem;
400 char *text1, *text2;
402 /* Compare using the first keyfield, which has been found for us already */
403 if (keyfields->positional)
405 if (line1->text - text_base > line2->text - text_base)
406 tem = 1;
407 else
408 tem = -1;
410 else if (keyfields->numeric)
411 tem = line1->key.number - line2->key.number;
412 else
413 tem = compare_field (keyfields, line1->key.text, line1->keylen, 0, line2->key.text, line2->keylen, 0);
414 if (tem)
416 if (keyfields->reverse)
417 return - tem;
418 return tem;
421 text1 = line1->text;
422 text2 = line2->text;
424 /* Compare using the second keyfield;
425 if that does not distinguish the lines, try the third keyfield; and so on. */
427 for (i = 1; i < num_keyfields; i++)
429 long length1, length2;
430 char *start1 = find_field (&keyfields[i], text1, &length1);
431 char *start2 = find_field (&keyfields[i], text2, &length2);
432 int tem = compare_field (&keyfields[i], start1, length1, text1 - text_base,
433 start2, length2, text2 - text_base);
434 if (tem)
436 if (keyfields[i].reverse)
437 return - tem;
438 return tem;
442 return 0; /* Lines match exactly */
445 /* Like compare_full but more general.
446 You can pass any strings, and you can say how many keyfields to use.
447 `pos1' and `pos2' should indicate the nominal positional ordering of
448 the two lines in the input. */
451 compare_general (str1, str2, pos1, pos2, use_keyfields)
452 char *str1, *str2;
453 long pos1, pos2;
454 int use_keyfields;
456 int i;
458 /* Compare using the first keyfield;
459 if that does not distinguish the lines, try the second keyfield; and so on. */
461 for (i = 0; i < use_keyfields; i++)
463 long length1, length2;
464 char *start1 = find_field (&keyfields[i], str1, &length1);
465 char *start2 = find_field (&keyfields[i], str2, &length2);
466 int tem = compare_field (&keyfields[i], start1, length1, pos1, start2, length2, pos2);
467 if (tem)
469 if (keyfields[i].reverse)
470 return - tem;
471 return tem;
475 return 0; /* Lines match exactly */
478 /* Find the start and length of a field in `str' according to `keyfield'.
479 A pointer to the starting character is returned, and the length
480 is stored into the int that `lengthptr' points to. */
482 char *
483 find_field (keyfield, str, lengthptr)
484 struct keyfield *keyfield;
485 char *str;
486 long *lengthptr;
488 char *start;
489 char *end;
490 char *(*fun) ();
492 if (keyfield->braced) fun = find_braced_pos;
493 else fun = find_pos;
495 start = ( *fun )(str, keyfield->startwords, keyfield->startchars,
496 keyfield->ignore_blanks);
497 if (keyfield->endwords < 0)
499 if (keyfield->braced)
500 end = find_braced_end (start);
501 else
503 end = start;
504 while (*end && *end != '\n') end++;
507 else
509 end = ( *fun )(str, keyfield->endwords, keyfield->endchars, 0);
510 if (end - str < start - str) end = start;
512 *lengthptr = end - start;
513 return start;
516 /* Find a pointer to a specified place within `str',
517 skipping (from the beginning) `words' words and then `chars' chars.
518 If `ignore_blanks' is nonzero, we skip all blanks
519 after finding the specified word. */
521 char *
522 find_pos (str, words, chars, ignore_blanks)
523 char *str;
524 int words, chars;
525 int ignore_blanks;
527 int i;
528 char *p = str;
530 for (i = 0; i < words; i++)
532 char c;
533 /* Find next bunch of nonblanks and skip them. */
534 while ((c = *p) == ' ' || c == '\t') p++;
535 while ((c = *p) && c != '\n' && !(c == ' ' || c == '\t')) p++;
536 if (!*p || *p == '\n') return p;
539 while (*p == ' ' || *p == '\t') p++;
541 for (i = 0; i < chars; i++)
543 if (!*p || *p == '\n') break;
544 p++;
546 return p;
549 /* Like find_pos but assumes that each field is surrounded by braces
550 and that braces within fields are balanced. */
552 char *
553 find_braced_pos (str, words, chars, ignore_blanks)
554 char *str;
555 int words, chars;
556 int ignore_blanks;
558 int i;
559 int bracelevel;
560 char *p = str;
561 char c;
563 for (i = 0; i < words; i++)
565 bracelevel = 1;
566 while ((c = *p++) != '{' && c != '\n' && c);
567 if (c != '{')
568 return p - 1;
569 while (bracelevel)
571 c = *p++;
572 if (c == '{') bracelevel++;
573 if (c == '}') bracelevel--;
574 if (c == '\\') c = *p++; /* \ quotes braces and \ */
575 if (c == 0 || c == '\n') return p-1;
579 while ((c = *p++) != '{' && c != '\n' && c);
581 if (c != '{')
582 return p-1;
584 if (ignore_blanks)
585 while ((c = *p) == ' ' || c == '\t') p++;
587 for (i = 0; i < chars; i++)
589 if (!*p || *p == '\n') break;
590 p++;
592 return p;
595 /* Find the end of the balanced-brace field which starts at `str'.
596 The position returned is just before the closing brace. */
598 char *
599 find_braced_end (str)
600 char *str;
602 int bracelevel;
603 char *p = str;
604 char c;
606 bracelevel = 1;
607 while (bracelevel)
609 c = *p++;
610 if (c == '{') bracelevel++;
611 if (c == '}') bracelevel--;
612 if (c == '\\') c = *p++;
613 if (c == 0 || c == '\n') return p-1;
615 return p - 1;
618 long
619 find_value (start, length)
620 char *start;
621 long length;
623 while (length != 0L) {
624 if (isdigit(*start))
625 return atol(start);
626 length--;
627 start++;
629 return 0l;
632 /* Vector used to translate characters for comparison.
633 This is how we make all alphanumerics follow all else,
634 and ignore case in the first sorting. */
635 int char_order[256];
637 init_char_order ()
639 int i;
640 for (i = 1; i < 256; i++)
641 char_order[i] = i;
643 for (i = '0'; i <= '9'; i++)
644 char_order[i] += 512;
646 for (i = 'a'; i <= 'z'; i++) {
647 char_order[i] = 512 + i;
648 char_order[i + 'A' - 'a'] = 512 + i;
652 /* Compare two fields (each specified as a start pointer and a character count)
653 according to `keyfield'. The sign of the value reports the relation between the fields */
656 compare_field (keyfield, start1, length1, pos1, start2, length2, pos2)
657 struct keyfield *keyfield;
658 char *start1;
659 long length1;
660 long pos1;
661 char *start2;
662 long length2;
663 long pos2;
665 if (keyfields->positional)
667 if (pos1 > pos2)
668 return 1;
669 else
670 return -1;
672 if (keyfield->numeric)
674 long value = find_value (start1, length1) - find_value (start2, length2);
675 if (value > 0) return 1;
676 if (value < 0) return -1;
677 return 0;
679 else
681 char *p1 = start1;
682 char *p2 = start2;
683 char *e1 = start1 + length1;
684 char *e2 = start2 + length2;
686 int fold_case = keyfield->fold_case;
688 while (1)
690 int c1, c2;
692 if (p1 == e1) c1 = 0;
693 else c1 = *p1++;
694 if (p2 == e2) c2 = 0;
695 else c2 = *p2++;
697 if (char_order[c1] != char_order[c2])
698 return char_order[c1] - char_order[c2];
699 if (!c1) break;
702 /* Strings are equal except possibly for case. */
703 p1 = start1;
704 p2 = start2;
705 while (1)
707 int c1, c2;
709 if (p1 == e1) c1 = 0;
710 else c1 = *p1++;
711 if (p2 == e2) c2 = 0;
712 else c2 = *p2++;
714 if (c1 != c2)
715 /* Reverse sign here so upper case comes out last. */
716 return c2 - c1;
717 if (!c1) break;
720 return 0;
724 /* A `struct linebuffer' is a structure which holds a line of text.
725 `readline' reads a line from a stream into a linebuffer
726 and works regardless of the length of the line. */
728 struct linebuffer
730 long size;
731 char *buffer;
734 /* Initialize a linebuffer for use */
736 void
737 initbuffer (linebuffer)
738 struct linebuffer *linebuffer;
740 linebuffer->size = 200;
741 linebuffer->buffer = (char *) xmalloc (200);
744 /* Read a line of text from `stream' into `linebuffer'.
745 Return the length of the line. */
747 long
748 readline (linebuffer, stream)
749 struct linebuffer *linebuffer;
750 FILE *stream;
752 char *buffer = linebuffer->buffer;
753 char *p = linebuffer->buffer;
754 char *end = p + linebuffer->size;
756 while (1)
758 int c = getc (stream);
759 if (p == end)
761 buffer = (char *) xrealloc (buffer, linebuffer->size *= 2);
762 p += buffer - linebuffer->buffer;
763 end += buffer - linebuffer->buffer;
764 linebuffer->buffer = buffer;
766 if (c < 0 || c == '\n')
768 *p = 0;
769 break;
771 *p++ = c;
774 return p - buffer;
777 /* Sort an input file too big to sort in core. */
779 void
780 sort_offline (infile, nfiles, total, outfile)
781 char *infile;
782 long total;
783 char *outfile;
785 int ntemps = 2 * (total + MAX_IN_CORE_SORT - 1) / MAX_IN_CORE_SORT; /* More than enough */
786 char **tempfiles = (char **) xmalloc (ntemps * sizeof (char *));
787 FILE *istream = fopen (infile, "r");
788 int i;
789 struct linebuffer lb;
790 long linelength;
791 int failure = 0;
793 initbuffer (&lb);
795 /* Read in one line of input data. */
797 linelength = readline (&lb, istream);
799 if (lb.buffer[0] != '\\')
801 error ("%s: not a texinfo index file", infile);
802 return;
805 /* Split up the input into `ntemps' temporary files, or maybe fewer,
806 and put the new files' names into `tempfiles' */
808 for (i = 0; i < ntemps; i++)
810 char *outname = maketempname (++tempcount);
811 FILE *ostream = fopen (outname, "w");
812 long tempsize = 0;
814 if (!ostream) pfatal_with_name (outname);
815 tempfiles[i] = outname;
817 /* Copy lines into this temp file as long as it does not make file "too big"
818 or until there are no more lines. */
820 while (tempsize + linelength + 1 <= MAX_IN_CORE_SORT)
822 tempsize += linelength + 1;
823 fputs (lb.buffer, ostream);
824 putc ('\n', ostream);
826 /* Read another line of input data. */
828 linelength = readline (&lb, istream);
829 if (!linelength && feof (istream)) break;
831 if (lb.buffer[0] != '\\')
833 error ("%s: not a texinfo index file", infile);
834 failure = 1;
835 goto fail;
838 fclose (ostream);
839 if (feof (istream)) break;
842 free (lb.buffer);
844 fail:
845 /* Record number of temp files we actually needed. */
847 ntemps = i;
849 /* Sort each tempfile into another tempfile.
850 Delete the first set of tempfiles and put the names of the second into `tempfiles' */
852 for (i = 0; i < ntemps; i++)
854 char *newtemp = maketempname (++tempcount);
855 sort_in_core (&tempfiles[i], MAX_IN_CORE_SORT, newtemp);
856 if (!keep_tempfiles)
857 unlink (tempfiles[i]);
858 tempfiles[i] = newtemp;
861 if (failure)
862 return;
864 /* Merge the tempfiles together and indexify */
866 merge_files (tempfiles, ntemps, outfile);
869 /* Sort `infile', whose size is `total',
870 assuming that is small enough to be done in-core,
871 then indexify it and send the output to `outfile' (or to stdout). */
873 void
874 sort_in_core (infile, total, outfile)
875 char *infile;
876 long total;
877 char *outfile;
879 char **nextline;
880 char *data = (char *) xmalloc (total + 1);
881 char *file_data;
882 long file_size;
883 int i;
884 FILE *ostream = stdout;
885 struct lineinfo *lineinfo;
887 /* Read the contents of the file into the moby array `data' */
889 int desc = open (infile, 0, 0);
891 if (desc < 0)
892 fatal ("failure reopening %s", infile);
893 for (file_size = 0; ; )
895 if ((i = read (desc, data + file_size, total - file_size)) <= 0)
896 break;
897 file_size += i;
899 file_data = data;
900 data[file_size] = 0;
902 close (desc);
904 if (file_size > 0 && data[0] != '\\')
906 error ("%s: not a texinfo index file", infile);
907 return;
910 init_char_order ();
912 /* Sort routines want to know this address */
914 text_base = data;
916 /* Create the array of pointers to lines, with a default size frequently enough. */
918 lines = total / 50;
919 if (!lines) lines = 2;
920 linearray = (char **) xmalloc (lines * sizeof (char *));
922 /* `nextline' points to the next free slot in this array.
923 `lines' is the allocated size. */
925 nextline = linearray;
927 /* Parse the input file's data, and make entries for the lines. */
929 nextline = parsefile (infile, nextline, file_data, file_size);
930 if (nextline == 0)
932 error ("%s: not a texinfo index file", infile);
933 return;
936 /* Sort the lines */
938 /* If we have enough space, find the first keyfield of each line in advance.
939 Make a `struct lineinfo' for each line, which records the keyfield
940 as well as the line, and sort them. */
942 lineinfo = (struct lineinfo *) malloc ((nextline - linearray) * sizeof (struct lineinfo));
944 if (lineinfo)
946 struct lineinfo *lp;
947 char **p;
949 for (lp = lineinfo, p = linearray; p != nextline; lp++, p++)
951 lp->text = *p;
952 lp->key.text = find_field (keyfields, *p, &lp->keylen);
953 if (keyfields->numeric)
954 lp->key.number = find_value (lp->key.text, lp->keylen);
957 qsort (lineinfo, nextline - linearray, sizeof (struct lineinfo), compare_prepared);
959 for (lp = lineinfo, p = linearray; p != nextline; lp++, p++)
960 *p = lp->text;
962 free (lineinfo);
964 else
965 qsort (linearray, nextline - linearray, sizeof (char *), compare_full);
967 /* Open the output file */
969 if (outfile)
971 ostream = fopen (outfile, "w");
972 if (!ostream)
973 pfatal_with_name (outfile);
976 writelines (linearray, nextline - linearray, ostream);
977 if (outfile) fclose (ostream);
979 free (linearray);
980 free (data);
983 /* Parse an input string in core into lines.
984 DATA is the input string, and SIZE is its length.
985 Data goes in LINEARRAY starting at NEXTLINE.
986 The value returned is the first entry in LINEARRAY still unused.
987 Value 0 means input file contents are invalid. */
989 char **
990 parsefile (filename, nextline, data, size)
991 char *filename;
992 char **nextline;
993 char *data;
994 long size;
996 char *p, *end;
997 char **line = nextline;
999 p = data;
1000 end = p + size;
1001 *end = 0;
1003 while (p != end)
1005 if (p[0] != '\\')
1006 return 0;
1008 *line = p;
1009 while (*p && *p != '\n') p++;
1010 if (p != end) p++;
1012 line++;
1013 if (line == linearray + lines)
1015 char **old = linearray;
1016 linearray = (char **) xrealloc (linearray, sizeof (char *) * (lines *= 4));
1017 line += linearray - old;
1021 return line;
1024 /* Indexification is a filter applied to the sorted lines
1025 as they are being written to the output file.
1026 Multiple entries for the same name, with different page numbers,
1027 get combined into a single entry with multiple page numbers.
1028 The first braced field, which is used for sorting, is discarded.
1029 However, its first character is examined, folded to lower case,
1030 and if it is different from that in the previous line fed to us
1031 a \initial line is written with one argument, the new initial.
1033 If an entry has four braced fields, then the second and third
1034 constitute primary and secondary names.
1035 In this case, each change of primary name
1036 generates a \primary line which contains only the primary name,
1037 and in between these are \secondary lines which contain
1038 just a secondary name and page numbers.
1041 /* The last primary name we wrote a \primary entry for.
1042 If only one level of indexing is being done, this is the last name seen */
1043 char *lastprimary;
1044 int lastprimarylength; /* Length of storage allocated for lastprimary */
1046 /* Similar, for the secondary name. */
1047 char *lastsecondary;
1048 int lastsecondarylength;
1050 /* Zero if we are not in the middle of writing an entry.
1051 One if we have written the beginning of an entry but have not
1052 yet written any page numbers into it.
1053 Greater than one if we have written the beginning of an entry
1054 plus at least one page number. */
1055 int pending;
1057 /* The initial (for sorting purposes) of the last primary entry written.
1058 When this changes, a \initial {c} line is written */
1060 char * lastinitial;
1062 int lastinitiallength;
1064 /* When we need a string of length 1 for the value of lastinitial,
1065 store it here. */
1067 char lastinitial1[2];
1069 /* Initialize static storage for writing an index */
1071 void
1072 init_index ()
1074 pending = 0;
1075 lastinitial = lastinitial1;
1076 lastinitial1[0] = 0;
1077 lastinitial1[1] = 0;
1078 lastinitiallength = 0;
1079 lastprimarylength = 100;
1080 lastprimary = (char *) xmalloc (lastprimarylength + 1);
1081 bzero (lastprimary, lastprimarylength + 1);
1082 lastsecondarylength = 100;
1083 lastsecondary = (char *) xmalloc (lastsecondarylength + 1);
1084 bzero (lastsecondary, lastsecondarylength + 1);
1087 /* Indexify. Merge entries for the same name,
1088 insert headers for each initial character, etc. */
1090 indexify (line, ostream)
1091 char *line;
1092 FILE *ostream;
1094 char *primary, *secondary, *pagenumber;
1095 int primarylength, secondarylength, pagelength;
1096 int len = strlen (line);
1097 int nosecondary;
1098 int initiallength;
1099 char *initial;
1100 char initial1[2];
1101 register char *p;
1103 /* First, analyze the parts of the entry fed to us this time */
1105 p = find_braced_pos (line, 0, 0, 0);
1106 if (*p == '{')
1108 initial = p;
1109 /* Get length of inner pair of braces starting at p,
1110 including that inner pair of braces. */
1111 initiallength = find_braced_end (p + 1) + 1 - p;
1113 else
1115 initial = initial1;
1116 initial1[0] = *p;
1117 initial1[1] = 0;
1118 initiallength = 1;
1120 if (initial1[0] >= 'a' && initial1[0] <= 'z')
1121 initial1[0] -= 040;
1124 pagenumber = find_braced_pos (line, 1, 0, 0);
1125 pagelength = find_braced_end (pagenumber) - pagenumber;
1126 if (pagelength == 0)
1127 abort ();
1129 primary = find_braced_pos (line, 2, 0, 0);
1130 primarylength = find_braced_end (primary) - primary;
1132 secondary = find_braced_pos (line, 3, 0, 0);
1133 nosecondary = !*secondary;
1134 if (!nosecondary)
1135 secondarylength = find_braced_end (secondary) - secondary;
1137 /* If the primary is different from before, make a new primary entry */
1138 if (strncmp (primary, lastprimary, primarylength))
1140 /* Close off current secondary entry first, if one is open */
1141 if (pending)
1143 fputs ("}\n", ostream);
1144 pending = 0;
1147 /* If this primary has a different initial, include an entry for the initial */
1148 if (initiallength != lastinitiallength ||
1149 strncmp (initial, lastinitial, initiallength))
1151 fprintf (ostream, "\\initial {");
1152 fwrite (initial, 1, initiallength, ostream);
1153 fprintf (ostream, "}\n", initial);
1154 if (initial == initial1)
1156 lastinitial = lastinitial1;
1157 *lastinitial1 = *initial1;
1159 else
1161 lastinitial = initial;
1163 lastinitiallength = initiallength;
1166 /* Make the entry for the primary. */
1167 if (nosecondary)
1168 fputs ("\\entry {", ostream);
1169 else
1170 fputs ("\\primary {", ostream);
1171 fwrite (primary, primarylength, 1, ostream);
1172 if (nosecondary)
1174 fputs ("}{", ostream);
1175 pending = 1;
1177 else
1178 fputs ("}\n", ostream);
1180 /* Record name of most recent primary */
1181 if (lastprimarylength < primarylength)
1183 lastprimarylength = primarylength + 100;
1184 lastprimary = (char *) xrealloc (lastprimary,
1185 1 + lastprimarylength);
1187 strncpy (lastprimary, primary, primarylength);
1188 lastprimary[primarylength] = 0;
1190 /* There is no current secondary within this primary, now */
1191 lastsecondary[0] = 0;
1194 /* Should not have an entry with no subtopic following one with a subtopic */
1196 if (nosecondary && *lastsecondary)
1197 error ("entry %s follows an entry with a secondary name", line);
1199 /* Start a new secondary entry if necessary */
1200 if (!nosecondary && strncmp (secondary, lastsecondary, secondarylength))
1202 if (pending)
1204 fputs ("}\n", ostream);
1205 pending = 0;
1208 /* Write the entry for the secondary. */
1209 fputs ("\\secondary {", ostream);
1210 fwrite (secondary, secondarylength, 1, ostream);
1211 fputs ("}{", ostream);
1212 pending = 1;
1214 /* Record name of most recent secondary */
1215 if (lastsecondarylength < secondarylength)
1217 lastsecondarylength = secondarylength + 100;
1218 lastsecondary = (char *) xrealloc (lastsecondary,
1219 1 + lastsecondarylength);
1221 strncpy (lastsecondary, secondary, secondarylength);
1222 lastsecondary[secondarylength] = 0;
1225 /* Here to add one more page number to the current entry */
1226 if (pending++ != 1)
1227 fputs (", ", ostream); /* Punctuate first, if this is not the first */
1228 fwrite (pagenumber, pagelength, 1, ostream);
1231 /* Close out any unfinished output entry */
1233 void
1234 finish_index (ostream)
1235 FILE *ostream;
1237 if (pending)
1238 fputs ("}\n", ostream);
1239 free (lastprimary);
1240 free (lastsecondary);
1243 /* Copy the lines in the sorted order.
1244 Each line is copied out of the input file it was found in. */
1246 void
1247 writelines (linearray, nlines, ostream)
1248 char **linearray;
1249 int nlines;
1250 FILE *ostream;
1252 char **stop_line = linearray + nlines;
1253 char **next_line;
1255 init_index ();
1257 /* Output the text of the lines, and free the buffer space */
1259 for (next_line = linearray; next_line != stop_line; next_line++)
1261 /* If -u was specified, output the line only if distinct from previous one. */
1262 if (next_line == linearray
1263 /* Compare previous line with this one, using only the explicitly specd keyfields */
1264 || compare_general (*(next_line - 1), *next_line, 0L, 0L, num_keyfields - 1))
1266 char *p = *next_line;
1267 char c;
1268 while ((c = *p++) && c != '\n');
1269 *(p-1) = 0;
1270 indexify (*next_line, ostream);
1274 finish_index (ostream);
1277 /* Assume (and optionally verify) that each input file is sorted;
1278 merge them and output the result.
1279 Returns nonzero if any input file fails to be sorted.
1281 This is the high-level interface that can handle an unlimited number of files. */
1283 #define MAX_DIRECT_MERGE 10
1286 merge_files (infiles, nfiles, outfile)
1287 char **infiles;
1288 int nfiles;
1289 char *outfile;
1291 char **tempfiles;
1292 int ntemps;
1293 int i;
1294 int value = 0;
1295 int start_tempcount = tempcount;
1297 if (nfiles <= MAX_DIRECT_MERGE)
1298 return merge_direct (infiles, nfiles, outfile);
1300 /* Merge groups of MAX_DIRECT_MERGE input files at a time,
1301 making a temporary file to hold each group's result. */
1303 ntemps = (nfiles + MAX_DIRECT_MERGE - 1) / MAX_DIRECT_MERGE;
1304 tempfiles = (char **) xmalloc (ntemps * sizeof (char *));
1305 for (i = 0; i < ntemps; i++)
1307 int nf = MAX_DIRECT_MERGE;
1308 if (i + 1 == ntemps)
1309 nf = nfiles - i * MAX_DIRECT_MERGE;
1310 tempfiles[i] = maketempname (++tempcount);
1311 value |= merge_direct (&infiles[i * MAX_DIRECT_MERGE], nf, tempfiles[i]);
1314 /* All temporary files that existed before are no longer needed
1315 since their contents have been merged into our new tempfiles.
1316 So delete them. */
1317 flush_tempfiles (start_tempcount);
1319 /* Now merge the temporary files we created. */
1321 merge_files (tempfiles, ntemps, outfile);
1323 free (tempfiles);
1325 return value;
1328 /* Assume (and optionally verify) that each input file is sorted;
1329 merge them and output the result.
1330 Returns nonzero if any input file fails to be sorted.
1332 This version of merging will not work if the number of
1333 input files gets too high. Higher level functions
1334 use it only with a bounded number of input files. */
1337 merge_direct (infiles, nfiles, outfile)
1338 char **infiles;
1339 int nfiles;
1340 char *outfile;
1342 char **ip = infiles;
1343 struct linebuffer *lb1, *lb2;
1344 struct linebuffer **thisline, **prevline;
1345 FILE **streams;
1346 int i;
1347 int nleft;
1348 int lossage = 0;
1349 int *file_lossage;
1350 struct linebuffer *prev_out = 0;
1351 FILE *ostream = stdout;
1353 if (outfile)
1355 ostream = fopen (outfile, "w");
1357 if (!ostream) pfatal_with_name (outfile);
1359 init_index ();
1361 if (nfiles == 0)
1363 if (outfile)
1364 fclose (ostream);
1365 return 0;
1368 /* For each file, make two line buffers.
1369 Also, for each file, there is an element of `thisline'
1370 which points at any time to one of the file's two buffers,
1371 and an element of `prevline' which points to the other buffer.
1372 `thisline' is supposed to point to the next available line from the file,
1373 while `prevline' holds the last file line used,
1374 which is remembered so that we can verify that the file is properly sorted. */
1376 /* lb1 and lb2 contain one buffer each per file */
1377 lb1 = (struct linebuffer *) xmalloc (nfiles * sizeof (struct linebuffer));
1378 lb2 = (struct linebuffer *) xmalloc (nfiles * sizeof (struct linebuffer));
1380 /* thisline[i] points to the linebuffer holding the next available line in file i,
1381 or is zero if there are no lines left in that file. */
1382 thisline = (struct linebuffer **) xmalloc (nfiles * sizeof (struct linebuffer *));
1383 /* prevline[i] points to the linebuffer holding the last used line from file i.
1384 This is just for verifying that file i is properly sorted. */
1385 prevline = (struct linebuffer **) xmalloc (nfiles * sizeof (struct linebuffer *));
1386 /* streams[i] holds the input stream for file i. */
1387 streams = (FILE **) xmalloc (nfiles * sizeof (FILE *));
1388 /* file_lossage[i] is nonzero if we already know file i is not properly sorted. */
1389 file_lossage = (int *) xmalloc (nfiles * sizeof (int));
1391 /* Allocate and initialize all that storage */
1393 for (i = 0; i < nfiles; i++)
1395 initbuffer (&lb1[i]);
1396 initbuffer (&lb2[i]);
1397 thisline[i] = &lb1[i];
1398 prevline[i] = &lb2[i];
1399 file_lossage[i] = 0;
1400 streams[i] = fopen (infiles[i], "r");
1401 if (!streams[i])
1402 pfatal_with_name (infiles[i]);
1404 readline (thisline[i], streams[i]);
1407 /* Keep count of number of files not at eof */
1408 nleft = nfiles;
1410 while (nleft)
1412 struct linebuffer *best = 0;
1413 struct linebuffer *exch;
1414 int bestfile = -1;
1415 int i;
1417 /* Look at the next avail line of each file; choose the least one. */
1419 for (i = 0; i < nfiles; i++)
1421 if (thisline[i] &&
1422 (!best ||
1423 0 < compare_general (best->buffer, thisline[i]->buffer,
1424 (long) bestfile, (long) i, num_keyfields)))
1426 best = thisline[i];
1427 bestfile = i;
1431 /* Output that line, unless it matches the previous one and we don't want duplicates */
1433 if (!(prev_out &&
1434 !compare_general (prev_out->buffer, best->buffer, 0L, 1L, num_keyfields - 1)))
1435 indexify (best->buffer, ostream);
1436 prev_out = best;
1438 /* Now make the line the previous of its file, and fetch a new line from that file */
1440 exch = prevline[bestfile];
1441 prevline[bestfile] = thisline[bestfile];
1442 thisline[bestfile] = exch;
1444 while (1)
1446 /* If the file has no more, mark it empty */
1448 if (feof (streams[bestfile]))
1450 thisline[bestfile] = 0;
1451 nleft--; /* Update the number of files still not empty */
1452 break;
1454 readline (thisline[bestfile], streams[bestfile]);
1455 if (thisline[bestfile]->buffer[0] || !feof (streams[bestfile])) break;
1459 finish_index (ostream);
1461 /* Free all storage and close all input streams */
1463 for (i = 0; i < nfiles; i++)
1465 fclose (streams[i]);
1466 free (lb1[i].buffer);
1467 free (lb2[i].buffer);
1469 free (file_lossage);
1470 free (lb1);
1471 free (lb2);
1472 free (thisline);
1473 free (prevline);
1474 free (streams);
1476 if (outfile)
1477 fclose (ostream);
1479 return lossage;
1482 /* Print error message and exit. */
1484 fatal (s1, s2)
1485 char *s1, *s2;
1487 error (s1, s2);
1488 exit (EXIT_FATAL);
1491 /* Print error message. `s1' is printf control string, `s2' is arg for it. */
1493 error (s1, s2)
1494 char *s1, *s2;
1496 printf ("texindex: ");
1497 printf (s1, s2);
1498 printf ("\n");
1501 perror_with_name (name)
1502 char *name;
1504 #ifdef VMS
1505 #include <errno.h>
1506 extern noshare int sys_nerr;
1507 extern noshare char *sys_errlist[];
1508 #else
1509 extern int errno, sys_nerr;
1510 extern char *sys_errlist[];
1511 #endif
1512 char *s;
1514 if (errno < sys_nerr)
1515 s = concat ("", sys_errlist[errno], " for %s");
1516 else
1517 s = "cannot open %s";
1518 error (s, name);
1521 pfatal_with_name (name)
1522 char *name;
1524 extern int errno, sys_nerr;
1525 extern char *sys_errlist[];
1526 char *s;
1528 if (errno < sys_nerr)
1529 s = concat ("", sys_errlist[errno], " for %s");
1530 else
1531 s = "cannot open %s";
1532 fatal (s, name);
1535 /* Return a newly-allocated string whose contents concatenate those of s1, s2, s3. */
1537 char *
1538 concat (s1, s2, s3)
1539 char *s1, *s2, *s3;
1541 int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
1542 char *result = (char *) xmalloc (len1 + len2 + len3 + 1);
1544 strcpy (result, s1);
1545 strcpy (result + len1, s2);
1546 strcpy (result + len1 + len2, s3);
1547 *(result + len1 + len2 + len3) = 0;
1549 return result;
1552 /* Like malloc but get fatal error if memory is exhausted. */
1555 xmalloc (size)
1556 int size;
1558 int result = malloc (size);
1559 if (!result)
1560 fatal ("virtual memory exhausted", 0);
1561 return result;
1566 xrealloc (ptr, size)
1567 char *ptr;
1568 int size;
1570 int result = realloc (ptr, size);
1571 if (!result)
1572 fatal ("virtual memory exhausted");
1573 return result;
1576 bzero (b, length)
1577 register char *b;
1578 register int length;
1580 #ifdef VMS
1581 short zero = 0;
1582 long max_str = 65535;
1584 while (length > max_str) {
1585 (void) LIB$MOVC5 (&zero, &zero, &zero, &max_str, b);
1586 length -= max_str;
1587 b += max_str;
1589 (void) LIB$MOVC5 (&zero, &zero, &zero, &length, b);
1590 #else
1591 while (length-- > 0)
1592 *b++ = 0;
1593 #endif /* not VMS */