1 /* Prepare Tex index dribble output into an actual index.
2 Copyright (C) 1987 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 1, or (at your option)
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 In other words, you are welcome to use, share and improve this program.
19 You are forbidden to forbid anyone else to use, share and improve
20 what you give them. Help stamp out software-hoarding! */
29 #define EXIT_SUCCESS ((1 << 28) | 1)
30 #define EXIT_FATAL ((1 << 28) | 4)
32 #define tell(fd) lseek(fd, 0L, 1)
34 #include <sys/types.h>
35 #include <sys/fcntl.h>
38 #define EXIT_SUCCESS 0
47 /* When sorting in core, this structure describes one line
48 and the position and length of its first keyfield. */
52 char *text
; /* The actual text of the line */
54 { /* The start of the key (for textual comparison) */
56 long number
; /* or the numeric value (for numeric comparison) */
58 long keylen
; /* Length of key field */
61 /* This structure describes a field to use as a sort key */
65 int startwords
; /* # words to skip */
66 int startchars
; /* and # additional chars to skip, to start of field */
67 int endwords
; /* similar, from beg (or end) of line, to find end of field */
69 char ignore_blanks
; /* Ignore spaces and tabs within the field */
70 char fold_case
; /* Convert upper case to lower before comparing */
71 char reverse
; /* Compare in reverse order */
72 char numeric
; /* Parse text as an integer and compare the integers */
73 char positional
; /* Sort according to position within the file */
74 char braced
; /* Count balanced-braced groupings as fields */
77 /* Vector of keyfields to use */
79 struct keyfield keyfields
[3];
81 /* Number of keyfields stored in that vector. */
83 int num_keyfields
= 3;
85 /* Vector of input file names, terminated with a zero (null pointer) */
89 /* Vector of corresponding output file names, or zero meaning default it */
93 /* Length of `infiles' */
97 /* Pointer to the array of pointers to lines being sorted */
101 /* The allocated length of `linearray'. */
105 /* Directory to use for temporary files. On Unix, it ends with a slash. */
109 /* Start of filename to use for temporary files. */
113 /* Number of last temporary file. */
117 /* Number of last temporary file already deleted.
118 Temporary files are deleted by `flush_tempfiles' in order of creation. */
120 int last_deleted_tempcount
;
122 /* During in-core sort, this points to the base of the data block
123 which contains all the lines of data. */
127 /* Additional command switches */
129 int keep_tempfiles
; /* Nonzero means do not delete tempfiles -- for debugging */
131 /* Forward declarations of functions in this file */
133 void decode_command ();
134 void sort_in_core ();
135 void sort_offline ();
140 char *find_braced_pos ();
141 char *find_braced_end ();
148 char *maketempname ();
149 void flush_tempfiles ();
152 extern char *mktemp ();
154 #define MAX_IN_CORE_SORT 500000
164 last_deleted_tempcount
= 0;
166 /* Describe the kind of sorting to do. */
167 /* The first keyfield uses the first braced field and folds case */
168 keyfields
[0].braced
= 1;
169 keyfields
[0].fold_case
= 1;
170 keyfields
[0].endwords
= -1;
171 keyfields
[0].endchars
= -1;
172 /* The second keyfield uses the second braced field, numerically */
173 keyfields
[1].braced
= 1;
174 keyfields
[1].numeric
= 1;
175 keyfields
[1].startwords
= 1;
176 keyfields
[1].endwords
= -1;
177 keyfields
[1].endchars
= -1;
178 /* The third keyfield (which is ignored while discarding duplicates)
179 compares the whole line */
180 keyfields
[2].endwords
= -1;
181 keyfields
[2].endchars
= -1;
183 decode_command (argc
, argv
);
185 tempbase
= mktemp (concat ("txiXXXXXX", "", ""));
187 /* Process input files completely, one by one. */
189 for (i
= 0; i
< num_infiles
; i
++)
196 desc
= open (infiles
[i
], 0, 0);
197 if (desc
< 0) pfatal_with_name (infiles
[i
]);
198 lseek (desc
, 0, L_XTND
);
202 outfile
= outfiles
[i
];
205 outfile
= concat (infiles
[i
], "s", "");
208 if (ptr
< MAX_IN_CORE_SORT
)
209 /* Sort a small amount of data */
210 sort_in_core (infiles
[i
], ptr
, outfile
);
212 sort_offline (infiles
[i
], ptr
, outfile
);
215 flush_tempfiles (tempcount
);
219 /* This page decodes the command line arguments to set the parameter variables
220 and set up the vector of keyfields and the vector of input files */
223 decode_command (argc
, argv
)
231 /* Store default values into parameter variables */
234 tempdir
= "sys$scratch:";
241 /* Allocate argc input files, which must be enough. */
243 infiles
= (char **) xmalloc (argc
* sizeof (char *));
244 outfiles
= (char **) xmalloc (argc
* sizeof (char *));
248 /* First find all switches that control the default kind-of-sort */
250 for (i
= 1; i
< argc
; i
++)
252 int tem
= classify_arg (argv
[i
]);
265 fatal ("switch %s given with no argument following it", argv
[i
]);
266 else if (!strcmp (argv
[i
], "-T"))
267 tempdir
= argv
[i
+ 1];
268 else if (!strcmp (argv
[i
], "-o"))
269 *(op
- 1) = argv
[i
+ 1];
283 fatal ("invalid command switch %c", c
);
288 /* Record number of keyfields, terminate list of filenames */
290 num_infiles
= ip
- infiles
;
294 /* Return 0 for an argument that is not a switch;
295 for a switch, return 1 plus the number of following arguments that the switch swallows.
302 if (!strcmp (arg
, "-T") || !strcmp (arg
, "-o"))
309 /* Create a name for a temporary file */
316 sprintf (tempsuffix
, "%d", count
);
317 return concat (tempdir
, tempbase
, tempsuffix
);
320 /* Delete all temporary files up to the specified count */
323 flush_tempfiles (to_count
)
326 if (keep_tempfiles
) return;
327 while (last_deleted_tempcount
< to_count
)
328 unlink (maketempname (++last_deleted_tempcount
));
331 /* Copy an input file into a temporary file, and return the temporary file name */
339 char *outfile
= maketempname (++tempcount
);
341 char buffer
[BUFSIZE
];
343 odesc
= open (outfile
, O_WRONLY
| O_CREAT
, 0666);
345 if (odesc
< 0) pfatal_with_name (outfile
);
349 int nread
= read (idesc
, buffer
, BUFSIZE
);
350 write (odesc
, buffer
, nread
);
359 /* Compare two lines, provided as pointers to pointers to text,
360 according to the specified set of keyfields */
363 compare_full (line1
, line2
)
364 char **line1
, **line2
;
368 /* Compare using the first keyfield;
369 if that does not distinguish the lines, try the second keyfield; and so on. */
371 for (i
= 0; i
< num_keyfields
; i
++)
373 long length1
, length2
;
374 char *start1
= find_field (&keyfields
[i
], *line1
, &length1
);
375 char *start2
= find_field (&keyfields
[i
], *line2
, &length2
);
376 int tem
= compare_field (&keyfields
[i
], start1
, length1
, *line1
- text_base
,
377 start2
, length2
, *line2
- text_base
);
380 if (keyfields
[i
].reverse
)
386 return 0; /* Lines match exactly */
389 /* Compare two lines described by structures
390 in which the first keyfield is identified in advance.
391 For positional sorting, assumes that the order of the lines in core
392 reflects their nominal order. */
395 compare_prepared (line1
, line2
)
396 struct lineinfo
*line1
, *line2
;
402 /* Compare using the first keyfield, which has been found for us already */
403 if (keyfields
->positional
)
405 if (line1
->text
- text_base
> line2
->text
- text_base
)
410 else if (keyfields
->numeric
)
411 tem
= line1
->key
.number
- line2
->key
.number
;
413 tem
= compare_field (keyfields
, line1
->key
.text
, line1
->keylen
, 0, line2
->key
.text
, line2
->keylen
, 0);
416 if (keyfields
->reverse
)
424 /* Compare using the second keyfield;
425 if that does not distinguish the lines, try the third keyfield; and so on. */
427 for (i
= 1; i
< num_keyfields
; i
++)
429 long length1
, length2
;
430 char *start1
= find_field (&keyfields
[i
], text1
, &length1
);
431 char *start2
= find_field (&keyfields
[i
], text2
, &length2
);
432 int tem
= compare_field (&keyfields
[i
], start1
, length1
, text1
- text_base
,
433 start2
, length2
, text2
- text_base
);
436 if (keyfields
[i
].reverse
)
442 return 0; /* Lines match exactly */
445 /* Like compare_full but more general.
446 You can pass any strings, and you can say how many keyfields to use.
447 `pos1' and `pos2' should indicate the nominal positional ordering of
448 the two lines in the input. */
451 compare_general (str1
, str2
, pos1
, pos2
, use_keyfields
)
458 /* Compare using the first keyfield;
459 if that does not distinguish the lines, try the second keyfield; and so on. */
461 for (i
= 0; i
< use_keyfields
; i
++)
463 long length1
, length2
;
464 char *start1
= find_field (&keyfields
[i
], str1
, &length1
);
465 char *start2
= find_field (&keyfields
[i
], str2
, &length2
);
466 int tem
= compare_field (&keyfields
[i
], start1
, length1
, pos1
, start2
, length2
, pos2
);
469 if (keyfields
[i
].reverse
)
475 return 0; /* Lines match exactly */
478 /* Find the start and length of a field in `str' according to `keyfield'.
479 A pointer to the starting character is returned, and the length
480 is stored into the int that `lengthptr' points to. */
483 find_field (keyfield
, str
, lengthptr
)
484 struct keyfield
*keyfield
;
492 if (keyfield
->braced
) fun
= find_braced_pos
;
495 start
= ( *fun
)(str
, keyfield
->startwords
, keyfield
->startchars
,
496 keyfield
->ignore_blanks
);
497 if (keyfield
->endwords
< 0)
499 if (keyfield
->braced
)
500 end
= find_braced_end (start
);
504 while (*end
&& *end
!= '\n') end
++;
509 end
= ( *fun
)(str
, keyfield
->endwords
, keyfield
->endchars
, 0);
510 if (end
- str
< start
- str
) end
= start
;
512 *lengthptr
= end
- start
;
516 /* Find a pointer to a specified place within `str',
517 skipping (from the beginning) `words' words and then `chars' chars.
518 If `ignore_blanks' is nonzero, we skip all blanks
519 after finding the specified word. */
522 find_pos (str
, words
, chars
, ignore_blanks
)
530 for (i
= 0; i
< words
; i
++)
533 /* Find next bunch of nonblanks and skip them. */
534 while ((c
= *p
) == ' ' || c
== '\t') p
++;
535 while ((c
= *p
) && c
!= '\n' && !(c
== ' ' || c
== '\t')) p
++;
536 if (!*p
|| *p
== '\n') return p
;
539 while (*p
== ' ' || *p
== '\t') p
++;
541 for (i
= 0; i
< chars
; i
++)
543 if (!*p
|| *p
== '\n') break;
549 /* Like find_pos but assumes that each field is surrounded by braces
550 and that braces within fields are balanced. */
553 find_braced_pos (str
, words
, chars
, ignore_blanks
)
563 for (i
= 0; i
< words
; i
++)
566 while ((c
= *p
++) != '{' && c
!= '\n' && c
);
572 if (c
== '{') bracelevel
++;
573 if (c
== '}') bracelevel
--;
574 if (c
== '\\') c
= *p
++; /* \ quotes braces and \ */
575 if (c
== 0 || c
== '\n') return p
-1;
579 while ((c
= *p
++) != '{' && c
!= '\n' && c
);
585 while ((c
= *p
) == ' ' || c
== '\t') p
++;
587 for (i
= 0; i
< chars
; i
++)
589 if (!*p
|| *p
== '\n') break;
595 /* Find the end of the balanced-brace field which starts at `str'.
596 The position returned is just before the closing brace. */
599 find_braced_end (str
)
610 if (c
== '{') bracelevel
++;
611 if (c
== '}') bracelevel
--;
612 if (c
== '\\') c
= *p
++;
613 if (c
== 0 || c
== '\n') return p
-1;
619 find_value (start
, length
)
623 while (length
!= 0L) {
632 /* Vector used to translate characters for comparison.
633 This is how we make all alphanumerics follow all else,
634 and ignore case in the first sorting. */
640 for (i
= 1; i
< 256; i
++)
643 for (i
= '0'; i
<= '9'; i
++)
644 char_order
[i
] += 512;
646 for (i
= 'a'; i
<= 'z'; i
++) {
647 char_order
[i
] = 512 + i
;
648 char_order
[i
+ 'A' - 'a'] = 512 + i
;
652 /* Compare two fields (each specified as a start pointer and a character count)
653 according to `keyfield'. The sign of the value reports the relation between the fields */
656 compare_field (keyfield
, start1
, length1
, pos1
, start2
, length2
, pos2
)
657 struct keyfield
*keyfield
;
665 if (keyfields
->positional
)
672 if (keyfield
->numeric
)
674 long value
= find_value (start1
, length1
) - find_value (start2
, length2
);
675 if (value
> 0) return 1;
676 if (value
< 0) return -1;
683 char *e1
= start1
+ length1
;
684 char *e2
= start2
+ length2
;
686 int fold_case
= keyfield
->fold_case
;
692 if (p1
== e1
) c1
= 0;
694 if (p2
== e2
) c2
= 0;
697 if (char_order
[c1
] != char_order
[c2
])
698 return char_order
[c1
] - char_order
[c2
];
702 /* Strings are equal except possibly for case. */
709 if (p1
== e1
) c1
= 0;
711 if (p2
== e2
) c2
= 0;
715 /* Reverse sign here so upper case comes out last. */
724 /* A `struct linebuffer' is a structure which holds a line of text.
725 `readline' reads a line from a stream into a linebuffer
726 and works regardless of the length of the line. */
734 /* Initialize a linebuffer for use */
737 initbuffer (linebuffer
)
738 struct linebuffer
*linebuffer
;
740 linebuffer
->size
= 200;
741 linebuffer
->buffer
= (char *) xmalloc (200);
744 /* Read a line of text from `stream' into `linebuffer'.
745 Return the length of the line. */
748 readline (linebuffer
, stream
)
749 struct linebuffer
*linebuffer
;
752 char *buffer
= linebuffer
->buffer
;
753 char *p
= linebuffer
->buffer
;
754 char *end
= p
+ linebuffer
->size
;
758 int c
= getc (stream
);
761 buffer
= (char *) xrealloc (buffer
, linebuffer
->size
*= 2);
762 p
+= buffer
- linebuffer
->buffer
;
763 end
+= buffer
- linebuffer
->buffer
;
764 linebuffer
->buffer
= buffer
;
766 if (c
< 0 || c
== '\n')
777 /* Sort an input file too big to sort in core. */
780 sort_offline (infile
, nfiles
, total
, outfile
)
785 int ntemps
= 2 * (total
+ MAX_IN_CORE_SORT
- 1) / MAX_IN_CORE_SORT
; /* More than enough */
786 char **tempfiles
= (char **) xmalloc (ntemps
* sizeof (char *));
787 FILE *istream
= fopen (infile
, "r");
789 struct linebuffer lb
;
795 /* Read in one line of input data. */
797 linelength
= readline (&lb
, istream
);
799 if (lb
.buffer
[0] != '\\')
801 error ("%s: not a texinfo index file", infile
);
805 /* Split up the input into `ntemps' temporary files, or maybe fewer,
806 and put the new files' names into `tempfiles' */
808 for (i
= 0; i
< ntemps
; i
++)
810 char *outname
= maketempname (++tempcount
);
811 FILE *ostream
= fopen (outname
, "w");
814 if (!ostream
) pfatal_with_name (outname
);
815 tempfiles
[i
] = outname
;
817 /* Copy lines into this temp file as long as it does not make file "too big"
818 or until there are no more lines. */
820 while (tempsize
+ linelength
+ 1 <= MAX_IN_CORE_SORT
)
822 tempsize
+= linelength
+ 1;
823 fputs (lb
.buffer
, ostream
);
824 putc ('\n', ostream
);
826 /* Read another line of input data. */
828 linelength
= readline (&lb
, istream
);
829 if (!linelength
&& feof (istream
)) break;
831 if (lb
.buffer
[0] != '\\')
833 error ("%s: not a texinfo index file", infile
);
839 if (feof (istream
)) break;
845 /* Record number of temp files we actually needed. */
849 /* Sort each tempfile into another tempfile.
850 Delete the first set of tempfiles and put the names of the second into `tempfiles' */
852 for (i
= 0; i
< ntemps
; i
++)
854 char *newtemp
= maketempname (++tempcount
);
855 sort_in_core (&tempfiles
[i
], MAX_IN_CORE_SORT
, newtemp
);
857 unlink (tempfiles
[i
]);
858 tempfiles
[i
] = newtemp
;
864 /* Merge the tempfiles together and indexify */
866 merge_files (tempfiles
, ntemps
, outfile
);
869 /* Sort `infile', whose size is `total',
870 assuming that is small enough to be done in-core,
871 then indexify it and send the output to `outfile' (or to stdout). */
874 sort_in_core (infile
, total
, outfile
)
880 char *data
= (char *) xmalloc (total
+ 1);
884 FILE *ostream
= stdout
;
885 struct lineinfo
*lineinfo
;
887 /* Read the contents of the file into the moby array `data' */
889 int desc
= open (infile
, 0, 0);
892 fatal ("failure reopening %s", infile
);
893 for (file_size
= 0; ; )
895 if ((i
= read (desc
, data
+ file_size
, total
- file_size
)) <= 0)
904 if (file_size
> 0 && data
[0] != '\\')
906 error ("%s: not a texinfo index file", infile
);
912 /* Sort routines want to know this address */
916 /* Create the array of pointers to lines, with a default size frequently enough. */
919 if (!lines
) lines
= 2;
920 linearray
= (char **) xmalloc (lines
* sizeof (char *));
922 /* `nextline' points to the next free slot in this array.
923 `lines' is the allocated size. */
925 nextline
= linearray
;
927 /* Parse the input file's data, and make entries for the lines. */
929 nextline
= parsefile (infile
, nextline
, file_data
, file_size
);
932 error ("%s: not a texinfo index file", infile
);
938 /* If we have enough space, find the first keyfield of each line in advance.
939 Make a `struct lineinfo' for each line, which records the keyfield
940 as well as the line, and sort them. */
942 lineinfo
= (struct lineinfo
*) malloc ((nextline
- linearray
) * sizeof (struct lineinfo
));
949 for (lp
= lineinfo
, p
= linearray
; p
!= nextline
; lp
++, p
++)
952 lp
->key
.text
= find_field (keyfields
, *p
, &lp
->keylen
);
953 if (keyfields
->numeric
)
954 lp
->key
.number
= find_value (lp
->key
.text
, lp
->keylen
);
957 qsort (lineinfo
, nextline
- linearray
, sizeof (struct lineinfo
), compare_prepared
);
959 for (lp
= lineinfo
, p
= linearray
; p
!= nextline
; lp
++, p
++)
965 qsort (linearray
, nextline
- linearray
, sizeof (char *), compare_full
);
967 /* Open the output file */
971 ostream
= fopen (outfile
, "w");
973 pfatal_with_name (outfile
);
976 writelines (linearray
, nextline
- linearray
, ostream
);
977 if (outfile
) fclose (ostream
);
983 /* Parse an input string in core into lines.
984 DATA is the input string, and SIZE is its length.
985 Data goes in LINEARRAY starting at NEXTLINE.
986 The value returned is the first entry in LINEARRAY still unused.
987 Value 0 means input file contents are invalid. */
990 parsefile (filename
, nextline
, data
, size
)
997 char **line
= nextline
;
1009 while (*p
&& *p
!= '\n') p
++;
1013 if (line
== linearray
+ lines
)
1015 char **old
= linearray
;
1016 linearray
= (char **) xrealloc (linearray
, sizeof (char *) * (lines
*= 4));
1017 line
+= linearray
- old
;
1024 /* Indexification is a filter applied to the sorted lines
1025 as they are being written to the output file.
1026 Multiple entries for the same name, with different page numbers,
1027 get combined into a single entry with multiple page numbers.
1028 The first braced field, which is used for sorting, is discarded.
1029 However, its first character is examined, folded to lower case,
1030 and if it is different from that in the previous line fed to us
1031 a \initial line is written with one argument, the new initial.
1033 If an entry has four braced fields, then the second and third
1034 constitute primary and secondary names.
1035 In this case, each change of primary name
1036 generates a \primary line which contains only the primary name,
1037 and in between these are \secondary lines which contain
1038 just a secondary name and page numbers.
1041 /* The last primary name we wrote a \primary entry for.
1042 If only one level of indexing is being done, this is the last name seen */
1044 int lastprimarylength
; /* Length of storage allocated for lastprimary */
1046 /* Similar, for the secondary name. */
1047 char *lastsecondary
;
1048 int lastsecondarylength
;
1050 /* Zero if we are not in the middle of writing an entry.
1051 One if we have written the beginning of an entry but have not
1052 yet written any page numbers into it.
1053 Greater than one if we have written the beginning of an entry
1054 plus at least one page number. */
1057 /* The initial (for sorting purposes) of the last primary entry written.
1058 When this changes, a \initial {c} line is written */
1062 int lastinitiallength
;
1064 /* When we need a string of length 1 for the value of lastinitial,
1067 char lastinitial1
[2];
1069 /* Initialize static storage for writing an index */
1075 lastinitial
= lastinitial1
;
1076 lastinitial1
[0] = 0;
1077 lastinitial1
[1] = 0;
1078 lastinitiallength
= 0;
1079 lastprimarylength
= 100;
1080 lastprimary
= (char *) xmalloc (lastprimarylength
+ 1);
1081 bzero (lastprimary
, lastprimarylength
+ 1);
1082 lastsecondarylength
= 100;
1083 lastsecondary
= (char *) xmalloc (lastsecondarylength
+ 1);
1084 bzero (lastsecondary
, lastsecondarylength
+ 1);
1087 /* Indexify. Merge entries for the same name,
1088 insert headers for each initial character, etc. */
1090 indexify (line
, ostream
)
1094 char *primary
, *secondary
, *pagenumber
;
1095 int primarylength
, secondarylength
, pagelength
;
1096 int len
= strlen (line
);
1103 /* First, analyze the parts of the entry fed to us this time */
1105 p
= find_braced_pos (line
, 0, 0, 0);
1109 /* Get length of inner pair of braces starting at p,
1110 including that inner pair of braces. */
1111 initiallength
= find_braced_end (p
+ 1) + 1 - p
;
1120 if (initial1
[0] >= 'a' && initial1
[0] <= 'z')
1124 pagenumber
= find_braced_pos (line
, 1, 0, 0);
1125 pagelength
= find_braced_end (pagenumber
) - pagenumber
;
1126 if (pagelength
== 0)
1129 primary
= find_braced_pos (line
, 2, 0, 0);
1130 primarylength
= find_braced_end (primary
) - primary
;
1132 secondary
= find_braced_pos (line
, 3, 0, 0);
1133 nosecondary
= !*secondary
;
1135 secondarylength
= find_braced_end (secondary
) - secondary
;
1137 /* If the primary is different from before, make a new primary entry */
1138 if (strncmp (primary
, lastprimary
, primarylength
))
1140 /* Close off current secondary entry first, if one is open */
1143 fputs ("}\n", ostream
);
1147 /* If this primary has a different initial, include an entry for the initial */
1148 if (initiallength
!= lastinitiallength
||
1149 strncmp (initial
, lastinitial
, initiallength
))
1151 fprintf (ostream
, "\\initial {");
1152 fwrite (initial
, 1, initiallength
, ostream
);
1153 fprintf (ostream
, "}\n", initial
);
1154 if (initial
== initial1
)
1156 lastinitial
= lastinitial1
;
1157 *lastinitial1
= *initial1
;
1161 lastinitial
= initial
;
1163 lastinitiallength
= initiallength
;
1166 /* Make the entry for the primary. */
1168 fputs ("\\entry {", ostream
);
1170 fputs ("\\primary {", ostream
);
1171 fwrite (primary
, primarylength
, 1, ostream
);
1174 fputs ("}{", ostream
);
1178 fputs ("}\n", ostream
);
1180 /* Record name of most recent primary */
1181 if (lastprimarylength
< primarylength
)
1183 lastprimarylength
= primarylength
+ 100;
1184 lastprimary
= (char *) xrealloc (lastprimary
,
1185 1 + lastprimarylength
);
1187 strncpy (lastprimary
, primary
, primarylength
);
1188 lastprimary
[primarylength
] = 0;
1190 /* There is no current secondary within this primary, now */
1191 lastsecondary
[0] = 0;
1194 /* Should not have an entry with no subtopic following one with a subtopic */
1196 if (nosecondary
&& *lastsecondary
)
1197 error ("entry %s follows an entry with a secondary name", line
);
1199 /* Start a new secondary entry if necessary */
1200 if (!nosecondary
&& strncmp (secondary
, lastsecondary
, secondarylength
))
1204 fputs ("}\n", ostream
);
1208 /* Write the entry for the secondary. */
1209 fputs ("\\secondary {", ostream
);
1210 fwrite (secondary
, secondarylength
, 1, ostream
);
1211 fputs ("}{", ostream
);
1214 /* Record name of most recent secondary */
1215 if (lastsecondarylength
< secondarylength
)
1217 lastsecondarylength
= secondarylength
+ 100;
1218 lastsecondary
= (char *) xrealloc (lastsecondary
,
1219 1 + lastsecondarylength
);
1221 strncpy (lastsecondary
, secondary
, secondarylength
);
1222 lastsecondary
[secondarylength
] = 0;
1225 /* Here to add one more page number to the current entry */
1227 fputs (", ", ostream
); /* Punctuate first, if this is not the first */
1228 fwrite (pagenumber
, pagelength
, 1, ostream
);
1231 /* Close out any unfinished output entry */
1234 finish_index (ostream
)
1238 fputs ("}\n", ostream
);
1240 free (lastsecondary
);
1243 /* Copy the lines in the sorted order.
1244 Each line is copied out of the input file it was found in. */
1247 writelines (linearray
, nlines
, ostream
)
1252 char **stop_line
= linearray
+ nlines
;
1257 /* Output the text of the lines, and free the buffer space */
1259 for (next_line
= linearray
; next_line
!= stop_line
; next_line
++)
1261 /* If -u was specified, output the line only if distinct from previous one. */
1262 if (next_line
== linearray
1263 /* Compare previous line with this one, using only the explicitly specd keyfields */
1264 || compare_general (*(next_line
- 1), *next_line
, 0L, 0L, num_keyfields
- 1))
1266 char *p
= *next_line
;
1268 while ((c
= *p
++) && c
!= '\n');
1270 indexify (*next_line
, ostream
);
1274 finish_index (ostream
);
1277 /* Assume (and optionally verify) that each input file is sorted;
1278 merge them and output the result.
1279 Returns nonzero if any input file fails to be sorted.
1281 This is the high-level interface that can handle an unlimited number of files. */
1283 #define MAX_DIRECT_MERGE 10
1286 merge_files (infiles
, nfiles
, outfile
)
1295 int start_tempcount
= tempcount
;
1297 if (nfiles
<= MAX_DIRECT_MERGE
)
1298 return merge_direct (infiles
, nfiles
, outfile
);
1300 /* Merge groups of MAX_DIRECT_MERGE input files at a time,
1301 making a temporary file to hold each group's result. */
1303 ntemps
= (nfiles
+ MAX_DIRECT_MERGE
- 1) / MAX_DIRECT_MERGE
;
1304 tempfiles
= (char **) xmalloc (ntemps
* sizeof (char *));
1305 for (i
= 0; i
< ntemps
; i
++)
1307 int nf
= MAX_DIRECT_MERGE
;
1308 if (i
+ 1 == ntemps
)
1309 nf
= nfiles
- i
* MAX_DIRECT_MERGE
;
1310 tempfiles
[i
] = maketempname (++tempcount
);
1311 value
|= merge_direct (&infiles
[i
* MAX_DIRECT_MERGE
], nf
, tempfiles
[i
]);
1314 /* All temporary files that existed before are no longer needed
1315 since their contents have been merged into our new tempfiles.
1317 flush_tempfiles (start_tempcount
);
1319 /* Now merge the temporary files we created. */
1321 merge_files (tempfiles
, ntemps
, outfile
);
1328 /* Assume (and optionally verify) that each input file is sorted;
1329 merge them and output the result.
1330 Returns nonzero if any input file fails to be sorted.
1332 This version of merging will not work if the number of
1333 input files gets too high. Higher level functions
1334 use it only with a bounded number of input files. */
1337 merge_direct (infiles
, nfiles
, outfile
)
1342 char **ip
= infiles
;
1343 struct linebuffer
*lb1
, *lb2
;
1344 struct linebuffer
**thisline
, **prevline
;
1350 struct linebuffer
*prev_out
= 0;
1351 FILE *ostream
= stdout
;
1355 ostream
= fopen (outfile
, "w");
1357 if (!ostream
) pfatal_with_name (outfile
);
1368 /* For each file, make two line buffers.
1369 Also, for each file, there is an element of `thisline'
1370 which points at any time to one of the file's two buffers,
1371 and an element of `prevline' which points to the other buffer.
1372 `thisline' is supposed to point to the next available line from the file,
1373 while `prevline' holds the last file line used,
1374 which is remembered so that we can verify that the file is properly sorted. */
1376 /* lb1 and lb2 contain one buffer each per file */
1377 lb1
= (struct linebuffer
*) xmalloc (nfiles
* sizeof (struct linebuffer
));
1378 lb2
= (struct linebuffer
*) xmalloc (nfiles
* sizeof (struct linebuffer
));
1380 /* thisline[i] points to the linebuffer holding the next available line in file i,
1381 or is zero if there are no lines left in that file. */
1382 thisline
= (struct linebuffer
**) xmalloc (nfiles
* sizeof (struct linebuffer
*));
1383 /* prevline[i] points to the linebuffer holding the last used line from file i.
1384 This is just for verifying that file i is properly sorted. */
1385 prevline
= (struct linebuffer
**) xmalloc (nfiles
* sizeof (struct linebuffer
*));
1386 /* streams[i] holds the input stream for file i. */
1387 streams
= (FILE **) xmalloc (nfiles
* sizeof (FILE *));
1388 /* file_lossage[i] is nonzero if we already know file i is not properly sorted. */
1389 file_lossage
= (int *) xmalloc (nfiles
* sizeof (int));
1391 /* Allocate and initialize all that storage */
1393 for (i
= 0; i
< nfiles
; i
++)
1395 initbuffer (&lb1
[i
]);
1396 initbuffer (&lb2
[i
]);
1397 thisline
[i
] = &lb1
[i
];
1398 prevline
[i
] = &lb2
[i
];
1399 file_lossage
[i
] = 0;
1400 streams
[i
] = fopen (infiles
[i
], "r");
1402 pfatal_with_name (infiles
[i
]);
1404 readline (thisline
[i
], streams
[i
]);
1407 /* Keep count of number of files not at eof */
1412 struct linebuffer
*best
= 0;
1413 struct linebuffer
*exch
;
1417 /* Look at the next avail line of each file; choose the least one. */
1419 for (i
= 0; i
< nfiles
; i
++)
1423 0 < compare_general (best
->buffer
, thisline
[i
]->buffer
,
1424 (long) bestfile
, (long) i
, num_keyfields
)))
1431 /* Output that line, unless it matches the previous one and we don't want duplicates */
1434 !compare_general (prev_out
->buffer
, best
->buffer
, 0L, 1L, num_keyfields
- 1)))
1435 indexify (best
->buffer
, ostream
);
1438 /* Now make the line the previous of its file, and fetch a new line from that file */
1440 exch
= prevline
[bestfile
];
1441 prevline
[bestfile
] = thisline
[bestfile
];
1442 thisline
[bestfile
] = exch
;
1446 /* If the file has no more, mark it empty */
1448 if (feof (streams
[bestfile
]))
1450 thisline
[bestfile
] = 0;
1451 nleft
--; /* Update the number of files still not empty */
1454 readline (thisline
[bestfile
], streams
[bestfile
]);
1455 if (thisline
[bestfile
]->buffer
[0] || !feof (streams
[bestfile
])) break;
1459 finish_index (ostream
);
1461 /* Free all storage and close all input streams */
1463 for (i
= 0; i
< nfiles
; i
++)
1465 fclose (streams
[i
]);
1466 free (lb1
[i
].buffer
);
1467 free (lb2
[i
].buffer
);
1469 free (file_lossage
);
1482 /* Print error message and exit. */
1491 /* Print error message. `s1' is printf control string, `s2' is arg for it. */
1496 printf ("texindex: ");
1501 perror_with_name (name
)
1506 extern noshare
int sys_nerr
;
1507 extern noshare
char *sys_errlist
[];
1509 extern int errno
, sys_nerr
;
1510 extern char *sys_errlist
[];
1514 if (errno
< sys_nerr
)
1515 s
= concat ("", sys_errlist
[errno
], " for %s");
1517 s
= "cannot open %s";
1521 pfatal_with_name (name
)
1524 extern int errno
, sys_nerr
;
1525 extern char *sys_errlist
[];
1528 if (errno
< sys_nerr
)
1529 s
= concat ("", sys_errlist
[errno
], " for %s");
1531 s
= "cannot open %s";
1535 /* Return a newly-allocated string whose contents concatenate those of s1, s2, s3. */
1541 int len1
= strlen (s1
), len2
= strlen (s2
), len3
= strlen (s3
);
1542 char *result
= (char *) xmalloc (len1
+ len2
+ len3
+ 1);
1544 strcpy (result
, s1
);
1545 strcpy (result
+ len1
, s2
);
1546 strcpy (result
+ len1
+ len2
, s3
);
1547 *(result
+ len1
+ len2
+ len3
) = 0;
1552 /* Like malloc but get fatal error if memory is exhausted. */
1558 int result
= malloc (size
);
1560 fatal ("virtual memory exhausted", 0);
1566 xrealloc (ptr
, size
)
1570 int result
= realloc (ptr
, size
);
1572 fatal ("virtual memory exhausted");
1578 register int length
;
1582 long max_str
= 65535;
1584 while (length
> max_str
) {
1585 (void) LIB$
MOVC5 (&zero
, &zero
, &zero
, &max_str
, b
);
1589 (void) LIB$
MOVC5 (&zero
, &zero
, &zero
, &length
, b
);
1591 while (length
-- > 0)
1593 #endif /* not VMS */