1 /* Prepare TeX index dribble output into an actual index.
2 $Id: texindex.c,v 1.1.1.3 1998/03/24 18:20:31 law Exp $
4 Copyright (C) 1987, 91, 92, 96, 97, 98 Free Software Foundation, Inc.
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307. */
24 # include "../src/config.h"
25 /* Some s/os.h files redefine these. */
32 #if !defined (HAVE_MEMSET)
34 #define memset(ptr, ignore, count) bzero (ptr, count)
42 # define TI_NO_ERROR ((1 << 28) | 1)
43 # define TI_FATAL_ERROR ((1 << 28) | 4)
44 # define unlink delete
46 # define TI_NO_ERROR 0
47 # define TI_FATAL_ERROR 1
50 #if !defined (SEEK_SET)
54 #endif /* !SEEK_SET */
56 /* When sorting in core, this structure describes one line
57 and the position and length of its first keyfield. */
60 char *text
; /* The actual text of the line. */
62 char *text
; /* The start of the key (for textual comparison). */
63 long number
; /* The numeric value (for numeric comparison). */
65 long keylen
; /* Length of KEY field. */
68 /* This structure describes a field to use as a sort key. */
71 int startwords
; /* Number of words to skip. */
72 int startchars
; /* Number of additional chars to skip. */
73 int endwords
; /* Number of words to ignore at end. */
74 int endchars
; /* Ditto for characters of last word. */
75 char ignore_blanks
; /* Non-zero means ignore spaces and tabs. */
76 char fold_case
; /* Non-zero means case doesn't matter. */
77 char reverse
; /* Non-zero means compare in reverse order. */
78 char numeric
; /* Non-zeros means field is ASCII numeric. */
79 char positional
; /* Sort according to file position. */
80 char braced
; /* Count balanced-braced groupings as fields. */
83 /* Vector of keyfields to use. */
84 struct keyfield keyfields
[3];
86 /* Number of keyfields stored in that vector. */
87 int num_keyfields
= 3;
89 /* Vector of input file names, terminated with a null pointer. */
92 /* Vector of corresponding output file names, or NULL, meaning default it
93 (add an `s' to the end). */
96 /* Length of `infiles'. */
99 /* Pointer to the array of pointers to lines being sorted. */
102 /* The allocated length of `linearray'. */
105 /* Directory to use for temporary files. On Unix, it ends with a slash. */
108 /* Start of filename to use for temporary files. */
111 /* Number of last temporary file. */
114 /* Number of last temporary file already deleted.
115 Temporary files are deleted by `flush_tempfiles' in order of creation. */
116 int last_deleted_tempcount
;
118 /* During in-core sort, this points to the base of the data block
119 which contains all the lines of data. */
122 /* Additional command switches .*/
124 /* Nonzero means do not delete tempfiles -- for debugging. */
127 /* The name this program was run with. */
130 /* Forward declarations of functions in this file. */
132 void decode_command ();
133 void sort_in_core ();
134 void sort_offline ();
139 char *find_braced_pos ();
140 char *find_braced_end ();
142 int compare_field ();
147 void pfatal_with_name ();
150 void *xmalloc (), *xrealloc ();
152 char *maketempname ();
153 void flush_tempfiles ();
156 #define MAX_IN_CORE_SORT 500000
166 last_deleted_tempcount
= 0;
168 program_name
= strrchr (argv
[0], '/');
169 if (program_name
!= (char *)NULL
)
172 program_name
= argv
[0];
174 #ifdef HAVE_SETLOCALE
175 /* Set locale via LC_ALL. */
176 setlocale (LC_ALL
, "");
179 /* Set the text message domain. */
180 bindtextdomain (PACKAGE
, LOCALEDIR
);
181 textdomain (PACKAGE
);
183 /* Describe the kind of sorting to do. */
184 /* The first keyfield uses the first braced field and folds case. */
185 keyfields
[0].braced
= 1;
186 keyfields
[0].fold_case
= 1;
187 keyfields
[0].endwords
= -1;
188 keyfields
[0].endchars
= -1;
190 /* The second keyfield uses the second braced field, numerically. */
191 keyfields
[1].braced
= 1;
192 keyfields
[1].numeric
= 1;
193 keyfields
[1].startwords
= 1;
194 keyfields
[1].endwords
= -1;
195 keyfields
[1].endchars
= -1;
197 /* The third keyfield (which is ignored while discarding duplicates)
198 compares the whole line. */
199 keyfields
[2].endwords
= -1;
200 keyfields
[2].endchars
= -1;
202 decode_command (argc
, argv
);
204 tempbase
= mktemp (concat ("txiXXXXXX", "", ""));
206 /* Process input files completely, one by one. */
208 for (i
= 0; i
< num_infiles
; i
++)
214 desc
= open (infiles
[i
], O_RDONLY
, 0);
216 pfatal_with_name (infiles
[i
]);
217 lseek (desc
, (off_t
) 0, SEEK_END
);
218 ptr
= (long) lseek (desc
, (off_t
) 0, SEEK_CUR
);
222 outfile
= outfiles
[i
];
225 outfile
= concat (infiles
[i
], "s", "");
228 if (ptr
< MAX_IN_CORE_SORT
)
229 /* Sort a small amount of data. */
230 sort_in_core (infiles
[i
], ptr
, outfile
);
232 sort_offline (infiles
[i
], ptr
, outfile
);
235 flush_tempfiles (tempcount
);
238 return 0; /* Avoid bogus warnings. */
251 TEXINDEX_OPTION texindex_options
[] = {
252 { "--keep", "-k", &keep_tempfiles
, 1, (char *)NULL
,
253 N_("keep temporary files around after processing") },
254 { "--no-keep", 0, &keep_tempfiles
, 0, (char *)NULL
,
255 N_("do not keep temporary files around after processing (default)") },
256 { "--output", "-o", (int *)NULL
, 0, "FILE",
257 N_("send output to FILE") },
258 { "--version", (char *)NULL
, (int *)NULL
, 0, (char *)NULL
,
259 N_("display version information and exit") },
260 { "--help", "-h", (int *)NULL
, 0, (char *)NULL
,
261 N_("display this help and exit") },
262 { (char *)NULL
, (char *)NULL
, (int *)NULL
, 0, (char *)NULL
}
270 FILE *f
= result_value
? stderr
: stdout
;
272 fprintf (f
, _("Usage: %s [OPTION]... FILE...\n"), program_name
);
273 fprintf (f
, _("Generate a sorted index for each TeX output FILE.\n"));
274 /* Avoid trigraph nonsense. */
275 fprintf (f
, _("Usually FILE... is `foo.??\' for a document `foo.texi'.\n"));
276 fprintf (f
, _("\nOptions:\n"));
278 for (i
= 0; texindex_options
[i
].long_name
; i
++)
280 if (texindex_options
[i
].short_name
)
281 fprintf (f
, "%s, ", texindex_options
[i
].short_name
);
284 texindex_options
[i
].long_name
,
285 texindex_options
[i
].arg_name
286 ? texindex_options
[i
].arg_name
: "");
288 fprintf (f
, "\t%s\n", _(texindex_options
[i
].doc_string
));
290 puts (_("\nEmail bug reports to bug-texinfo@gnu.org."));
295 /* Decode the command line arguments to set the parameter variables
296 and set up the vector of keyfields and the vector of input files. */
299 decode_command (argc
, argv
)
307 /* Store default values into parameter variables. */
309 tempdir
= getenv ("TMPDIR");
312 tempdir
= "sys$scratch:";
317 tempdir
= concat (tempdir
, "/", "");
322 /* Allocate ARGC input files, which must be enough. */
324 infiles
= (char **) xmalloc (argc
* sizeof (char *));
325 outfiles
= (char **) xmalloc (argc
* sizeof (char *));
329 while (arg_index
< argc
)
331 char *arg
= argv
[arg_index
++];
335 if (strcmp (arg
, "--version") == 0)
337 printf ("texindex (GNU %s) %s\n", PACKAGE
, VERSION
);
338 printf (_("Copyright (C) %s Free Software Foundation, Inc.\n\
339 There is NO warranty. You may redistribute this software\n\
340 under the terms of the GNU General Public License.\n\
341 For more information about these matters, see the files named COPYING.\n"),
345 else if ((strcmp (arg
, "--keep") == 0) ||
346 (strcmp (arg
, "-k") == 0))
350 else if ((strcmp (arg
, "--help") == 0) ||
351 (strcmp (arg
, "-h") == 0))
355 else if ((strcmp (arg
, "--output") == 0) ||
356 (strcmp (arg
, "-o") == 0))
358 if (argv
[arg_index
] != (char *)NULL
)
362 *(op
- 1) = argv
[arg_index
];
373 *op
++ = (char *)NULL
;
377 /* Record number of keyfields and terminate list of filenames. */
378 num_infiles
= ip
- infiles
;
380 if (num_infiles
== 0)
384 /* Return a name for a temporary file. */
391 sprintf (tempsuffix
, "%d", count
);
392 return concat (tempdir
, tempbase
, tempsuffix
);
395 /* Delete all temporary files up to TO_COUNT. */
398 flush_tempfiles (to_count
)
403 while (last_deleted_tempcount
< to_count
)
404 unlink (maketempname (++last_deleted_tempcount
));
407 /* Copy the input file open on IDESC into a temporary file
408 and return the temporary file name. */
416 char *outfile
= maketempname (++tempcount
);
418 char buffer
[BUFSIZE
];
420 odesc
= open (outfile
, O_WRONLY
| O_CREAT
, 0666);
423 pfatal_with_name (outfile
);
427 int nread
= read (idesc
, buffer
, BUFSIZE
);
428 write (odesc
, buffer
, nread
);
438 /* Compare LINE1 and LINE2 according to the specified set of keyfields. */
441 compare_full (line1
, line2
)
442 char **line1
, **line2
;
446 /* Compare using the first keyfield;
447 if that does not distinguish the lines, try the second keyfield;
450 for (i
= 0; i
< num_keyfields
; i
++)
452 long length1
, length2
;
453 char *start1
= find_field (&keyfields
[i
], *line1
, &length1
);
454 char *start2
= find_field (&keyfields
[i
], *line2
, &length2
);
455 int tem
= compare_field (&keyfields
[i
], start1
, length1
, *line1
- text_base
,
456 start2
, length2
, *line2
- text_base
);
459 if (keyfields
[i
].reverse
)
465 return 0; /* Lines match exactly. */
468 /* Compare LINE1 and LINE2, described by structures
469 in which the first keyfield is identified in advance.
470 For positional sorting, assumes that the order of the lines in core
471 reflects their nominal order. */
474 compare_prepared (line1
, line2
)
475 struct lineinfo
*line1
, *line2
;
481 /* Compare using the first keyfield, which has been found for us already. */
482 if (keyfields
->positional
)
484 if (line1
->text
- text_base
> line2
->text
- text_base
)
489 else if (keyfields
->numeric
)
490 tem
= line1
->key
.number
- line2
->key
.number
;
492 tem
= compare_field (keyfields
, line1
->key
.text
, line1
->keylen
, 0,
493 line2
->key
.text
, line2
->keylen
, 0);
496 if (keyfields
->reverse
)
504 /* Compare using the second keyfield;
505 if that does not distinguish the lines, try the third keyfield;
508 for (i
= 1; i
< num_keyfields
; i
++)
510 long length1
, length2
;
511 char *start1
= find_field (&keyfields
[i
], text1
, &length1
);
512 char *start2
= find_field (&keyfields
[i
], text2
, &length2
);
513 int tem
= compare_field (&keyfields
[i
], start1
, length1
, text1
- text_base
,
514 start2
, length2
, text2
- text_base
);
517 if (keyfields
[i
].reverse
)
523 return 0; /* Lines match exactly. */
526 /* Like compare_full but more general.
527 You can pass any strings, and you can say how many keyfields to use.
528 POS1 and POS2 should indicate the nominal positional ordering of
529 the two lines in the input. */
532 compare_general (str1
, str2
, pos1
, pos2
, use_keyfields
)
539 /* Compare using the first keyfield;
540 if that does not distinguish the lines, try the second keyfield;
543 for (i
= 0; i
< use_keyfields
; i
++)
545 long length1
, length2
;
546 char *start1
= find_field (&keyfields
[i
], str1
, &length1
);
547 char *start2
= find_field (&keyfields
[i
], str2
, &length2
);
548 int tem
= compare_field (&keyfields
[i
], start1
, length1
, pos1
,
549 start2
, length2
, pos2
);
552 if (keyfields
[i
].reverse
)
558 return 0; /* Lines match exactly. */
561 /* Find the start and length of a field in STR according to KEYFIELD.
562 A pointer to the starting character is returned, and the length
563 is stored into the int that LENGTHPTR points to. */
566 find_field (keyfield
, str
, lengthptr
)
567 struct keyfield
*keyfield
;
575 if (keyfield
->braced
)
576 fun
= find_braced_pos
;
580 start
= (*fun
) (str
, keyfield
->startwords
, keyfield
->startchars
,
581 keyfield
->ignore_blanks
);
582 if (keyfield
->endwords
< 0)
584 if (keyfield
->braced
)
585 end
= find_braced_end (start
);
589 while (*end
&& *end
!= '\n')
595 end
= (*fun
) (str
, keyfield
->endwords
, keyfield
->endchars
, 0);
596 if (end
- str
< start
- str
)
599 *lengthptr
= end
- start
;
603 /* Return a pointer to a specified place within STR,
604 skipping (from the beginning) WORDS words and then CHARS chars.
605 If IGNORE_BLANKS is nonzero, we skip all blanks
606 after finding the specified word. */
609 find_pos (str
, words
, chars
, ignore_blanks
)
617 for (i
= 0; i
< words
; i
++)
620 /* Find next bunch of nonblanks and skip them. */
621 while ((c
= *p
) == ' ' || c
== '\t')
623 while ((c
= *p
) && c
!= '\n' && !(c
== ' ' || c
== '\t'))
625 if (!*p
|| *p
== '\n')
629 while (*p
== ' ' || *p
== '\t')
632 for (i
= 0; i
< chars
; i
++)
634 if (!*p
|| *p
== '\n')
641 /* Like find_pos but assumes that each field is surrounded by braces
642 and that braces within fields are balanced. */
645 find_braced_pos (str
, words
, chars
, ignore_blanks
)
655 for (i
= 0; i
< words
; i
++)
658 while ((c
= *p
++) != '{' && c
!= '\n' && c
)
669 if (c
== 0 || c
== '\n')
674 while ((c
= *p
++) != '{' && c
!= '\n' && c
)
681 while ((c
= *p
) == ' ' || c
== '\t')
684 for (i
= 0; i
< chars
; i
++)
686 if (!*p
|| *p
== '\n')
693 /* Find the end of the balanced-brace field which starts at STR.
694 The position returned is just before the closing brace. */
697 find_braced_end (str
)
712 if (c
== 0 || c
== '\n')
719 find_value (start
, length
)
725 if (isdigit (*start
))
733 /* Vector used to translate characters for comparison.
734 This is how we make all alphanumerics follow all else,
735 and ignore case in the first sorting. */
742 for (i
= 1; i
< 256; i
++)
745 for (i
= '0'; i
<= '9'; i
++)
746 char_order
[i
] += 512;
748 for (i
= 'a'; i
<= 'z'; i
++)
750 char_order
[i
] = 512 + i
;
751 char_order
[i
+ 'A' - 'a'] = 512 + i
;
755 /* Compare two fields (each specified as a start pointer and a character count)
756 according to KEYFIELD.
757 The sign of the value reports the relation between the fields. */
760 compare_field (keyfield
, start1
, length1
, pos1
, start2
, length2
, pos2
)
761 struct keyfield
*keyfield
;
769 if (keyfields
->positional
)
776 if (keyfield
->numeric
)
778 long value
= find_value (start1
, length1
) - find_value (start2
, length2
);
789 char *e1
= start1
+ length1
;
790 char *e2
= start2
+ length2
;
805 if (char_order
[c1
] != char_order
[c2
])
806 return char_order
[c1
] - char_order
[c2
];
811 /* Strings are equal except possibly for case. */
828 /* Reverse sign here so upper case comes out last. */
838 /* A `struct linebuffer' is a structure which holds a line of text.
839 `readline' reads a line from a stream into a linebuffer
840 and works regardless of the length of the line. */
848 /* Initialize LINEBUFFER for use. */
851 initbuffer (linebuffer
)
852 struct linebuffer
*linebuffer
;
854 linebuffer
->size
= 200;
855 linebuffer
->buffer
= (char *) xmalloc (200);
858 /* Read a line of text from STREAM into LINEBUFFER.
859 Return the length of the line. */
862 readline (linebuffer
, stream
)
863 struct linebuffer
*linebuffer
;
866 char *buffer
= linebuffer
->buffer
;
867 char *p
= linebuffer
->buffer
;
868 char *end
= p
+ linebuffer
->size
;
872 int c
= getc (stream
);
875 buffer
= (char *) xrealloc (buffer
, linebuffer
->size
*= 2);
876 p
+= buffer
- linebuffer
->buffer
;
877 end
+= buffer
- linebuffer
->buffer
;
878 linebuffer
->buffer
= buffer
;
880 if (c
< 0 || c
== '\n')
891 /* Sort an input file too big to sort in core. */
894 sort_offline (infile
, nfiles
, total
, outfile
)
900 /* More than enough. */
901 int ntemps
= 2 * (total
+ MAX_IN_CORE_SORT
- 1) / MAX_IN_CORE_SORT
;
902 char **tempfiles
= (char **) xmalloc (ntemps
* sizeof (char *));
903 FILE *istream
= fopen (infile
, "r");
905 struct linebuffer lb
;
911 /* Read in one line of input data. */
913 linelength
= readline (&lb
, istream
);
915 if (lb
.buffer
[0] != '\\' && lb
.buffer
[0] != '@')
917 error (_("%s: not a texinfo index file"), infile
);
921 /* Split up the input into `ntemps' temporary files, or maybe fewer,
922 and put the new files' names into `tempfiles' */
924 for (i
= 0; i
< ntemps
; i
++)
926 char *outname
= maketempname (++tempcount
);
927 FILE *ostream
= fopen (outname
, "w");
931 pfatal_with_name (outname
);
932 tempfiles
[i
] = outname
;
934 /* Copy lines into this temp file as long as it does not make file
935 "too big" or until there are no more lines. */
937 while (tempsize
+ linelength
+ 1 <= MAX_IN_CORE_SORT
)
939 tempsize
+= linelength
+ 1;
940 fputs (lb
.buffer
, ostream
);
941 putc ('\n', ostream
);
943 /* Read another line of input data. */
945 linelength
= readline (&lb
, istream
);
946 if (!linelength
&& feof (istream
))
949 if (lb
.buffer
[0] != '\\' && lb
.buffer
[0] != '@')
951 error (_("%s: not a texinfo index file"), infile
);
964 /* Record number of temp files we actually needed. */
968 /* Sort each tempfile into another tempfile.
969 Delete the first set of tempfiles and put the names of the second
972 for (i
= 0; i
< ntemps
; i
++)
974 char *newtemp
= maketempname (++tempcount
);
975 sort_in_core (&tempfiles
[i
], MAX_IN_CORE_SORT
, newtemp
);
977 unlink (tempfiles
[i
]);
978 tempfiles
[i
] = newtemp
;
984 /* Merge the tempfiles together and indexify. */
986 merge_files (tempfiles
, ntemps
, outfile
);
989 /* Sort INFILE, whose size is TOTAL,
990 assuming that is small enough to be done in-core,
991 then indexify it and send the output to OUTFILE (or to stdout). */
994 sort_in_core (infile
, total
, outfile
)
1000 char *data
= (char *) xmalloc (total
+ 1);
1004 FILE *ostream
= stdout
;
1005 struct lineinfo
*lineinfo
;
1007 /* Read the contents of the file into the moby array `data'. */
1009 int desc
= open (infile
, O_RDONLY
, 0);
1012 fatal (_("failure reopening %s"), infile
);
1013 for (file_size
= 0;;)
1015 i
= read (desc
, data
+ file_size
, total
- file_size
);
1021 data
[file_size
] = 0;
1025 if (file_size
> 0 && data
[0] != '\\' && data
[0] != '@')
1027 error (_("%s: not a texinfo index file"), infile
);
1033 /* Sort routines want to know this address. */
1037 /* Create the array of pointers to lines, with a default size
1038 frequently enough. */
1040 nlines
= total
/ 50;
1043 linearray
= (char **) xmalloc (nlines
* sizeof (char *));
1045 /* `nextline' points to the next free slot in this array.
1046 `nlines' is the allocated size. */
1048 nextline
= linearray
;
1050 /* Parse the input file's data, and make entries for the lines. */
1052 nextline
= parsefile (infile
, nextline
, file_data
, file_size
);
1055 error (_("%s: not a texinfo index file"), infile
);
1059 /* Sort the lines. */
1061 /* If we have enough space, find the first keyfield of each line in advance.
1062 Make a `struct lineinfo' for each line, which records the keyfield
1063 as well as the line, and sort them. */
1065 lineinfo
= (struct lineinfo
*) malloc ((nextline
- linearray
) * sizeof (struct lineinfo
));
1069 struct lineinfo
*lp
;
1072 for (lp
= lineinfo
, p
= linearray
; p
!= nextline
; lp
++, p
++)
1075 lp
->key
.text
= find_field (keyfields
, *p
, &lp
->keylen
);
1076 if (keyfields
->numeric
)
1077 lp
->key
.number
= find_value (lp
->key
.text
, lp
->keylen
);
1080 qsort (lineinfo
, nextline
- linearray
, sizeof (struct lineinfo
),
1083 for (lp
= lineinfo
, p
= linearray
; p
!= nextline
; lp
++, p
++)
1089 qsort (linearray
, nextline
- linearray
, sizeof (char *), compare_full
);
1091 /* Open the output file. */
1095 ostream
= fopen (outfile
, "w");
1097 pfatal_with_name (outfile
);
1100 writelines (linearray
, nextline
- linearray
, ostream
);
1108 /* Parse an input string in core into lines.
1109 DATA is the input string, and SIZE is its length.
1110 Data goes in LINEARRAY starting at NEXTLINE.
1111 The value returned is the first entry in LINEARRAY still unused.
1112 Value 0 means input file contents are invalid. */
1115 parsefile (filename
, nextline
, data
, size
)
1122 char **line
= nextline
;
1130 if (p
[0] != '\\' && p
[0] != '@')
1134 while (*p
&& *p
!= '\n')
1140 if (line
== linearray
+ nlines
)
1142 char **old
= linearray
;
1143 linearray
= (char **) xrealloc (linearray
, sizeof (char *) * (nlines
*= 4));
1144 line
+= linearray
- old
;
1151 /* Indexification is a filter applied to the sorted lines
1152 as they are being written to the output file.
1153 Multiple entries for the same name, with different page numbers,
1154 get combined into a single entry with multiple page numbers.
1155 The first braced field, which is used for sorting, is discarded.
1156 However, its first character is examined, folded to lower case,
1157 and if it is different from that in the previous line fed to us
1158 a \initial line is written with one argument, the new initial.
1160 If an entry has four braced fields, then the second and third
1161 constitute primary and secondary names.
1162 In this case, each change of primary name
1163 generates a \primary line which contains only the primary name,
1164 and in between these are \secondary lines which contain
1165 just a secondary name and page numbers. */
1167 /* The last primary name we wrote a \primary entry for.
1168 If only one level of indexing is being done, this is the last name seen. */
1170 /* Length of storage allocated for lastprimary. */
1171 int lastprimarylength
;
1173 /* Similar, for the secondary name. */
1174 char *lastsecondary
;
1175 int lastsecondarylength
;
1177 /* Zero if we are not in the middle of writing an entry.
1178 One if we have written the beginning of an entry but have not
1179 yet written any page numbers into it.
1180 Greater than one if we have written the beginning of an entry
1181 plus at least one page number. */
1184 /* The initial (for sorting purposes) of the last primary entry written.
1185 When this changes, a \initial {c} line is written */
1189 int lastinitiallength
;
1191 /* When we need a string of length 1 for the value of lastinitial,
1194 char lastinitial1
[2];
1196 /* Initialize static storage for writing an index. */
1202 lastinitial
= lastinitial1
;
1203 lastinitial1
[0] = 0;
1204 lastinitial1
[1] = 0;
1205 lastinitiallength
= 0;
1206 lastprimarylength
= 100;
1207 lastprimary
= (char *) xmalloc (lastprimarylength
+ 1);
1208 memset (lastprimary
, '\0', lastprimarylength
+ 1);
1209 lastsecondarylength
= 100;
1210 lastsecondary
= (char *) xmalloc (lastsecondarylength
+ 1);
1211 memset (lastsecondary
, '\0', lastsecondarylength
+ 1);
1214 /* Indexify. Merge entries for the same name,
1215 insert headers for each initial character, etc. */
1218 indexify (line
, ostream
)
1222 char *primary
, *secondary
, *pagenumber
;
1223 int primarylength
, secondarylength
= 0, pagelength
;
1230 /* First, analyze the parts of the entry fed to us this time. */
1232 p
= find_braced_pos (line
, 0, 0, 0);
1236 /* Get length of inner pair of braces starting at `p',
1237 including that inner pair of braces. */
1238 initiallength
= find_braced_end (p
+ 1) + 1 - p
;
1247 if (initial1
[0] >= 'a' && initial1
[0] <= 'z')
1251 pagenumber
= find_braced_pos (line
, 1, 0, 0);
1252 pagelength
= find_braced_end (pagenumber
) - pagenumber
;
1253 if (pagelength
== 0)
1256 primary
= find_braced_pos (line
, 2, 0, 0);
1257 primarylength
= find_braced_end (primary
) - primary
;
1259 secondary
= find_braced_pos (line
, 3, 0, 0);
1260 nosecondary
= !*secondary
;
1262 secondarylength
= find_braced_end (secondary
) - secondary
;
1264 /* If the primary is different from before, make a new primary entry. */
1265 if (strncmp (primary
, lastprimary
, primarylength
))
1267 /* Close off current secondary entry first, if one is open. */
1270 fputs ("}\n", ostream
);
1274 /* If this primary has a different initial, include an entry for
1276 if (initiallength
!= lastinitiallength
||
1277 strncmp (initial
, lastinitial
, initiallength
))
1279 fprintf (ostream
, "\\initial {");
1280 fwrite (initial
, 1, initiallength
, ostream
);
1281 fputs ("}\n", ostream
);
1282 if (initial
== initial1
)
1284 lastinitial
= lastinitial1
;
1285 *lastinitial1
= *initial1
;
1289 lastinitial
= initial
;
1291 lastinitiallength
= initiallength
;
1294 /* Make the entry for the primary. */
1296 fputs ("\\entry {", ostream
);
1298 fputs ("\\primary {", ostream
);
1299 fwrite (primary
, primarylength
, 1, ostream
);
1302 fputs ("}{", ostream
);
1306 fputs ("}\n", ostream
);
1308 /* Record name of most recent primary. */
1309 if (lastprimarylength
< primarylength
)
1311 lastprimarylength
= primarylength
+ 100;
1312 lastprimary
= (char *) xrealloc (lastprimary
,
1313 1 + lastprimarylength
);
1315 strncpy (lastprimary
, primary
, primarylength
);
1316 lastprimary
[primarylength
] = 0;
1318 /* There is no current secondary within this primary, now. */
1319 lastsecondary
[0] = 0;
1322 /* Should not have an entry with no subtopic following one with a subtopic. */
1324 if (nosecondary
&& *lastsecondary
)
1325 error (_("entry %s follows an entry with a secondary name"), line
);
1327 /* Start a new secondary entry if necessary. */
1328 if (!nosecondary
&& strncmp (secondary
, lastsecondary
, secondarylength
))
1332 fputs ("}\n", ostream
);
1336 /* Write the entry for the secondary. */
1337 fputs ("\\secondary {", ostream
);
1338 fwrite (secondary
, secondarylength
, 1, ostream
);
1339 fputs ("}{", ostream
);
1342 /* Record name of most recent secondary. */
1343 if (lastsecondarylength
< secondarylength
)
1345 lastsecondarylength
= secondarylength
+ 100;
1346 lastsecondary
= (char *) xrealloc (lastsecondary
,
1347 1 + lastsecondarylength
);
1349 strncpy (lastsecondary
, secondary
, secondarylength
);
1350 lastsecondary
[secondarylength
] = 0;
1353 /* Here to add one more page number to the current entry. */
1355 fputs (", ", ostream
); /* Punctuate first, if this is not the first. */
1356 fwrite (pagenumber
, pagelength
, 1, ostream
);
1359 /* Close out any unfinished output entry. */
1362 finish_index (ostream
)
1366 fputs ("}\n", ostream
);
1368 free (lastsecondary
);
1371 /* Copy the lines in the sorted order.
1372 Each line is copied out of the input file it was found in. */
1375 writelines (linearray
, nlines
, ostream
)
1380 char **stop_line
= linearray
+ nlines
;
1385 /* Output the text of the lines, and free the buffer space. */
1387 for (next_line
= linearray
; next_line
!= stop_line
; next_line
++)
1389 /* If -u was specified, output the line only if distinct from previous one. */
1390 if (next_line
== linearray
1391 /* Compare previous line with this one, using only the
1392 explicitly specd keyfields. */
1393 || compare_general (*(next_line
- 1), *next_line
, 0L, 0L, num_keyfields
- 1))
1395 char *p
= *next_line
;
1398 while ((c
= *p
++) && c
!= '\n')
1401 indexify (*next_line
, ostream
);
1405 finish_index (ostream
);
1408 /* Assume (and optionally verify) that each input file is sorted;
1409 merge them and output the result.
1410 Returns nonzero if any input file fails to be sorted.
1412 This is the high-level interface that can handle an unlimited
1415 #define MAX_DIRECT_MERGE 10
1418 merge_files (infiles
, nfiles
, outfile
)
1427 int start_tempcount
= tempcount
;
1429 if (nfiles
<= MAX_DIRECT_MERGE
)
1430 return merge_direct (infiles
, nfiles
, outfile
);
1432 /* Merge groups of MAX_DIRECT_MERGE input files at a time,
1433 making a temporary file to hold each group's result. */
1435 ntemps
= (nfiles
+ MAX_DIRECT_MERGE
- 1) / MAX_DIRECT_MERGE
;
1436 tempfiles
= (char **) xmalloc (ntemps
* sizeof (char *));
1437 for (i
= 0; i
< ntemps
; i
++)
1439 int nf
= MAX_DIRECT_MERGE
;
1440 if (i
+ 1 == ntemps
)
1441 nf
= nfiles
- i
* MAX_DIRECT_MERGE
;
1442 tempfiles
[i
] = maketempname (++tempcount
);
1443 value
|= merge_direct (&infiles
[i
* MAX_DIRECT_MERGE
], nf
, tempfiles
[i
]);
1446 /* All temporary files that existed before are no longer needed
1447 since their contents have been merged into our new tempfiles.
1449 flush_tempfiles (start_tempcount
);
1451 /* Now merge the temporary files we created. */
1453 merge_files (tempfiles
, ntemps
, outfile
);
1460 /* Assume (and optionally verify) that each input file is sorted;
1461 merge them and output the result.
1462 Returns nonzero if any input file fails to be sorted.
1464 This version of merging will not work if the number of
1465 input files gets too high. Higher level functions
1466 use it only with a bounded number of input files. */
1469 merge_direct (infiles
, nfiles
, outfile
)
1474 struct linebuffer
*lb1
, *lb2
;
1475 struct linebuffer
**thisline
, **prevline
;
1481 struct linebuffer
*prev_out
= 0;
1482 FILE *ostream
= stdout
;
1486 ostream
= fopen (outfile
, "w");
1489 pfatal_with_name (outfile
);
1500 /* For each file, make two line buffers.
1501 Also, for each file, there is an element of `thisline'
1502 which points at any time to one of the file's two buffers,
1503 and an element of `prevline' which points to the other buffer.
1504 `thisline' is supposed to point to the next available line from the file,
1505 while `prevline' holds the last file line used,
1506 which is remembered so that we can verify that the file is properly sorted. */
1508 /* lb1 and lb2 contain one buffer each per file. */
1509 lb1
= (struct linebuffer
*) xmalloc (nfiles
* sizeof (struct linebuffer
));
1510 lb2
= (struct linebuffer
*) xmalloc (nfiles
* sizeof (struct linebuffer
));
1512 /* thisline[i] points to the linebuffer holding the next available line in file i,
1513 or is zero if there are no lines left in that file. */
1514 thisline
= (struct linebuffer
**)
1515 xmalloc (nfiles
* sizeof (struct linebuffer
*));
1516 /* prevline[i] points to the linebuffer holding the last used line
1517 from file i. This is just for verifying that file i is properly
1519 prevline
= (struct linebuffer
**)
1520 xmalloc (nfiles
* sizeof (struct linebuffer
*));
1521 /* streams[i] holds the input stream for file i. */
1522 streams
= (FILE **) xmalloc (nfiles
* sizeof (FILE *));
1523 /* file_lossage[i] is nonzero if we already know file i is not
1525 file_lossage
= (int *) xmalloc (nfiles
* sizeof (int));
1527 /* Allocate and initialize all that storage. */
1529 for (i
= 0; i
< nfiles
; i
++)
1531 initbuffer (&lb1
[i
]);
1532 initbuffer (&lb2
[i
]);
1533 thisline
[i
] = &lb1
[i
];
1534 prevline
[i
] = &lb2
[i
];
1535 file_lossage
[i
] = 0;
1536 streams
[i
] = fopen (infiles
[i
], "r");
1538 pfatal_with_name (infiles
[i
]);
1540 readline (thisline
[i
], streams
[i
]);
1543 /* Keep count of number of files not at eof. */
1548 struct linebuffer
*best
= 0;
1549 struct linebuffer
*exch
;
1553 /* Look at the next avail line of each file; choose the least one. */
1555 for (i
= 0; i
< nfiles
; i
++)
1559 0 < compare_general (best
->buffer
, thisline
[i
]->buffer
,
1560 (long) bestfile
, (long) i
, num_keyfields
)))
1567 /* Output that line, unless it matches the previous one and we
1568 don't want duplicates. */
1571 !compare_general (prev_out
->buffer
,
1572 best
->buffer
, 0L, 1L, num_keyfields
- 1)))
1573 indexify (best
->buffer
, ostream
);
1576 /* Now make the line the previous of its file, and fetch a new
1577 line from that file. */
1579 exch
= prevline
[bestfile
];
1580 prevline
[bestfile
] = thisline
[bestfile
];
1581 thisline
[bestfile
] = exch
;
1585 /* If the file has no more, mark it empty. */
1587 if (feof (streams
[bestfile
]))
1589 thisline
[bestfile
] = 0;
1590 /* Update the number of files still not empty. */
1594 readline (thisline
[bestfile
], streams
[bestfile
]);
1595 if (thisline
[bestfile
]->buffer
[0] || !feof (streams
[bestfile
]))
1600 finish_index (ostream
);
1602 /* Free all storage and close all input streams. */
1604 for (i
= 0; i
< nfiles
; i
++)
1606 fclose (streams
[i
]);
1607 free (lb1
[i
].buffer
);
1608 free (lb2
[i
].buffer
);
1610 free (file_lossage
);
1623 /* Print error message and exit. */
1629 error (format
, arg
);
1630 exit (TI_FATAL_ERROR
);
1633 /* Print error message. FORMAT is printf control string, ARG is arg for it. */
1638 printf ("%s: ", program_name
);
1639 printf (format
, arg
);
1640 if (format
[strlen (format
) -1] != '\n')
1645 perror_with_name (name
)
1650 s
= strerror (errno
);
1651 printf ("%s: ", program_name
);
1652 printf ("%s; for file `%s'.\n", s
, name
);
1656 pfatal_with_name (name
)
1661 s
= strerror (errno
);
1662 printf ("%s: ", program_name
);
1663 printf (_("%s; for file `%s'.\n"), s
, name
);
1664 exit (TI_FATAL_ERROR
);
1667 /* Return a newly-allocated string whose contents concatenate those of
1674 int len1
= strlen (s1
), len2
= strlen (s2
), len3
= strlen (s3
);
1675 char *result
= (char *) xmalloc (len1
+ len2
+ len3
+ 1);
1677 strcpy (result
, s1
);
1678 strcpy (result
+ len1
, s2
);
1679 strcpy (result
+ len1
+ len2
, s3
);
1680 *(result
+ len1
+ len2
+ len3
) = 0;
1685 #if !defined (HAVE_STRERROR)
1686 extern char *sys_errlist
[];
1687 extern int sys_nerr
;
1693 if (num
>= sys_nerr
)
1696 return (sys_errlist
[num
]);
1698 #endif /* !HAVE_STRERROR */
1700 #if !defined (HAVE_STRCHR)
1702 strrchr (string
, character
)
1708 for (i
= strlen (string
) - 1; i
> -1; i
--)
1709 if (string
[i
] == character
)
1710 return (string
+ i
);
1712 return ((char *)NULL
);
1714 #endif /* HAVE_STRCHR */
1717 memory_error (callers_name
, bytes_wanted
)
1721 char printable_string
[80];
1723 sprintf (printable_string
,
1724 _("Virtual memory exhausted in %s ()! Needed %d bytes."),
1725 callers_name
, bytes_wanted
);
1727 error (printable_string
);
1731 /* Just like malloc, but kills the program in case of fatal error. */
1736 void *temp
= (void *) malloc (nbytes
);
1738 if (nbytes
&& temp
== (void *)NULL
)
1739 memory_error ("xmalloc", nbytes
);
1744 /* Like realloc (), but barfs if there isn't enough memory. */
1746 xrealloc (pointer
, nbytes
)
1753 temp
= (void *)xmalloc (nbytes
);
1755 temp
= (void *)realloc (pointer
, nbytes
);
1757 if (nbytes
&& !temp
)
1758 memory_error ("xrealloc", nbytes
);