amd64 port: mainly on the pmap headers, identify_cpu and initcpu
[dragonfly/port-amd64.git] / contrib / diffutils-2.8 / src / io.c
blob3ddc1f8ab3ecf2e2f7174545ebcdc2c1ff508d43
1 /* File I/O for GNU DIFF.
3 Copyright (C) 1988, 1989, 1992, 1993, 1994, 1995, 1998, 2001, 2002,
4 2004 Free Software Foundation, Inc.
6 This file is part of GNU DIFF.
8 GNU DIFF is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2, or (at your option)
11 any later version.
13 GNU DIFF is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; see the file COPYING.
20 If not, write to the Free Software Foundation,
21 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
23 #include "diff.h"
24 #include <cmpbuf.h>
25 #include <file-type.h>
26 #include <setmode.h>
27 #include <xalloc.h>
29 /* Rotate an unsigned value to the left. */
30 #define ROL(v, n) ((v) << (n) | (v) >> (sizeof (v) * CHAR_BIT - (n)))
32 /* Given a hash value and a new character, return a new hash value. */
33 #define HASH(h, c) ((c) + ROL (h, 7))
35 /* The type of a hash value. */
36 typedef size_t hash_value;
37 verify (hash_value_is_unsigned, ! TYPE_SIGNED (hash_value));
39 /* Lines are put into equivalence classes of lines that match in lines_differ.
40 Each equivalence class is represented by one of these structures,
41 but only while the classes are being computed.
42 Afterward, each class is represented by a number. */
43 struct equivclass
45 lin next; /* Next item in this bucket. */
46 hash_value hash; /* Hash of lines in this class. */
47 char const *line; /* A line that fits this class. */
48 size_t length; /* That line's length, not counting its newline. */
51 /* Hash-table: array of buckets, each being a chain of equivalence classes.
52 buckets[-1] is reserved for incomplete lines. */
53 static lin *buckets;
55 /* Number of buckets in the hash table array, not counting buckets[-1]. */
56 static size_t nbuckets;
58 /* Array in which the equivalence classes are allocated.
59 The bucket-chains go through the elements in this array.
60 The number of an equivalence class is its index in this array. */
61 static struct equivclass *equivs;
63 /* Index of first free element in the array `equivs'. */
64 static lin equivs_index;
66 /* Number of elements allocated in the array `equivs'. */
67 static lin equivs_alloc;
69 /* Read a block of data into a file buffer, checking for EOF and error. */
71 void
72 file_block_read (struct file_data *current, size_t size)
74 if (size && ! current->eof)
76 size_t s = block_read (current->desc,
77 FILE_BUFFER (current) + current->buffered, size);
78 if (s == SIZE_MAX)
79 pfatal_with_name (current->name);
80 current->buffered += s;
81 current->eof = s < size;
85 /* Check for binary files and compare them for exact identity. */
87 /* Return 1 if BUF contains a non text character.
88 SIZE is the number of characters in BUF. */
90 #define binary_file_p(buf, size) (memchr (buf, 0, size) != 0)
92 /* Get ready to read the current file.
93 Return nonzero if SKIP_TEST is zero,
94 and if it appears to be a binary file. */
96 static bool
97 sip (struct file_data *current, bool skip_test)
99 /* If we have a nonexistent file at this stage, treat it as empty. */
100 if (current->desc < 0)
102 /* Leave room for a sentinel. */
103 current->bufsize = sizeof (word);
104 current->buffer = xmalloc (current->bufsize);
106 else
108 current->bufsize = buffer_lcm (sizeof (word),
109 STAT_BLOCKSIZE (current->stat),
110 PTRDIFF_MAX - 2 * sizeof (word));
111 current->buffer = xmalloc (current->bufsize);
113 if (! skip_test)
115 /* Check first part of file to see if it's a binary file. */
117 bool was_binary = set_binary_mode (current->desc, true);
118 off_t buffered;
119 file_block_read (current, current->bufsize);
120 buffered = current->buffered;
122 if (! was_binary)
124 /* Revert to text mode and seek back to the beginning to
125 reread the file. Use relative seek, since file
126 descriptors like stdin might not start at offset
127 zero. */
129 if (lseek (current->desc, - buffered, SEEK_CUR) == -1)
130 pfatal_with_name (current->name);
131 set_binary_mode (current->desc, false);
132 current->buffered = 0;
133 current->eof = false;
136 return binary_file_p (current->buffer, buffered);
140 current->buffered = 0;
141 current->eof = false;
142 return false;
145 /* Slurp the rest of the current file completely into memory. */
147 static void
148 slurp (struct file_data *current)
150 size_t cc;
152 if (current->desc < 0)
154 /* The file is nonexistent. */
155 return;
158 if (S_ISREG (current->stat.st_mode))
160 /* It's a regular file; slurp in the rest all at once. */
162 /* Get the size out of the stat block.
163 Allocate just enough room for appended newline plus word sentinel,
164 plus word-alignment since we want the buffer word-aligned. */
165 size_t file_size = current->stat.st_size;
166 cc = file_size + 2 * sizeof (word) - file_size % sizeof (word);
167 if (file_size != current->stat.st_size || cc < file_size
168 || PTRDIFF_MAX <= cc)
169 xalloc_die ();
171 if (current->bufsize < cc)
173 current->bufsize = cc;
174 current->buffer = xrealloc (current->buffer, cc);
177 /* Try to read at least 1 more byte than the size indicates, to
178 detect whether the file is growing. This is a nicety for
179 users who run 'diff' on files while they are changing. */
181 if (current->buffered <= file_size)
183 file_block_read (current, file_size + 1 - current->buffered);
184 if (current->buffered <= file_size)
185 return;
189 /* It's not a regular file, or it's a growing regular file; read it,
190 growing the buffer as needed. */
192 file_block_read (current, current->bufsize - current->buffered);
194 if (current->buffered)
196 while (current->buffered == current->bufsize)
198 if (PTRDIFF_MAX / 2 - sizeof (word) < current->bufsize)
199 xalloc_die ();
200 current->bufsize *= 2;
201 current->buffer = xrealloc (current->buffer, current->bufsize);
202 file_block_read (current, current->bufsize - current->buffered);
205 /* Allocate just enough room for appended newline plus word
206 sentinel, plus word-alignment. */
207 cc = current->buffered + 2 * sizeof (word);
208 current->bufsize = cc - cc % sizeof (word);
209 current->buffer = xrealloc (current->buffer, current->bufsize);
213 /* Split the file into lines, simultaneously computing the equivalence
214 class for each line. */
216 static void
217 find_and_hash_each_line (struct file_data *current)
219 hash_value h;
220 char const *p = current->prefix_end;
221 unsigned char c;
222 lin i, *bucket;
223 size_t length;
225 /* Cache often-used quantities in local variables to help the compiler. */
226 char const **linbuf = current->linbuf;
227 lin alloc_lines = current->alloc_lines;
228 lin line = 0;
229 lin linbuf_base = current->linbuf_base;
230 lin *cureqs = xmalloc (alloc_lines * sizeof *cureqs);
231 struct equivclass *eqs = equivs;
232 lin eqs_index = equivs_index;
233 lin eqs_alloc = equivs_alloc;
234 char const *suffix_begin = current->suffix_begin;
235 char const *bufend = FILE_BUFFER (current) + current->buffered;
236 bool diff_length_compare_anyway =
237 ignore_white_space != IGNORE_NO_WHITE_SPACE;
238 bool same_length_diff_contents_compare_anyway =
239 diff_length_compare_anyway | ignore_case;
241 while (p < suffix_begin)
243 char const *ip = p;
245 h = 0;
247 /* Hash this line until we find a newline. */
248 if (ignore_case)
249 switch (ignore_white_space)
251 case IGNORE_ALL_SPACE:
252 while ((c = *p++) != '\n')
253 if (! isspace (c))
254 h = HASH (h, tolower (c));
255 break;
257 case IGNORE_SPACE_CHANGE:
258 while ((c = *p++) != '\n')
260 if (isspace (c))
263 if ((c = *p++) == '\n')
264 goto hashing_done;
265 while (isspace (c));
267 h = HASH (h, ' ');
270 /* C is now the first non-space. */
271 h = HASH (h, tolower (c));
273 break;
275 case IGNORE_TAB_EXPANSION:
277 size_t column = 0;
278 while ((c = *p++) != '\n')
280 size_t repetitions = 1;
282 switch (c)
284 case '\b':
285 column -= 0 < column;
286 break;
288 case '\t':
289 c = ' ';
290 repetitions = tabsize - column % tabsize;
291 column = (column + repetitions < column
293 : column + repetitions);
294 break;
296 case '\r':
297 column = 0;
298 break;
300 default:
301 c = tolower (c);
302 column++;
303 break;
307 h = HASH (h, c);
308 while (--repetitions != 0);
311 break;
313 default:
314 while ((c = *p++) != '\n')
315 h = HASH (h, tolower (c));
316 break;
318 else
319 switch (ignore_white_space)
321 case IGNORE_ALL_SPACE:
322 while ((c = *p++) != '\n')
323 if (! isspace (c))
324 h = HASH (h, c);
325 break;
327 case IGNORE_SPACE_CHANGE:
328 while ((c = *p++) != '\n')
330 if (isspace (c))
333 if ((c = *p++) == '\n')
334 goto hashing_done;
335 while (isspace (c));
337 h = HASH (h, ' ');
340 /* C is now the first non-space. */
341 h = HASH (h, c);
343 break;
345 case IGNORE_TAB_EXPANSION:
347 size_t column = 0;
348 while ((c = *p++) != '\n')
350 size_t repetitions = 1;
352 switch (c)
354 case '\b':
355 column -= 0 < column;
356 break;
358 case '\t':
359 c = ' ';
360 repetitions = tabsize - column % tabsize;
361 column = (column + repetitions < column
363 : column + repetitions);
364 break;
366 case '\r':
367 column = 0;
368 break;
370 default:
371 column++;
372 break;
376 h = HASH (h, c);
377 while (--repetitions != 0);
380 break;
382 default:
383 while ((c = *p++) != '\n')
384 h = HASH (h, c);
385 break;
388 hashing_done:;
390 bucket = &buckets[h % nbuckets];
391 length = p - ip - 1;
393 if (p == bufend
394 && current->missing_newline
395 && ROBUST_OUTPUT_STYLE (output_style))
397 /* This line is incomplete. If this is significant,
398 put the line into buckets[-1]. */
399 if (ignore_white_space < IGNORE_SPACE_CHANGE)
400 bucket = &buckets[-1];
402 /* Omit the inserted newline when computing linbuf later. */
403 p--;
404 bufend = suffix_begin = p;
407 for (i = *bucket; ; i = eqs[i].next)
408 if (!i)
410 /* Create a new equivalence class in this bucket. */
411 i = eqs_index++;
412 if (i == eqs_alloc)
414 if (PTRDIFF_MAX / (2 * sizeof *eqs) <= eqs_alloc)
415 xalloc_die ();
416 eqs_alloc *= 2;
417 eqs = xrealloc (eqs, eqs_alloc * sizeof *eqs);
419 eqs[i].next = *bucket;
420 eqs[i].hash = h;
421 eqs[i].line = ip;
422 eqs[i].length = length;
423 *bucket = i;
424 break;
426 else if (eqs[i].hash == h)
428 char const *eqline = eqs[i].line;
430 /* Reuse existing class if lines_differ reports the lines
431 equal. */
432 if (eqs[i].length == length)
434 /* Reuse existing equivalence class if the lines are identical.
435 This detects the common case of exact identity
436 faster than lines_differ would. */
437 if (memcmp (eqline, ip, length) == 0)
438 break;
439 if (!same_length_diff_contents_compare_anyway)
440 continue;
442 else if (!diff_length_compare_anyway)
443 continue;
445 if (! lines_differ (eqline, ip))
446 break;
449 /* Maybe increase the size of the line table. */
450 if (line == alloc_lines)
452 /* Double (alloc_lines - linbuf_base) by adding to alloc_lines. */
453 if (PTRDIFF_MAX / 3 <= alloc_lines
454 || PTRDIFF_MAX / sizeof *cureqs <= 2 * alloc_lines - linbuf_base
455 || PTRDIFF_MAX / sizeof *linbuf <= alloc_lines - linbuf_base)
456 xalloc_die ();
457 alloc_lines = 2 * alloc_lines - linbuf_base;
458 cureqs = xrealloc (cureqs, alloc_lines * sizeof *cureqs);
459 linbuf += linbuf_base;
460 linbuf = xrealloc (linbuf,
461 (alloc_lines - linbuf_base) * sizeof *linbuf);
462 linbuf -= linbuf_base;
464 linbuf[line] = ip;
465 cureqs[line] = i;
466 ++line;
469 current->buffered_lines = line;
471 for (i = 0; ; i++)
473 /* Record the line start for lines in the suffix that we care about.
474 Record one more line start than lines,
475 so that we can compute the length of any buffered line. */
476 if (line == alloc_lines)
478 /* Double (alloc_lines - linbuf_base) by adding to alloc_lines. */
479 if (PTRDIFF_MAX / 3 <= alloc_lines
480 || PTRDIFF_MAX / sizeof *cureqs <= 2 * alloc_lines - linbuf_base
481 || PTRDIFF_MAX / sizeof *linbuf <= alloc_lines - linbuf_base)
482 xalloc_die ();
483 alloc_lines = 2 * alloc_lines - linbuf_base;
484 linbuf += linbuf_base;
485 linbuf = xrealloc (linbuf,
486 (alloc_lines - linbuf_base) * sizeof *linbuf);
487 linbuf -= linbuf_base;
489 linbuf[line] = p;
491 if (p == bufend)
492 break;
494 if (context <= i && no_diff_means_no_output)
495 break;
497 line++;
499 while (*p++ != '\n')
500 continue;
503 /* Done with cache in local variables. */
504 current->linbuf = linbuf;
505 current->valid_lines = line;
506 current->alloc_lines = alloc_lines;
507 current->equivs = cureqs;
508 equivs = eqs;
509 equivs_alloc = eqs_alloc;
510 equivs_index = eqs_index;
513 /* Prepare the text. Make sure the text end is initialized.
514 Make sure text ends in a newline,
515 but remember that we had to add one.
516 Strip trailing CRs, if that was requested. */
518 static void
519 prepare_text (struct file_data *current)
521 size_t buffered = current->buffered;
522 char *p = FILE_BUFFER (current);
523 char *dst;
525 if (buffered == 0 || p[buffered - 1] == '\n')
526 current->missing_newline = false;
527 else
529 p[buffered++] = '\n';
530 current->missing_newline = true;
533 if (!p)
534 return;
536 /* Don't use uninitialized storage when planting or using sentinels. */
537 memset (p + buffered, 0, sizeof (word));
539 if (strip_trailing_cr && (dst = memchr (p, '\r', buffered)))
541 char const *src = dst;
542 char const *srclim = p + buffered;
545 dst += ! ((*dst = *src++) == '\r' && *src == '\n');
546 while (src < srclim);
548 buffered -= src - dst;
551 current->buffered = buffered;
554 /* We have found N lines in a buffer of size S; guess the
555 proportionate number of lines that will be found in a buffer of
556 size T. However, do not guess a number of lines so large that the
557 resulting line table might cause overflow in size calculations. */
558 static lin
559 guess_lines (lin n, size_t s, size_t t)
561 size_t guessed_bytes_per_line = n < 10 ? 32 : s / (n - 1);
562 lin guessed_lines = MAX (1, t / guessed_bytes_per_line);
563 return MIN (guessed_lines, PTRDIFF_MAX / (2 * sizeof (char *) + 1) - 5) + 5;
566 /* Given a vector of two file_data objects, find the identical
567 prefixes and suffixes of each object. */
569 static void
570 find_identical_ends (struct file_data filevec[])
572 word *w0, *w1;
573 char *p0, *p1, *buffer0, *buffer1;
574 char const *end0, *beg0;
575 char const **linbuf0, **linbuf1;
576 lin i, lines;
577 size_t n0, n1;
578 lin alloc_lines0, alloc_lines1;
579 lin buffered_prefix, prefix_count, prefix_mask;
580 lin middle_guess, suffix_guess;
582 slurp (&filevec[0]);
583 prepare_text (&filevec[0]);
584 if (filevec[0].desc != filevec[1].desc)
586 slurp (&filevec[1]);
587 prepare_text (&filevec[1]);
589 else
591 filevec[1].buffer = filevec[0].buffer;
592 filevec[1].bufsize = filevec[0].bufsize;
593 filevec[1].buffered = filevec[0].buffered;
594 filevec[1].missing_newline = filevec[0].missing_newline;
597 /* Find identical prefix. */
599 w0 = filevec[0].buffer;
600 w1 = filevec[1].buffer;
601 p0 = buffer0 = (char *) w0;
602 p1 = buffer1 = (char *) w1;
603 n0 = filevec[0].buffered;
604 n1 = filevec[1].buffered;
606 if (p0 == p1)
607 /* The buffers are the same; sentinels won't work. */
608 p0 = p1 += n1;
609 else
611 /* Insert end sentinels, in this case characters that are guaranteed
612 to make the equality test false, and thus terminate the loop. */
614 if (n0 < n1)
615 p0[n0] = ~p1[n0];
616 else
617 p1[n1] = ~p0[n1];
619 /* Loop until first mismatch, or to the sentinel characters. */
621 /* Compare a word at a time for speed. */
622 while (*w0 == *w1)
623 w0++, w1++;
625 /* Do the last few bytes of comparison a byte at a time. */
626 p0 = (char *) w0;
627 p1 = (char *) w1;
628 while (*p0 == *p1)
629 p0++, p1++;
631 /* Don't mistakenly count missing newline as part of prefix. */
632 if (ROBUST_OUTPUT_STYLE (output_style)
633 && ((buffer0 + n0 - filevec[0].missing_newline < p0)
635 (buffer1 + n1 - filevec[1].missing_newline < p1)))
636 p0--, p1--;
639 /* Now P0 and P1 point at the first nonmatching characters. */
641 /* Skip back to last line-beginning in the prefix,
642 and then discard up to HORIZON_LINES lines from the prefix. */
643 i = horizon_lines;
644 while (p0 != buffer0 && (p0[-1] != '\n' || i--))
645 p0--, p1--;
647 /* Record the prefix. */
648 filevec[0].prefix_end = p0;
649 filevec[1].prefix_end = p1;
651 /* Find identical suffix. */
653 /* P0 and P1 point beyond the last chars not yet compared. */
654 p0 = buffer0 + n0;
655 p1 = buffer1 + n1;
657 if (! ROBUST_OUTPUT_STYLE (output_style)
658 || filevec[0].missing_newline == filevec[1].missing_newline)
660 end0 = p0; /* Addr of last char in file 0. */
662 /* Get value of P0 at which we should stop scanning backward:
663 this is when either P0 or P1 points just past the last char
664 of the identical prefix. */
665 beg0 = filevec[0].prefix_end + (n0 < n1 ? 0 : n0 - n1);
667 /* Scan back until chars don't match or we reach that point. */
668 for (; p0 != beg0; p0--, p1--)
669 if (*p0 != *p1)
671 /* Point at the first char of the matching suffix. */
672 beg0 = p0;
673 break;
676 /* Are we at a line-beginning in both files? If not, add the rest of
677 this line to the main body. Discard up to HORIZON_LINES lines from
678 the identical suffix. Also, discard one extra line,
679 because shift_boundaries may need it. */
680 i = horizon_lines + !((buffer0 == p0 || p0[-1] == '\n')
682 (buffer1 == p1 || p1[-1] == '\n'));
683 while (i-- && p0 != end0)
684 while (*p0++ != '\n')
685 continue;
687 p1 += p0 - beg0;
690 /* Record the suffix. */
691 filevec[0].suffix_begin = p0;
692 filevec[1].suffix_begin = p1;
694 /* Calculate number of lines of prefix to save.
696 prefix_count == 0 means save the whole prefix;
697 we need this for options like -D that output the whole file,
698 or for enormous contexts (to avoid worrying about arithmetic overflow).
699 We also need it for options like -F that output some preceding line;
700 at least we will need to find the last few lines,
701 but since we don't know how many, it's easiest to find them all.
703 Otherwise, prefix_count != 0. Save just prefix_count lines at start
704 of the line buffer; they'll be moved to the proper location later.
705 Handle 1 more line than the context says (because we count 1 too many),
706 rounded up to the next power of 2 to speed index computation. */
708 if (no_diff_means_no_output && ! function_regexp.fastmap
709 && context < LIN_MAX / 4 && context < n0)
711 middle_guess = guess_lines (0, 0, p0 - filevec[0].prefix_end);
712 suffix_guess = guess_lines (0, 0, buffer0 + n0 - p0);
713 for (prefix_count = 1; prefix_count <= context; prefix_count *= 2)
714 continue;
715 alloc_lines0 = (prefix_count + middle_guess
716 + MIN (context, suffix_guess));
718 else
720 prefix_count = 0;
721 alloc_lines0 = guess_lines (0, 0, n0);
724 prefix_mask = prefix_count - 1;
725 lines = 0;
726 linbuf0 = xmalloc (alloc_lines0 * sizeof *linbuf0);
727 p0 = buffer0;
729 /* If the prefix is needed, find the prefix lines. */
730 if (! (no_diff_means_no_output
731 && filevec[0].prefix_end == p0
732 && filevec[1].prefix_end == p1))
734 end0 = filevec[0].prefix_end;
735 while (p0 != end0)
737 lin l = lines++ & prefix_mask;
738 if (l == alloc_lines0)
740 if (PTRDIFF_MAX / (2 * sizeof *linbuf0) <= alloc_lines0)
741 xalloc_die ();
742 alloc_lines0 *= 2;
743 linbuf0 = xrealloc (linbuf0, alloc_lines0 * sizeof *linbuf0);
745 linbuf0[l] = p0;
746 while (*p0++ != '\n')
747 continue;
750 buffered_prefix = prefix_count && context < lines ? context : lines;
752 /* Allocate line buffer 1. */
754 middle_guess = guess_lines (lines, p0 - buffer0, p1 - filevec[1].prefix_end);
755 suffix_guess = guess_lines (lines, p0 - buffer0, buffer1 + n1 - p1);
756 alloc_lines1 = buffered_prefix + middle_guess + MIN (context, suffix_guess);
757 if (alloc_lines1 < buffered_prefix
758 || PTRDIFF_MAX / sizeof *linbuf1 <= alloc_lines1)
759 xalloc_die ();
760 linbuf1 = xmalloc (alloc_lines1 * sizeof *linbuf1);
762 if (buffered_prefix != lines)
764 /* Rotate prefix lines to proper location. */
765 for (i = 0; i < buffered_prefix; i++)
766 linbuf1[i] = linbuf0[(lines - context + i) & prefix_mask];
767 for (i = 0; i < buffered_prefix; i++)
768 linbuf0[i] = linbuf1[i];
771 /* Initialize line buffer 1 from line buffer 0. */
772 for (i = 0; i < buffered_prefix; i++)
773 linbuf1[i] = linbuf0[i] - buffer0 + buffer1;
775 /* Record the line buffer, adjusted so that
776 linbuf[0] points at the first differing line. */
777 filevec[0].linbuf = linbuf0 + buffered_prefix;
778 filevec[1].linbuf = linbuf1 + buffered_prefix;
779 filevec[0].linbuf_base = filevec[1].linbuf_base = - buffered_prefix;
780 filevec[0].alloc_lines = alloc_lines0 - buffered_prefix;
781 filevec[1].alloc_lines = alloc_lines1 - buffered_prefix;
782 filevec[0].prefix_lines = filevec[1].prefix_lines = lines;
785 /* If 1 < k, then (2**k - prime_offset[k]) is the largest prime less
786 than 2**k. This table is derived from Chris K. Caldwell's list
787 <http://www.utm.edu/research/primes/lists/2small/>. */
789 static unsigned char const prime_offset[] =
791 0, 0, 1, 1, 3, 1, 3, 1, 5, 3, 3, 9, 3, 1, 3, 19, 15, 1, 5, 1, 3, 9, 3,
792 15, 3, 39, 5, 39, 57, 3, 35, 1, 5, 9, 41, 31, 5, 25, 45, 7, 87, 21,
793 11, 57, 17, 55, 21, 115, 59, 81, 27, 129, 47, 111, 33, 55, 5, 13, 27,
794 55, 93, 1, 57, 25
797 /* Verify that this host's size_t is not too wide for the above table. */
799 verify (enough_prime_offsets,
800 sizeof (size_t) * CHAR_BIT <= sizeof prime_offset);
802 /* Given a vector of two file_data objects, read the file associated
803 with each one, and build the table of equivalence classes.
804 Return nonzero if either file appears to be a binary file.
805 If PRETEND_BINARY is nonzero, pretend they are binary regardless. */
807 bool
808 read_files (struct file_data filevec[], bool pretend_binary)
810 int i;
811 bool skip_test = text | pretend_binary;
812 bool appears_binary = pretend_binary | sip (&filevec[0], skip_test);
814 if (filevec[0].desc != filevec[1].desc)
815 appears_binary |= sip (&filevec[1], skip_test | appears_binary);
816 else
818 filevec[1].buffer = filevec[0].buffer;
819 filevec[1].bufsize = filevec[0].bufsize;
820 filevec[1].buffered = filevec[0].buffered;
822 if (appears_binary)
824 set_binary_mode (filevec[0].desc, true);
825 set_binary_mode (filevec[1].desc, true);
826 return true;
829 find_identical_ends (filevec);
831 equivs_alloc = filevec[0].alloc_lines + filevec[1].alloc_lines + 1;
832 if (PTRDIFF_MAX / sizeof *equivs <= equivs_alloc)
833 xalloc_die ();
834 equivs = xmalloc (equivs_alloc * sizeof *equivs);
835 /* Equivalence class 0 is permanently safe for lines that were not
836 hashed. Real equivalence classes start at 1. */
837 equivs_index = 1;
839 /* Allocate (one plus) a prime number of hash buckets. Use a prime
840 number between 1/3 and 2/3 of the value of equiv_allocs,
841 approximately. */
842 for (i = 9; (size_t) 1 << i < equivs_alloc / 3; i++)
843 continue;
844 nbuckets = ((size_t) 1 << i) - prime_offset[i];
845 if (PTRDIFF_MAX / sizeof *buckets <= nbuckets)
846 xalloc_die ();
847 buckets = zalloc ((nbuckets + 1) * sizeof *buckets);
848 buckets++;
850 for (i = 0; i < 2; i++)
851 find_and_hash_each_line (&filevec[i]);
853 filevec[0].equiv_max = filevec[1].equiv_max = equivs_index;
855 free (equivs);
856 free (buckets - 1);
858 return false;