rc.conf: Add and document the missing root_rw_mount=YES
[dragonfly.git] / contrib / diffutils / src / io.c
blob6c03c70ab8fc6de0b811fb973230bb0aa31f68a6
1 /* File I/O for GNU DIFF.
3 Copyright (C) 1988-1989, 1992-1995, 1998, 2001-2002, 2004, 2006, 2009-2013,
4 2015-2018 Free Software Foundation, Inc.
6 This file is part of GNU DIFF.
8 This program is free software: you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation, either version 3 of the License, or
11 (at your option) any later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program. If not, see <http://www.gnu.org/licenses/>. */
21 #include "diff.h"
22 #include <binary-io.h>
23 #include <cmpbuf.h>
24 #include <file-type.h>
25 #include <xalloc.h>
27 /* Rotate an unsigned value to the left. */
28 #define ROL(v, n) ((v) << (n) | (v) >> (sizeof (v) * CHAR_BIT - (n)))
30 /* Given a hash value and a new character, return a new hash value. */
31 #define HASH(h, c) ((c) + ROL (h, 7))
33 /* The type of a hash value. */
34 typedef size_t hash_value;
35 verify (! TYPE_SIGNED (hash_value));
37 /* Lines are put into equivalence classes of lines that match in lines_differ.
38 Each equivalence class is represented by one of these structures,
39 but only while the classes are being computed.
40 Afterward, each class is represented by a number. */
41 struct equivclass
43 lin next; /* Next item in this bucket. */
44 hash_value hash; /* Hash of lines in this class. */
45 char const *line; /* A line that fits this class. */
46 size_t length; /* That line's length, not counting its newline. */
49 /* Hash-table: array of buckets, each being a chain of equivalence classes.
50 buckets[-1] is reserved for incomplete lines. */
51 static lin *buckets;
53 /* Number of buckets in the hash table array, not counting buckets[-1]. */
54 static size_t nbuckets;
56 /* Array in which the equivalence classes are allocated.
57 The bucket-chains go through the elements in this array.
58 The number of an equivalence class is its index in this array. */
59 static struct equivclass *equivs;
61 /* Index of first free element in the array 'equivs'. */
62 static lin equivs_index;
64 /* Number of elements allocated in the array 'equivs'. */
65 static lin equivs_alloc;
67 /* Read a block of data into a file buffer, checking for EOF and error. */
69 void
70 file_block_read (struct file_data *current, size_t size)
72 if (size && ! current->eof)
74 size_t s = block_read (current->desc,
75 FILE_BUFFER (current) + current->buffered, size);
76 if (s == SIZE_MAX)
77 pfatal_with_name (current->name);
78 current->buffered += s;
79 current->eof = s < size;
83 /* Check for binary files and compare them for exact identity. */
85 /* Return 1 if BUF contains a non text character.
86 SIZE is the number of characters in BUF. */
88 #define binary_file_p(buf, size) (memchr (buf, 0, size) != 0)
90 /* Get ready to read the current file.
91 Return nonzero if SKIP_TEST is zero,
92 and if it appears to be a binary file. */
94 static bool
95 sip (struct file_data *current, bool skip_test)
97 /* If we have a nonexistent file at this stage, treat it as empty. */
98 if (current->desc < 0)
100 /* Leave room for a sentinel. */
101 current->bufsize = sizeof (word);
102 current->buffer = xmalloc (current->bufsize);
104 else
106 current->bufsize = buffer_lcm (sizeof (word),
107 STAT_BLOCKSIZE (current->stat),
108 PTRDIFF_MAX - 2 * sizeof (word));
109 current->buffer = xmalloc (current->bufsize);
111 #ifdef __KLIBC__
112 /* Skip test if seek is not possible */
113 skip_test = skip_test
114 || (lseek (current->desc, 0, SEEK_CUR) < 0
115 && errno == ESPIPE);
116 #endif
118 if (! skip_test)
120 /* Check first part of file to see if it's a binary file. */
122 int prev_mode = set_binary_mode (current->desc, O_BINARY);
123 off_t buffered;
124 file_block_read (current, current->bufsize);
125 buffered = current->buffered;
127 if (prev_mode != O_BINARY)
129 /* Revert to text mode and seek back to the start to reread
130 the file. Use relative seek, since file descriptors
131 like stdin might not start at offset zero. */
132 if (lseek (current->desc, - buffered, SEEK_CUR) < 0)
133 pfatal_with_name (current->name);
134 set_binary_mode (current->desc, prev_mode);
135 current->buffered = 0;
136 current->eof = false;
139 return binary_file_p (current->buffer, buffered);
143 current->buffered = 0;
144 current->eof = false;
145 return false;
148 /* Slurp the rest of the current file completely into memory. */
150 static void
151 slurp (struct file_data *current)
153 size_t cc;
155 if (current->desc < 0)
157 /* The file is nonexistent. */
158 return;
161 if (S_ISREG (current->stat.st_mode))
163 /* It's a regular file; slurp in the rest all at once. */
165 /* Get the size out of the stat block.
166 Allocate just enough room for appended newline plus word sentinel,
167 plus word-alignment since we want the buffer word-aligned. */
168 size_t file_size = current->stat.st_size;
169 cc = file_size + 2 * sizeof (word) - file_size % sizeof (word);
170 if (file_size != current->stat.st_size || cc < file_size
171 || PTRDIFF_MAX <= cc)
172 xalloc_die ();
174 if (current->bufsize < cc)
176 current->bufsize = cc;
177 current->buffer = xrealloc (current->buffer, cc);
180 /* Try to read at least 1 more byte than the size indicates, to
181 detect whether the file is growing. This is a nicety for
182 users who run 'diff' on files while they are changing. */
184 if (current->buffered <= file_size)
186 file_block_read (current, file_size + 1 - current->buffered);
187 if (current->buffered <= file_size)
188 return;
192 /* It's not a regular file, or it's a growing regular file; read it,
193 growing the buffer as needed. */
195 file_block_read (current, current->bufsize - current->buffered);
197 if (current->buffered)
199 while (current->buffered == current->bufsize)
201 if (PTRDIFF_MAX / 2 - sizeof (word) < current->bufsize)
202 xalloc_die ();
203 current->bufsize *= 2;
204 current->buffer = xrealloc (current->buffer, current->bufsize);
205 file_block_read (current, current->bufsize - current->buffered);
208 /* Allocate just enough room for appended newline plus word
209 sentinel, plus word-alignment. */
210 cc = current->buffered + 2 * sizeof (word);
211 current->bufsize = cc - cc % sizeof (word);
212 current->buffer = xrealloc (current->buffer, current->bufsize);
216 /* Split the file into lines, simultaneously computing the equivalence
217 class for each line. */
219 static void
220 find_and_hash_each_line (struct file_data *current)
222 char const *p = current->prefix_end;
223 lin i, *bucket;
224 size_t length;
226 /* Cache often-used quantities in local variables to help the compiler. */
227 char const **linbuf = current->linbuf;
228 lin alloc_lines = current->alloc_lines;
229 lin line = 0;
230 lin linbuf_base = current->linbuf_base;
231 lin *cureqs = xmalloc (alloc_lines * sizeof *cureqs);
232 struct equivclass *eqs = equivs;
233 lin eqs_index = equivs_index;
234 lin eqs_alloc = equivs_alloc;
235 char const *suffix_begin = current->suffix_begin;
236 char const *bufend = FILE_BUFFER (current) + current->buffered;
237 bool ig_case = ignore_case;
238 enum DIFF_white_space ig_white_space = ignore_white_space;
239 bool diff_length_compare_anyway =
240 ig_white_space != IGNORE_NO_WHITE_SPACE;
241 bool same_length_diff_contents_compare_anyway =
242 diff_length_compare_anyway | ig_case;
244 while (p < suffix_begin)
246 char const *ip = p;
247 hash_value h = 0;
248 unsigned char c;
250 /* Hash this line until we find a newline. */
251 switch (ig_white_space)
253 case IGNORE_ALL_SPACE:
254 while ((c = *p++) != '\n')
255 if (! isspace (c))
256 h = HASH (h, ig_case ? tolower (c) : c);
257 break;
259 case IGNORE_SPACE_CHANGE:
260 while ((c = *p++) != '\n')
262 if (isspace (c))
265 if ((c = *p++) == '\n')
266 goto hashing_done;
267 while (isspace (c));
269 h = HASH (h, ' ');
272 /* C is now the first non-space. */
273 h = HASH (h, ig_case ? tolower (c) : c);
275 break;
277 case IGNORE_TAB_EXPANSION:
278 case IGNORE_TAB_EXPANSION_AND_TRAILING_SPACE:
279 case IGNORE_TRAILING_SPACE:
281 size_t column = 0;
282 while ((c = *p++) != '\n')
284 if (ig_white_space & IGNORE_TRAILING_SPACE
285 && isspace (c))
287 char const *p1 = p;
288 unsigned char c1;
290 if ((c1 = *p1++) == '\n')
292 p = p1;
293 goto hashing_done;
295 while (isspace (c1));
298 size_t repetitions = 1;
300 if (ig_white_space & IGNORE_TAB_EXPANSION)
301 switch (c)
303 case '\b':
304 column -= 0 < column;
305 break;
307 case '\t':
308 c = ' ';
309 repetitions = tabsize - column % tabsize;
310 column = (column + repetitions < column
312 : column + repetitions);
313 break;
315 case '\r':
316 column = 0;
317 break;
319 default:
320 column++;
321 break;
324 if (ig_case)
325 c = tolower (c);
328 h = HASH (h, c);
329 while (--repetitions != 0);
332 break;
334 default:
335 if (ig_case)
336 while ((c = *p++) != '\n')
337 h = HASH (h, tolower (c));
338 else
339 while ((c = *p++) != '\n')
340 h = HASH (h, c);
341 break;
344 hashing_done:;
346 bucket = &buckets[h % nbuckets];
347 length = p - ip - 1;
349 if (p == bufend
350 && current->missing_newline
351 && ROBUST_OUTPUT_STYLE (output_style))
353 /* The last line is incomplete and we do not silently
354 complete lines. If the line cannot compare equal to any
355 complete line, put it into buckets[-1] so that it can
356 compare equal only to the other file's incomplete line
357 (if one exists). */
358 if (ig_white_space < IGNORE_TRAILING_SPACE)
359 bucket = &buckets[-1];
362 for (i = *bucket; ; i = eqs[i].next)
363 if (!i)
365 /* Create a new equivalence class in this bucket. */
366 i = eqs_index++;
367 if (i == eqs_alloc)
369 if (PTRDIFF_MAX / (2 * sizeof *eqs) <= eqs_alloc)
370 xalloc_die ();
371 eqs_alloc *= 2;
372 eqs = xrealloc (eqs, eqs_alloc * sizeof *eqs);
374 eqs[i].next = *bucket;
375 eqs[i].hash = h;
376 eqs[i].line = ip;
377 eqs[i].length = length;
378 *bucket = i;
379 break;
381 else if (eqs[i].hash == h)
383 char const *eqline = eqs[i].line;
385 /* Reuse existing class if lines_differ reports the lines
386 equal. */
387 if (eqs[i].length == length)
389 /* Reuse existing equivalence class if the lines are identical.
390 This detects the common case of exact identity
391 faster than lines_differ would. */
392 if (memcmp (eqline, ip, length) == 0)
393 break;
394 if (!same_length_diff_contents_compare_anyway)
395 continue;
397 else if (!diff_length_compare_anyway)
398 continue;
400 if (! lines_differ (eqline, ip))
401 break;
404 /* Maybe increase the size of the line table. */
405 if (line == alloc_lines)
407 /* Double (alloc_lines - linbuf_base) by adding to alloc_lines. */
408 if (PTRDIFF_MAX / 3 <= alloc_lines
409 || PTRDIFF_MAX / sizeof *cureqs <= 2 * alloc_lines - linbuf_base
410 || PTRDIFF_MAX / sizeof *linbuf <= alloc_lines - linbuf_base)
411 xalloc_die ();
412 alloc_lines = 2 * alloc_lines - linbuf_base;
413 cureqs = xrealloc (cureqs, alloc_lines * sizeof *cureqs);
414 linbuf += linbuf_base;
415 linbuf = xrealloc (linbuf,
416 (alloc_lines - linbuf_base) * sizeof *linbuf);
417 linbuf -= linbuf_base;
419 linbuf[line] = ip;
420 cureqs[line] = i;
421 ++line;
424 current->buffered_lines = line;
426 for (i = 0; ; i++)
428 /* Record the line start for lines in the suffix that we care about.
429 Record one more line start than lines,
430 so that we can compute the length of any buffered line. */
431 if (line == alloc_lines)
433 /* Double (alloc_lines - linbuf_base) by adding to alloc_lines. */
434 if (PTRDIFF_MAX / 3 <= alloc_lines
435 || PTRDIFF_MAX / sizeof *cureqs <= 2 * alloc_lines - linbuf_base
436 || PTRDIFF_MAX / sizeof *linbuf <= alloc_lines - linbuf_base)
437 xalloc_die ();
438 alloc_lines = 2 * alloc_lines - linbuf_base;
439 linbuf += linbuf_base;
440 linbuf = xrealloc (linbuf,
441 (alloc_lines - linbuf_base) * sizeof *linbuf);
442 linbuf -= linbuf_base;
444 linbuf[line] = p;
446 if (p == bufend)
448 /* If the last line is incomplete and we do not silently
449 complete lines, don't count its appended newline. */
450 if (current->missing_newline && ROBUST_OUTPUT_STYLE (output_style))
451 linbuf[line]--;
452 break;
455 if (context <= i && no_diff_means_no_output)
456 break;
458 line++;
460 while (*p++ != '\n')
461 continue;
464 /* Done with cache in local variables. */
465 current->linbuf = linbuf;
466 current->valid_lines = line;
467 current->alloc_lines = alloc_lines;
468 current->equivs = cureqs;
469 equivs = eqs;
470 equivs_alloc = eqs_alloc;
471 equivs_index = eqs_index;
474 /* Prepare the text. Make sure the text end is initialized.
475 Make sure text ends in a newline,
476 but remember that we had to add one.
477 Strip trailing CRs, if that was requested. */
479 static void
480 prepare_text (struct file_data *current)
482 size_t buffered = current->buffered;
483 char *p = FILE_BUFFER (current);
484 if (!p)
485 return;
487 if (strip_trailing_cr)
489 char *srclim = p + buffered;
490 *srclim = '\r';
491 char *dst = rawmemchr (p, '\r');
493 for (char const *src = dst; src != srclim; src++)
495 src += *src == '\r' && src[1] == '\n';
496 *dst++ = *src;
499 buffered -= srclim - dst;
502 if (buffered != 0 && p[buffered - 1] != '\n')
504 p[buffered++] = '\n';
505 current->missing_newline = true;
508 /* Don't use uninitialized storage when planting or using sentinels. */
509 memset (p + buffered, 0, sizeof (word));
511 current->buffered = buffered;
514 /* We have found N lines in a buffer of size S; guess the
515 proportionate number of lines that will be found in a buffer of
516 size T. However, do not guess a number of lines so large that the
517 resulting line table might cause overflow in size calculations. */
518 static lin
519 guess_lines (lin n, size_t s, size_t t)
521 size_t guessed_bytes_per_line = n < 10 ? 32 : s / (n - 1);
522 lin guessed_lines = MAX (1, t / guessed_bytes_per_line);
523 return MIN (guessed_lines, PTRDIFF_MAX / (2 * sizeof (char *) + 1) - 5) + 5;
526 /* Given a vector of two file_data objects, find the identical
527 prefixes and suffixes of each object. */
529 static void
530 find_identical_ends (struct file_data filevec[])
532 word *w0, *w1;
533 char *p0, *p1, *buffer0, *buffer1;
534 char const *end0, *beg0;
535 char const **linbuf0, **linbuf1;
536 lin i, lines;
537 size_t n0, n1;
538 lin alloc_lines0, alloc_lines1;
539 bool prefix_needed;
540 lin buffered_prefix, prefix_count, prefix_mask;
541 lin middle_guess, suffix_guess;
543 slurp (&filevec[0]);
544 prepare_text (&filevec[0]);
545 if (filevec[0].desc != filevec[1].desc)
547 slurp (&filevec[1]);
548 prepare_text (&filevec[1]);
550 else
552 filevec[1].buffer = filevec[0].buffer;
553 filevec[1].bufsize = filevec[0].bufsize;
554 filevec[1].buffered = filevec[0].buffered;
555 filevec[1].missing_newline = filevec[0].missing_newline;
558 /* Find identical prefix. */
560 w0 = filevec[0].buffer;
561 w1 = filevec[1].buffer;
562 p0 = buffer0 = (char *) w0;
563 p1 = buffer1 = (char *) w1;
564 n0 = filevec[0].buffered;
565 n1 = filevec[1].buffered;
567 if (p0 == p1)
568 /* The buffers are the same; sentinels won't work. */
569 p0 = p1 += n1;
570 else
572 /* Insert end sentinels, in this case characters that are guaranteed
573 to make the equality test false, and thus terminate the loop. */
575 if (n0 < n1)
576 p0[n0] = ~p1[n0];
577 else
578 p1[n1] = ~p0[n1];
580 /* Loop until first mismatch, or to the sentinel characters. */
582 /* Compare a word at a time for speed. */
583 while (*w0 == *w1)
584 w0++, w1++;
586 /* Do the last few bytes of comparison a byte at a time. */
587 p0 = (char *) w0;
588 p1 = (char *) w1;
589 while (*p0 == *p1)
590 p0++, p1++;
592 /* Don't mistakenly count missing newline as part of prefix. */
593 if (ROBUST_OUTPUT_STYLE (output_style)
594 && ((buffer0 + n0 - filevec[0].missing_newline < p0)
596 (buffer1 + n1 - filevec[1].missing_newline < p1)))
597 p0--, p1--;
600 /* Now P0 and P1 point at the first nonmatching characters. */
602 /* Skip back to last line-beginning in the prefix,
603 and then discard up to HORIZON_LINES lines from the prefix. */
604 i = horizon_lines;
605 while (p0 != buffer0 && (p0[-1] != '\n' || i--))
606 p0--, p1--;
608 /* Record the prefix. */
609 filevec[0].prefix_end = p0;
610 filevec[1].prefix_end = p1;
612 /* Find identical suffix. */
614 /* P0 and P1 point beyond the last chars not yet compared. */
615 p0 = buffer0 + n0;
616 p1 = buffer1 + n1;
618 if (! ROBUST_OUTPUT_STYLE (output_style)
619 || filevec[0].missing_newline == filevec[1].missing_newline)
621 end0 = p0; /* Addr of last char in file 0. */
623 /* Get value of P0 at which we should stop scanning backward:
624 this is when either P0 or P1 points just past the last char
625 of the identical prefix. */
626 beg0 = filevec[0].prefix_end + (n0 < n1 ? 0 : n0 - n1);
628 /* Scan back until chars don't match or we reach that point. */
629 while (p0 != beg0)
630 if (*--p0 != *--p1)
632 /* Point at the first char of the matching suffix. */
633 ++p0, ++p1;
634 beg0 = p0;
635 break;
638 /* Are we at a line-beginning in both files? If not, add the rest of
639 this line to the main body. Discard up to HORIZON_LINES lines from
640 the identical suffix. Also, discard one extra line,
641 because shift_boundaries may need it. */
642 i = horizon_lines + !((buffer0 == p0 || p0[-1] == '\n')
644 (buffer1 == p1 || p1[-1] == '\n'));
645 while (i-- && p0 != end0)
646 while (*p0++ != '\n')
647 continue;
649 p1 += p0 - beg0;
652 /* Record the suffix. */
653 filevec[0].suffix_begin = p0;
654 filevec[1].suffix_begin = p1;
656 /* Calculate number of lines of prefix to save.
658 prefix_count == 0 means save the whole prefix;
659 we need this for options like -D that output the whole file,
660 or for enormous contexts (to avoid worrying about arithmetic overflow).
661 We also need it for options like -F that output some preceding line;
662 at least we will need to find the last few lines,
663 but since we don't know how many, it's easiest to find them all.
665 Otherwise, prefix_count != 0. Save just prefix_count lines at start
666 of the line buffer; they'll be moved to the proper location later.
667 Handle 1 more line than the context says (because we count 1 too many),
668 rounded up to the next power of 2 to speed index computation. */
670 if (no_diff_means_no_output && ! function_regexp.fastmap
671 && context < LIN_MAX / 4 && context < n0)
673 middle_guess = guess_lines (0, 0, p0 - filevec[0].prefix_end);
674 suffix_guess = guess_lines (0, 0, buffer0 + n0 - p0);
675 for (prefix_count = 1; prefix_count <= context; prefix_count *= 2)
676 continue;
677 alloc_lines0 = (prefix_count + middle_guess
678 + MIN (context, suffix_guess));
680 else
682 prefix_count = 0;
683 alloc_lines0 = guess_lines (0, 0, n0);
686 prefix_mask = prefix_count - 1;
687 lines = 0;
688 linbuf0 = xmalloc (alloc_lines0 * sizeof *linbuf0);
689 prefix_needed = ! (no_diff_means_no_output
690 && filevec[0].prefix_end == p0
691 && filevec[1].prefix_end == p1);
692 p0 = buffer0;
694 /* If the prefix is needed, find the prefix lines. */
695 if (prefix_needed)
697 end0 = filevec[0].prefix_end;
698 while (p0 != end0)
700 lin l = lines++ & prefix_mask;
701 if (l == alloc_lines0)
703 if (PTRDIFF_MAX / (2 * sizeof *linbuf0) <= alloc_lines0)
704 xalloc_die ();
705 alloc_lines0 *= 2;
706 linbuf0 = xrealloc (linbuf0, alloc_lines0 * sizeof *linbuf0);
708 linbuf0[l] = p0;
709 while (*p0++ != '\n')
710 continue;
713 buffered_prefix = prefix_count && context < lines ? context : lines;
715 /* Allocate line buffer 1. */
717 middle_guess = guess_lines (lines, p0 - buffer0, p1 - filevec[1].prefix_end);
718 suffix_guess = guess_lines (lines, p0 - buffer0, buffer1 + n1 - p1);
719 alloc_lines1 = buffered_prefix + middle_guess + MIN (context, suffix_guess);
720 if (alloc_lines1 < buffered_prefix
721 || PTRDIFF_MAX / sizeof *linbuf1 <= alloc_lines1)
722 xalloc_die ();
723 linbuf1 = xmalloc (alloc_lines1 * sizeof *linbuf1);
725 if (buffered_prefix != lines)
727 /* Rotate prefix lines to proper location. */
728 for (i = 0; i < buffered_prefix; i++)
729 linbuf1[i] = linbuf0[(lines - context + i) & prefix_mask];
730 for (i = 0; i < buffered_prefix; i++)
731 linbuf0[i] = linbuf1[i];
734 /* Initialize line buffer 1 from line buffer 0. */
735 for (i = 0; i < buffered_prefix; i++)
736 linbuf1[i] = linbuf0[i] - buffer0 + buffer1;
738 /* Record the line buffer, adjusted so that
739 linbuf[0] points at the first differing line. */
740 filevec[0].linbuf = linbuf0 + buffered_prefix;
741 filevec[1].linbuf = linbuf1 + buffered_prefix;
742 filevec[0].linbuf_base = filevec[1].linbuf_base = - buffered_prefix;
743 filevec[0].alloc_lines = alloc_lines0 - buffered_prefix;
744 filevec[1].alloc_lines = alloc_lines1 - buffered_prefix;
745 filevec[0].prefix_lines = filevec[1].prefix_lines = lines;
748 /* If 1 < k, then (2**k - prime_offset[k]) is the largest prime less
749 than 2**k. This table is derived from Chris K. Caldwell's list
750 <http://www.utm.edu/research/primes/lists/2small/>. */
752 static unsigned char const prime_offset[] =
754 0, 0, 1, 1, 3, 1, 3, 1, 5, 3, 3, 9, 3, 1, 3, 19, 15, 1, 5, 1, 3, 9, 3,
755 15, 3, 39, 5, 39, 57, 3, 35, 1, 5, 9, 41, 31, 5, 25, 45, 7, 87, 21,
756 11, 57, 17, 55, 21, 115, 59, 81, 27, 129, 47, 111, 33, 55, 5, 13, 27,
757 55, 93, 1, 57, 25
760 /* Verify that this host's size_t is not too wide for the above table. */
762 verify (sizeof (size_t) * CHAR_BIT <= sizeof prime_offset);
764 /* Given a vector of two file_data objects, read the file associated
765 with each one, and build the table of equivalence classes.
766 Return nonzero if either file appears to be a binary file.
767 If PRETEND_BINARY is nonzero, pretend they are binary regardless. */
769 bool
770 read_files (struct file_data filevec[], bool pretend_binary)
772 int i;
773 bool skip_test = text | pretend_binary;
774 bool appears_binary = pretend_binary | sip (&filevec[0], skip_test);
776 if (filevec[0].desc != filevec[1].desc)
777 appears_binary |= sip (&filevec[1], skip_test | appears_binary);
778 else
780 filevec[1].buffer = filevec[0].buffer;
781 filevec[1].bufsize = filevec[0].bufsize;
782 filevec[1].buffered = filevec[0].buffered;
784 if (appears_binary)
786 set_binary_mode (filevec[0].desc, O_BINARY);
787 set_binary_mode (filevec[1].desc, O_BINARY);
788 return true;
791 find_identical_ends (filevec);
793 equivs_alloc = filevec[0].alloc_lines + filevec[1].alloc_lines + 1;
794 if (PTRDIFF_MAX / sizeof *equivs <= equivs_alloc)
795 xalloc_die ();
796 equivs = xmalloc (equivs_alloc * sizeof *equivs);
797 /* Equivalence class 0 is permanently safe for lines that were not
798 hashed. Real equivalence classes start at 1. */
799 equivs_index = 1;
801 /* Allocate (one plus) a prime number of hash buckets. Use a prime
802 number between 1/3 and 2/3 of the value of equiv_allocs,
803 approximately. */
804 for (i = 9; (size_t) 1 << i < equivs_alloc / 3; i++)
805 continue;
806 nbuckets = ((size_t) 1 << i) - prime_offset[i];
807 if (PTRDIFF_MAX / sizeof *buckets <= nbuckets)
808 xalloc_die ();
809 buckets = zalloc ((nbuckets + 1) * sizeof *buckets);
810 buckets++;
812 for (i = 0; i < 2; i++)
813 find_and_hash_each_line (&filevec[i]);
815 filevec[0].equiv_max = filevec[1].equiv_max = equivs_index;
817 free (equivs);
818 free (buckets - 1);
820 return false;