From 728f8f0a00a22e544789952c76f9eac0c3f8b2c6 Mon Sep 17 00:00:00 2001 From: Paul Eggert Date: Fri, 2 Sep 2011 22:23:17 -0700 Subject: [PATCH] * fileio.c: Fix bugs with large file offsets. The previous code assumed that file offsets (off_t values) fit in EMACS_INT variables, which is not true on typical 32-bit hosts. The code messed up by falsely reporting buffer overflow in cases such as (insert-file-contents "big" nil 1 2) into an empty buffer when "big" contains more than 2**29 bytes, even though this inserts just one byte and does not overflow the buffer. (Finsert_file_contents): Store file offsets as off_t values, not as EMACS_INT values. Check for overflow when converting between EMACS_INT and off_t. When checking for buffer overflow or for overlap, take the offsets into account. Don't use EMACS_INT for small values where int suffices. When checking for overlap, fix a typo: ZV was used where ZV_BYTE was intended. (Fwrite_region): Don't assume off_t fits into 'long'. * buffer.h (struct buffer.modtime_size): Now off_t, not EMACS_INT. --- src/ChangeLog | 19 ++++++++++++ src/buffer.h | 2 +- src/fileio.c | 92 ++++++++++++++++++++++++++++++++++++++++------------------- 3 files changed, 82 insertions(+), 31 deletions(-) diff --git a/src/ChangeLog b/src/ChangeLog index 52ec796d6cf..c80a12c1c00 100644 --- a/src/ChangeLog +++ b/src/ChangeLog @@ -1,3 +1,22 @@ +2011-09-03 Paul Eggert + + * fileio.c: Fix bugs with large file offsets. + The previous code assumed that file offsets (off_t values) fit in + EMACS_INT variables, which is not true on typical 32-bit hosts. + The code messed up by falsely reporting buffer overflow in cases + such as (insert-file-contents "big" nil 1 2) into an empty buffer + when "big" contains more than 2**29 bytes, even though this + inserts just one byte and does not overflow the buffer. + (Finsert_file_contents): Store file offsets as off_t + values, not as EMACS_INT values. Check for overflow when + converting between EMACS_INT and off_t. When checking for + buffer overflow or for overlap, take the offsets into account. + Don't use EMACS_INT for small values where int suffices. + When checking for overlap, fix a typo: ZV was used where + ZV_BYTE was intended. + (Fwrite_region): Don't assume off_t fits into 'long'. + * buffer.h (struct buffer.modtime_size): Now off_t, not EMACS_INT. + 2011-08-30 Chong Yidong * syntax.c (find_defun_start): Update all cache variables if diff --git a/src/buffer.h b/src/buffer.h index 06864dd5789..c50cfe56c77 100644 --- a/src/buffer.h +++ b/src/buffer.h @@ -559,7 +559,7 @@ struct buffer is still the same (since it's rounded up to seconds) but we're actually not up-to-date. -1 means the size is unknown. Only meaningful if modtime is actually set. */ - EMACS_INT modtime_size; + off_t modtime_size; /* The value of text->modiff at the last auto-save. */ int auto_save_modified; /* The value of text->modiff at the last display error. diff --git a/src/fileio.c b/src/fileio.c index 60ee35bb399..fe0fb593208 100644 --- a/src/fileio.c +++ b/src/fileio.c @@ -3179,6 +3179,7 @@ variable `last-coding-system-used' to the coding system actually used. */) EMACS_INT inserted = 0; int nochange = 0; register EMACS_INT how_much; + off_t beg_offset, end_offset; register EMACS_INT unprocessed; int count = SPECPDL_INDEX (); struct gcpro gcpro1, gcpro2, gcpro3, gcpro4, gcpro5; @@ -3284,15 +3285,6 @@ variable `last-coding-system-used' to the coding system actually used. */) record_unwind_protect (close_file_unwind, make_number (fd)); - /* Check whether the size is too large or negative, which can happen on a - platform that allows file sizes greater than the maximum off_t value. */ - if (! not_regular - && ! (0 <= st.st_size && st.st_size <= BUF_BYTES_MAX)) - buffer_overflow (); - - /* Prevent redisplay optimizations. */ - current_buffer->clip_changed = 1; - if (!NILP (visit)) { if (!NILP (beg) || !NILP (end)) @@ -3302,26 +3294,64 @@ variable `last-coding-system-used' to the coding system actually used. */) } if (!NILP (beg)) - CHECK_NUMBER (beg); + { + if (! (RANGED_INTEGERP (0, beg, TYPE_MAXIMUM (off_t)))) + wrong_type_argument (intern ("file-offset"), beg); + beg_offset = XFASTINT (beg); + } else - XSETFASTINT (beg, 0); + beg_offset = 0; if (!NILP (end)) - CHECK_NUMBER (end); + { + if (! (RANGED_INTEGERP (0, end, TYPE_MAXIMUM (off_t)))) + wrong_type_argument (intern ("file-offset"), end); + end_offset = XFASTINT (end); + } else { - if (! not_regular) + if (not_regular) + end_offset = TYPE_MAXIMUM (off_t); + else { - XSETINT (end, st.st_size); + end_offset = st.st_size; + + /* A negative size can happen on a platform that allows file + sizes greater than the maximum off_t value. */ + if (end_offset < 0) + buffer_overflow (); /* The file size returned from stat may be zero, but data may be readable nonetheless, for example when this is a file in the /proc filesystem. */ - if (st.st_size == 0) - XSETINT (end, READ_BUF_SIZE); + if (end_offset == 0) + end_offset = READ_BUF_SIZE; } } + /* Check now whether the buffer will become too large, + in the likely case where the file's length is not changing. + This saves a lot of needless work before a buffer overflow. */ + if (! not_regular) + { + /* The likely offset where we will stop reading. We could read + more (or less), if the file grows (or shrinks) as we read it. */ + off_t likely_end = min (end_offset, st.st_size); + + if (beg_offset < likely_end) + { + ptrdiff_t buf_bytes = + Z_BYTE - (!NILP (replace) ? ZV_BYTE - BEGV_BYTE : 0); + ptrdiff_t buf_growth_max = BUF_BYTES_MAX - buf_bytes; + off_t likely_growth = likely_end - beg_offset; + if (buf_growth_max < likely_growth) + buffer_overflow (); + } + } + + /* Prevent redisplay optimizations. */ + current_buffer->clip_changed = 1; + if (EQ (Vcoding_system_for_read, Qauto_save_coding)) { coding_system = coding_inherit_eol_type (Qutf_8_emacs, Qunix); @@ -3465,9 +3495,9 @@ variable `last-coding-system-used' to the coding system actually used. */) give up on handling REPLACE in the optimized way. */ int giveup_match_end = 0; - if (XINT (beg) != 0) + if (beg_offset != 0) { - if (emacs_lseek (fd, XINT (beg), SEEK_SET) < 0) + if (lseek (fd, beg_offset, SEEK_SET) < 0) report_file_error ("Setting file position", Fcons (orig_filename, Qnil)); } @@ -3515,7 +3545,7 @@ variable `last-coding-system-used' to the coding system actually used. */) immediate_quit = 0; /* If the file matches the buffer completely, there's no need to replace anything. */ - if (same_at_start - BEGV_BYTE == XINT (end)) + if (same_at_start - BEGV_BYTE == end_offset) { emacs_close (fd); specpdl_ptr--; @@ -3530,16 +3560,17 @@ variable `last-coding-system-used' to the coding system actually used. */) already found that decoding is necessary, don't waste time. */ while (!giveup_match_end) { - EMACS_INT total_read, nread, bufpos, curpos, trial; + int total_read, nread, bufpos, trial; + off_t curpos; /* At what file position are we now scanning? */ - curpos = XINT (end) - (ZV_BYTE - same_at_end); + curpos = end_offset - (ZV_BYTE - same_at_end); /* If the entire file matches the buffer tail, stop the scan. */ if (curpos == 0) break; /* How much can we scan in the next step? */ trial = min (curpos, sizeof buffer); - if (emacs_lseek (fd, curpos - trial, SEEK_SET) < 0) + if (lseek (fd, curpos - trial, SEEK_SET) < 0) report_file_error ("Setting file position", Fcons (orig_filename, Qnil)); @@ -3606,13 +3637,14 @@ variable `last-coding-system-used' to the coding system actually used. */) /* Don't try to reuse the same piece of text twice. */ overlap = (same_at_start - BEGV_BYTE - - (same_at_end + st.st_size - ZV)); + - (same_at_end + + (! NILP (end) ? end_offset : st.st_size) - ZV_BYTE)); if (overlap > 0) same_at_end += overlap; /* Arrange to read only the nonmatching middle part of the file. */ - XSETFASTINT (beg, XINT (beg) + (same_at_start - BEGV_BYTE)); - XSETFASTINT (end, XINT (end) - (ZV_BYTE - same_at_end)); + beg_offset += same_at_start - BEGV_BYTE; + end_offset -= ZV_BYTE - same_at_end; del_range_byte (same_at_start, same_at_end, 0); /* Insert from the file at the proper position. */ @@ -3657,7 +3689,7 @@ variable `last-coding-system-used' to the coding system actually used. */) /* First read the whole file, performing code conversion into CONVERSION_BUFFER. */ - if (emacs_lseek (fd, XINT (beg), SEEK_SET) < 0) + if (lseek (fd, beg_offset, SEEK_SET) < 0) report_file_error ("Setting file position", Fcons (orig_filename, Qnil)); @@ -3824,7 +3856,7 @@ variable `last-coding-system-used' to the coding system actually used. */) } if (! not_regular) - total = XINT (end) - XINT (beg); + total = end_offset - beg_offset; else /* For a special file, all we can do is guess. */ total = READ_BUF_SIZE; @@ -3845,9 +3877,9 @@ variable `last-coding-system-used' to the coding system actually used. */) if (GAP_SIZE < total) make_gap (total - GAP_SIZE); - if (XINT (beg) != 0 || !NILP (replace)) + if (beg_offset != 0 || !NILP (replace)) { - if (emacs_lseek (fd, XINT (beg), SEEK_SET) < 0) + if (lseek (fd, beg_offset, SEEK_SET) < 0) report_file_error ("Setting file position", Fcons (orig_filename, Qnil)); } @@ -4576,7 +4608,7 @@ This calls `write-region-annotate-functions' at the start, and if (!NILP (append) && !NILP (Ffile_regular_p (filename))) { - long ret; + off_t ret; if (NUMBERP (append)) ret = emacs_lseek (desc, XINT (append), SEEK_CUR); -- 2.11.4.GIT