From 01c4ea4cfd54f272780fe7971e6fd8bc06333e8d Mon Sep 17 00:00:00 2001 From: Scott Hovestadt Date: Mon, 1 Aug 2022 08:42:09 -0700 Subject: [PATCH] Fix bug where fgets incorrectly handles \r when at end of chunk. Summary: Currently, when fgets see `\r` at the end of a chunk, it ignores it because it's worried that the next character will be `\n`. While that strategy correctly handles `\r\n`, it means that `\r` without `\n` is ignored and fgets will return 2 lines instead of 1. In this diff, I fix the bug by: - Removing the logic to ignore `\r` when at end of chunk. - Preventing a situation where a chunk has incomplete information; chunks will never end on `\r` unless we're at end of file. Reviewed By: ricklavoie Differential Revision: D38297477 fbshipit-source-id: 37c9ad6befef7f03e037a2a9ac9a37d5ee112179 --- hphp/runtime/base/file.cpp | 23 +++++++++++++++++++++-- hphp/test/slow/file/fgets-cr.php | 12 ++++++++++++ hphp/test/slow/file/fgets-cr.php.expect | 3 +++ 3 files changed, 36 insertions(+), 2 deletions(-) create mode 100644 hphp/test/slow/file/fgets-cr.php create mode 100644 hphp/test/slow/file/fgets-cr.php.expect diff --git a/hphp/runtime/base/file.cpp b/hphp/runtime/base/file.cpp index fab500a215e..ea396a015c5 100644 --- a/hphp/runtime/base/file.cpp +++ b/hphp/runtime/base/file.cpp @@ -488,13 +488,13 @@ String File::readLine(int64_t maxlen /* = 0 */) { const char *lf; cr = (const char *)memchr(readptr, '\r', avail); lf = (const char *)memchr(readptr, '\n', avail); - if (cr && lf != cr + 1 && !(lf && lf < cr) && cr != &readptr[avail - 1]) { + if (cr && lf != cr + 1 && !(lf && lf < cr)) { /* mac */ eol = cr; } else if ((cr && lf && cr == lf - 1) || (lf)) { /* dos or unix endings */ eol = lf; - } else if (cr != &readptr[avail - 1]) { + } else { eol = cr; } @@ -532,11 +532,30 @@ String File::readLine(int64_t maxlen /* = 0 */) { m_data->m_buffer = (char *)malloc(m_data->m_chunkSize); m_data->m_bufferSize = m_data->m_chunkSize; } + if (m_data->m_bufferSize != m_data->m_chunkSize) { + m_data->m_buffer = (char *)realloc(m_data->m_buffer, m_data->m_chunkSize); + m_data->m_bufferSize = m_data->m_chunkSize; + } m_data->m_writepos = readImpl(m_data->m_buffer, m_data->m_bufferSize); m_data->m_readpos = 0; + if (bufferedLen() == 0) { break; } + + // refuse to end chunk on \r when the next character may be \n + while (m_data->m_buffer[bufferedLen() - 1] == '\r') { + char extrachar[1]; + int64_t len = readImpl(extrachar, 1); + if (len == 1) { + m_data->m_buffer = (char *)realloc(m_data->m_buffer, m_data->m_bufferSize + 1); + m_data->m_buffer[bufferedLen()] = extrachar[0]; + m_data->m_writepos = m_data->m_writepos + 1; + m_data->m_bufferSize = m_data->m_bufferSize + 1; + } else { + break; + } + } } } diff --git a/hphp/test/slow/file/fgets-cr.php b/hphp/test/slow/file/fgets-cr.php new file mode 100644 index 00000000000..d1249f52db4 --- /dev/null +++ b/hphp/test/slow/file/fgets-cr.php @@ -0,0 +1,12 @@ +> +function main_fgets_cr() { + $fh = tmpfile(); + fwrite($fh, str_repeat('x', 8191) . "\r\rend"); + fseek($fh, 0); + $i = 0; + while($f = fgets($fh)) { + echo $i++,"\n"; + } +} diff --git a/hphp/test/slow/file/fgets-cr.php.expect b/hphp/test/slow/file/fgets-cr.php.expect new file mode 100644 index 00000000000..4539bbf2d22 --- /dev/null +++ b/hphp/test/slow/file/fgets-cr.php.expect @@ -0,0 +1,3 @@ +0 +1 +2 -- 2.11.4.GIT