Refactor handling of input files
[xapian.git] / xapian-applications / omega / loadfile.cc
blob7ea4a7f9fea673566e37b6f56be3899c9ac8b50d
1 /** @file loadfile.cc
2 * @brief load a file into a std::string.
3 */
4 /* Copyright (C) 2006,2010,2012,2015,2018 Olly Betts
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
21 #include <config.h>
23 #include "loadfile.h"
25 #include <algorithm>
26 #include <cerrno>
27 #include <string>
29 #include "safefcntl.h"
30 #include <sys/types.h>
31 #include "safesysstat.h"
32 #include "safeunistd.h"
34 using namespace std;
/** Read everything remaining on a file descriptor into a string.
 *
 *  @param fd      Descriptor to read from.  It is not closed by this
 *                 function.
 *  @param output  Emptied first, then receives the bytes read.
 *
 *  @return true once read() reports end of file; false if read() fails with
 *          an error other than EINTR, in which case errno is as read() left
 *          it and output holds whatever was read before the failure.
 */
bool
load_file_from_fd(int fd, std::string& output)
{
    output.clear();
    char buf[4096];
    for (;;) {
        const ssize_t got = read(fd, buf, sizeof(buf));
        if (got > 0) {
            output.append(buf, got);
            continue;
        }
        if (got == 0) {
            // End of file: everything has been collected.
            return true;
        }
        // read() failed.  EINTR just means we should try again; anything
        // else is reported to the caller.
        if (errno != EINTR) return false;
    }
}
54 bool
55 load_file(const string& file_name, size_t max_to_read, int flags,
56 string& output, bool* truncated)
58 mode_t mode = O_BINARY | O_RDONLY;
59 #if defined O_NOATIME && O_NOATIME != 0
60 if (flags & NOATIME) mode |= O_NOATIME;
61 #endif
63 int fd = open(file_name.c_str(), mode);
64 #if defined O_NOATIME && O_NOATIME != 0
65 if (fd < 0 && (mode & O_NOATIME)) {
66 mode &= ~O_NOATIME;
67 fd = open(file_name.c_str(), mode);
69 #endif
70 if (fd < 0) return false;
72 #ifdef HAVE_POSIX_FADVISE
73 # ifndef __linux__
74 // On Linux, POSIX_FADV_NOREUSE has been a no-op since 2.6.18 (released
75 // 2006) and before that it was incorrectly implemented as an alias for
76 // POSIX_FADV_WILLNEED. There have been a few attempts to make
77 // POSIX_FADV_NOREUSE actually work on Linux but nothing has been merged so
78 // for now let's not waste effort making a syscall we know to currently be
79 // a no-op. We can revise this conditional if it gets usefully
80 // implemented.
81 if (flags & NOCACHE)
82 posix_fadvise(fd, 0, 0, POSIX_FADV_NOREUSE);
83 # endif
84 #endif
86 struct stat st;
87 if (fstat(fd, &st) < 0) {
88 int errno_save = errno;
89 close(fd);
90 errno = errno_save;
91 return false;
94 if (!S_ISREG(st.st_mode)) {
95 close(fd);
96 errno = EINVAL;
97 return false;
100 size_t n = st.st_size;
101 if (max_to_read && max_to_read < n) {
102 n = max_to_read;
103 if (truncated) *truncated = true;
104 } else {
105 if (truncated) *truncated = false;
108 output.resize(0);
109 output.reserve(n);
110 while (n) {
111 char blk[4096];
112 int c = read(fd, blk, min(n, sizeof(blk)));
113 if (c <= 0) {
114 if (c == 0) break;
115 if (errno == EINTR) continue;
116 return false;
118 output.append(blk, c);
119 n -= c;
122 if (flags & NOCACHE) {
123 #ifdef HAVE_POSIX_FADVISE
124 # ifdef __linux__
125 // Linux doesn't implement POSIX_FADV_NOREUSE so instead we use
126 // POSIX_FADV_DONTNEED just before closing the fd. This is a bit more
127 // aggressive than we ideally want - really we just want to stop our
128 // reads from pushing other pages out of the OS cache, but if the
129 // pages we read are already cached it would probably be better to
130 // leave them cached after the read.
131 posix_fadvise(fd, 0, 0, POSIX_FADV_DONTNEED);
132 # endif
133 #endif
135 close(fd);
137 return true;