[honey] Fix portability to systems without pread()
[xapian.git] / xapian-core / common / io_utils.cc
blobe1a272fefda6a6cb523df52eae0e188c7a7831d1
1 /** @file io_utils.cc
2 * @brief Wrappers for low-level POSIX I/O routines.
3 */
4 /* Copyright (C) 2004,2006,2007,2008,2009,2011,2012,2014,2015,2016,2018 Olly Betts
5 * Copyright (C) 2010 Richard Boulton
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
22 #include <config.h>
24 #include "io_utils.h"
25 #include "posixy_wrapper.h"
27 #include "safeerrno.h"
28 #include "safeunistd.h"
30 #include <cstring>
31 #include <string>
33 #include <xapian/error.h>
35 #include "omassert.h"
36 #include "str.h"
38 // Trying to include the correct headers with the correct defines set to
39 // get pread() and pwrite() prototyped on every platform without breaking any
40 // other platform is a real can of worms. So instead we probe for what
41 // prototypes (if any) are required in configure and put them into
42 // PREAD_PROTOTYPE and PWRITE_PROTOTYPE.
43 #if defined HAVE_PREAD && defined PREAD_PROTOTYPE
44 PREAD_PROTOTYPE
45 #endif
46 #if defined HAVE_PWRITE && defined PWRITE_PROTOTYPE
47 PWRITE_PROTOTYPE
48 #endif
50 bool
51 io_unlink(const std::string & filename)
53 if (posixy_unlink(filename.c_str()) == 0) {
54 return true;
56 if (errno != ENOENT) {
57 throw Xapian::DatabaseError(filename + ": delete failed", errno);
59 return false;
62 // The smallest fd we want to use for a writable handle.
63 const int MIN_WRITE_FD = 3;
65 int
66 io_open_block_wr(const char * fname, bool anew)
68 int flags = O_RDWR | O_BINARY | O_CLOEXEC;
69 if (anew) flags |= O_CREAT | O_TRUNC;
70 int fd = ::open(fname, flags, 0666);
71 if (fd >= MIN_WRITE_FD || fd < 0) return fd;
73 // We want to avoid using fd < MIN_WRITE_FD, in case some other code in
74 // the same process tries to write to stdout or stderr, which would end up
75 // corrupting our database.
76 int badfd = fd;
77 #ifdef F_DUPFD_CLOEXEC
78 // dup to the first unused fd >= MIN_WRITE_FD.
79 fd = fcntl(badfd, F_DUPFD_CLOEXEC, MIN_WRITE_FD);
80 // F_DUPFD_CLOEXEC may not be supported.
81 if (fd < 0 && errno == EINVAL)
82 #endif
83 #ifdef F_DUPFD
85 fd = fcntl(badfd, F_DUPFD, MIN_WRITE_FD);
86 # ifdef FD_CLOEXEC
87 if (fd >= 0)
88 (void)fcntl(fd, F_SETFD, FD_CLOEXEC);
89 # endif
91 int save_errno = errno;
92 (void)close(badfd);
93 errno = save_errno;
94 #else
96 char toclose[MIN_WRITE_FD];
97 memset(toclose, 0, sizeof(toclose));
98 fd = badfd;
99 do {
100 toclose[fd] = 1;
101 fd = dup(fd);
102 } while (fd >= 0 && fd < MIN_WRITE_FD);
103 int save_errno = errno;
104 for (badfd = 0; badfd != MIN_WRITE_FD; ++badfd)
105 if (toclose[badfd])
106 close(badfd);
107 if (fd < 0) {
108 errno = save_errno;
109 } else {
110 # ifdef FD_CLOEXEC
111 (void)fcntl(fd, F_SETFD, FD_CLOEXEC);
112 # endif
115 #endif
116 Assert(fd >= MIN_WRITE_FD || fd < 0);
117 return fd;
120 size_t
121 io_read(int fd, char * p, size_t n, size_t min)
123 size_t total = 0;
124 while (n) {
125 ssize_t c = read(fd, p, n);
126 if (c <= 0) {
127 if (c == 0) {
128 if (total >= min) break;
129 throw Xapian::DatabaseCorruptError("Couldn't read enough (EOF)");
131 if (errno == EINTR) continue;
132 throw Xapian::DatabaseError("Error reading from file", errno);
134 p += c;
135 total += c;
136 n -= c;
138 return total;
141 /** Write n bytes from block pointed to by p to file descriptor fd. */
142 void
143 io_write(int fd, const char * p, size_t n)
145 while (n) {
146 ssize_t c = write(fd, p, n);
147 if (c < 0) {
148 if (errno == EINTR) continue;
149 throw Xapian::DatabaseError("Error writing to file", errno);
151 p += c;
152 n -= c;
156 size_t
157 io_pread(int fd, char * p, size_t n, off_t o, size_t min)
159 size_t total = 0;
160 #ifdef HAVE_PREAD
161 while (true) {
162 ssize_t c = pread(fd, p, n, o);
163 // We should get a full read most of the time, so streamline that case.
164 if (usual(c == ssize_t(n)))
165 return total + n;
166 // -1 is error, 0 is EOF
167 if (c <= 0) {
168 // We get EINTR if the syscall was interrupted by a signal.
169 // In this case we should retry the read.
170 if (errno == EINTR) continue;
171 if (c == 0)
172 throw Xapian::DatabaseError("EOF reading database");
173 throw Xapian::DatabaseError("Error reading database", errno);
175 total += c;
176 if (total >= min)
177 return total;
178 p += c;
179 n -= c;
180 o += c;
182 #else
183 if (rare(lseek(fd, o, SEEK_SET) < 0))
184 throw Xapian::DatabaseError("Error seeking database", errno);
185 while (true) {
186 ssize_t c = read(fd, p, n);
187 // We should get a full read most of the time, so streamline that case.
188 if (usual(c == ssize_t(n)))
189 return total + n;
190 if (c <= 0) {
191 // We get EINTR if the syscall was interrupted by a signal.
192 // In this case we should retry the read.
193 if (errno == EINTR) continue;
194 if (c == 0)
195 throw Xapian::DatabaseError("EOF reading database");
196 throw Xapian::DatabaseError("Error reading database", errno);
198 total += c;
199 if (total >= min)
200 return total;
201 p += c;
202 n -= c;
204 #endif
207 [[noreturn]]
208 static void
209 throw_block_error(const char * s, off_t b, int e = 0)
211 std::string m = s;
212 m += str(b);
213 throw Xapian::DatabaseError(m, e);
#ifdef HAVE_POSIX_FADVISE
/** Advise the OS that block b of fd will be needed soon.
 *
 *  @param fd  File descriptor the block lives in.
 *  @param n   Block size in bytes.
 *  @param b   Block number.
 *  @param o   Offset added to the computed block position.
 *
 *  @return true if posix_fadvise() reported success, false otherwise.
 */
bool
io_readahead_block(int fd, size_t n, off_t b, off_t o)
{
    const off_t block_start = o + b * n;
    // A failure here is assumed likely to recur for further calls with the
    // same fd, so it's just reported to the caller via the return value.
    const int rc = posix_fadvise(fd, block_start, n, POSIX_FADV_WILLNEED);
    return rc == 0;
}
#endif

227 void
228 io_read_block(int fd, char * p, size_t n, off_t b, off_t o)
230 o += b * n;
231 // Prefer pread if available since it's typically implemented as a
232 // separate syscall, and that eliminates the overhead of an extra syscall
233 // per block read.
234 #ifdef HAVE_PREAD
235 while (true) {
236 ssize_t c = pread(fd, p, n, o);
237 // We should get a full read most of the time, so streamline that case.
238 if (usual(c == ssize_t(n)))
239 return;
240 // -1 is error, 0 is EOF
241 if (c <= 0) {
242 // We get EINTR if the syscall was interrupted by a signal.
243 // In this case we should retry the read.
244 if (errno == EINTR) continue;
245 if (c == 0)
246 throw_block_error("EOF reading block ", b);
247 throw_block_error("Error reading block ", b, errno);
249 p += c;
250 n -= c;
251 o += c;
253 #else
254 if (rare(lseek(fd, o, SEEK_SET) < 0))
255 throw_block_error("Error seeking to block ", b, errno);
256 while (true) {
257 ssize_t c = read(fd, p, n);
258 // We should get a full read most of the time, so streamline that case.
259 if (usual(c == ssize_t(n)))
260 return;
261 if (c <= 0) {
262 // We get EINTR if the syscall was interrupted by a signal.
263 // In this case we should retry the read.
264 if (errno == EINTR) continue;
265 if (c == 0)
266 throw_block_error("EOF reading block ", b);
267 throw_block_error("Error reading block ", b, errno);
269 p += c;
270 n -= c;
272 #endif
275 void
276 io_write_block(int fd, const char * p, size_t n, off_t b, off_t o)
278 o += b * n;
279 // Prefer pwrite if available since it's typically implemented as a
280 // separate syscall, and that eliminates the overhead of an extra syscall
281 // per block write.
282 #ifdef HAVE_PWRITE
283 while (true) {
284 ssize_t c = pwrite(fd, p, n, o);
285 // We should get a full write most of the time, so streamline that case.
286 if (usual(c == ssize_t(n)))
287 return;
288 if (c < 0) {
289 // We get EINTR if the syscall was interrupted by a signal.
290 // In this case we should retry the write.
291 if (errno == EINTR) continue;
292 throw_block_error("Error writing block ", b, errno);
294 p += c;
295 n -= c;
296 o += c;
298 #else
299 if (rare(lseek(fd, o, SEEK_SET) < 0))
300 throw_block_error("Error seeking to block ", b, errno);
301 while (true) {
302 ssize_t c = write(fd, p, n);
303 // We should get a full write most of the time, so streamline that case.
304 if (usual(c == ssize_t(n)))
305 return;
306 if (c < 0) {
307 // We get EINTR if the syscall was interrupted by a signal.
308 // In this case we should retry the write.
309 if (errno == EINTR) continue;
310 throw_block_error("Error writing block ", b, errno);
312 p += c;
313 n -= c;
315 #endif
318 bool
319 io_tmp_rename(const std::string & tmp_file, const std::string & real_file)
321 #ifdef EXDEV
322 // We retry on EXDEV a few times as some older Linux kernels are buggy and
323 // fail with EXDEV when the two files are on the same device (as they
324 // always ought to be when this function is used). Don't retry forever in
325 // case someone calls this with files on different devices.
327 // We're not sure exactly which kernels are buggy in this way, but there's
328 // discussion here: http://www.spinics.net/lists/linux-nfs/msg17306.html
330 // Reported at: https://trac.xapian.org/ticket/698
331 int retries = 5;
332 retry:
333 #endif
334 if (posixy_rename(tmp_file.c_str(), real_file.c_str()) < 0) {
335 #ifdef EXDEV
336 if (errno == EXDEV && --retries > 0) goto retry;
337 #endif
338 // With NFS, rename() failing may just mean that the server crashed
339 // after successfully renaming, but before reporting this, and then
340 // the retried operation fails. So we need to check if the source
341 // file still exists, which we do by calling unlink(), since we want
342 // to remove the temporary file anyway.
343 int saved_errno = errno;
344 if (unlink(tmp_file.c_str()) == 0 || errno != ENOENT) {
345 errno = saved_errno;
346 return false;
349 return true;