Refactor handling of input files
[xapian.git] / xapian-applications / omega / diritor.h
blobe15bc41ef2153268b9fafc62f46c87c1949a57f7
1 /** @file diritor.h
2 * @brief Iterator through entries in a directory.
3 */
4 /* Copyright (C) 2007,2008,2010,2011,2012,2013,2014,2015,2018 Olly Betts
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19 */
21 #ifndef OMEGA_INCLUDED_DIRITOR_H
22 #define OMEGA_INCLUDED_DIRITOR_H
24 #include <cerrno>
25 #include <string>
27 #include "safedirent.h"
28 #include "safefcntl.h"
29 #include "safesysstat.h"
30 #include "safeunistd.h"
32 #include <sys/types.h>
34 #ifndef __WIN32__
35 #include <grp.h> // For getgrgid().
36 #include <pwd.h> // For getpwuid().
37 #endif
39 #include <magic.h>
40 #include <zlib.h>
42 #include "common/noreturn.h"
44 #include "loadfile.h"
45 #include "md5wrap.h"
46 #include "runfilter.h" // For class ReadError.
// Empty tag type with no members.  NOTE(review): presumably thrown as an
// exception when a file cannot be found - confirm against the callers.
48 struct FileNotFound { };
/** Exception signalling that changes should be committed, but indexing should
 *  then be aborted.
 *
 *  The constructors are defined outside this header; presumably each one
 *  assembles the stored message from its arguments (TODO confirm).
 */
class CommitAndExit {
  public:
    CommitAndExit(const char * msg_, const std::string & path, int errno_);
    CommitAndExit(const char * msg_, int errno_);
    CommitAndExit(const char * msg_, const char * error);

    /// Return the message stored in this exception.
    const std::string & what() const {
        return msg;
    }

  private:
    /// Message text returned by what().
    std::string msg;
};
62 class DirectoryIterator {
63 #if defined O_NOATIME && O_NOATIME != 0
64 static uid_t euid;
65 #endif
67 static magic_t magic_cookie;
69 std::string path;
70 std::string::size_type path_len;
72 DIR * dir;
73 struct dirent *entry;
74 struct stat statbuf;
75 bool statbuf_valid;
76 bool follow_symlinks;
77 int fd;
79 void call_stat();
81 void ensure_statbuf_valid() {
82 if (!statbuf_valid) {
83 call_stat();
84 statbuf_valid = true;
88 void build_path();
90 void open_fd();
92 void close_fd();
94 public:
96 explicit DirectoryIterator(bool follow_symlinks_)
97 : dir(NULL), follow_symlinks(follow_symlinks_), fd(-1) { }
99 ~DirectoryIterator() {
100 if (dir) closedir(dir);
101 if (fd >= 0) close_fd();
104 /// Start iterating through entries in @a path.
106 // Throws a std::string exception upon failure.
107 void start(const std::string & path);
109 /// Read the next directory entry which doesn't start with ".".
111 // We do this to skip ".", "..", and Unix hidden files.
113 // @return false if there are no more entries.
114 bool next() {
115 if (fd >= 0) close_fd();
116 path.resize(path_len);
117 errno = 0;
118 do {
119 entry = readdir(dir);
120 } while (entry && entry->d_name[0] == '.');
121 statbuf_valid = false;
122 if (entry == NULL && errno != 0) next_failed();
123 return (entry != NULL);
126 XAPIAN_NORETURN(void next_failed() const);
128 const char * leafname() const { return entry->d_name; }
130 const std::string & pathname() const { return path; }
132 typedef enum { REGULAR_FILE, DIRECTORY, OTHER } type;
134 type get_type() {
135 #ifdef DT_UNKNOWN
136 /* Possible values:
137 * DT_UNKNOWN DT_FIFO DT_CHR DT_DIR DT_BLK DT_REG DT_LNK DT_SOCK DT_WHT
139 switch (entry->d_type) {
140 case DT_UNKNOWN:
141 // The current filing system doesn't support d_type.
142 break;
143 case DT_REG:
144 return REGULAR_FILE;
145 case DT_DIR:
146 return DIRECTORY;
147 #ifdef HAVE_LSTAT
148 case DT_LNK:
149 if (follow_symlinks) break;
150 return OTHER;
151 #endif
152 default:
153 return OTHER;
155 #endif
157 ensure_statbuf_valid();
159 if (S_ISREG(statbuf.st_mode)) return REGULAR_FILE;
160 if (S_ISDIR(statbuf.st_mode)) return DIRECTORY;
161 return OTHER;
164 off_t get_size() {
165 ensure_statbuf_valid();
166 return statbuf.st_size;
169 time_t get_mtime() {
170 ensure_statbuf_valid();
171 return statbuf.st_mtime;
174 time_t get_ctime() {
175 ensure_statbuf_valid();
176 return statbuf.st_ctime;
179 const char * get_owner() {
180 #ifndef __WIN32__
181 ensure_statbuf_valid();
182 struct passwd * pwentry = getpwuid(statbuf.st_uid);
183 return pwentry ? pwentry->pw_name : NULL;
184 #else
185 return NULL;
186 #endif
189 const char * get_group() {
190 #ifndef __WIN32__
191 ensure_statbuf_valid();
192 struct group * grentry = getgrgid(statbuf.st_gid);
193 return grentry ? grentry->gr_name : NULL;
194 #else
195 return NULL;
196 #endif
199 bool is_owner_readable() {
200 ensure_statbuf_valid();
201 #ifndef __WIN32__
202 return (statbuf.st_mode & S_IRUSR);
203 #else
204 return (statbuf.st_mode & S_IREAD);
205 #endif
208 bool is_group_readable() {
209 ensure_statbuf_valid();
210 #ifndef __WIN32__
211 return (statbuf.st_mode & S_IRGRP);
212 #else
213 return false;
214 #endif
217 bool is_other_readable() {
218 ensure_statbuf_valid();
219 #ifndef __WIN32__
220 return (statbuf.st_mode & S_IROTH);
221 #else
222 return false;
223 #endif
226 bool try_noatime() {
227 #if defined O_NOATIME && O_NOATIME != 0
228 if (euid == 0) return true;
229 ensure_statbuf_valid();
230 return statbuf.st_uid == euid;
231 #else
232 return false;
233 #endif
236 std::string get_magic_mimetype();
238 std::string file_to_string() {
239 std::string out;
240 if (!load_file_from_fd(get_fd(), out)) {
241 throw ReadError("loading file failed");
243 return out;
246 std::string gzfile_to_string() {
247 int dup_fd = dup(get_fd());
248 if (fd < 0) {
249 throw ReadError("dup() failed");
251 gzFile zfh = gzdopen(dup_fd, "rb");
252 if (zfh == NULL) {
253 throw ReadError("gzdopen() failed");
255 std::string out;
256 char buf[8192];
257 while (true) {
258 int r = gzread(zfh, buf, sizeof(buf));
259 if (r < 0) {
260 gzclose(zfh);
261 throw ReadError("gzread() failed");
263 out.append(buf, r);
264 if (unsigned(r) < sizeof(buf)) break;
266 gzclose(zfh);
267 return out;
270 int get_fd() {
271 if (fd < 0) {
272 open_fd();
273 } else {
274 if (lseek(fd, 0, SEEK_SET) < 0)
275 throw CommitAndExit("Can't rewind file descriptor", path, errno);
277 return fd;
280 bool md5(std::string& md5) {
281 return md5_fd(get_fd(), md5);
285 #endif // OMEGA_INCLUDED_DIRITOR_H