Drop special handling for Compaq C++
[xapian.git] / xapian-applications / omega / diritor.cc
blob2a472bf65648a6da5dc2e45616cfa0499741f8ec
1 /* diritor.cc: Iterator through entries in a directory.
3 * Copyright (C) 2007,2008,2010,2011,2012,2013,2014 Olly Betts
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
20 #include <config.h>
22 #include "diritor.h"
24 #include "safeunistd.h"
25 #include <sys/types.h>
27 #include <cerrno>
28 #include <cstring>
30 using namespace std;
32 CommitAndExit::CommitAndExit(const char * msg_, const std::string & path,
33 int errno_)
35 msg = msg_;
36 msg += " \"";
37 msg += path;
38 msg += "\" (";
39 msg += strerror(errno_);
40 msg += ")";
43 CommitAndExit::CommitAndExit(const char * msg_, int errno_)
45 msg = msg_;
46 msg += " (";
47 msg += strerror(errno_);
48 msg += ")";
51 CommitAndExit::CommitAndExit(const char * msg_, const char * error)
53 msg = msg_;
54 msg += " (";
55 msg += error;
56 msg += ")";
59 #if defined O_NOATIME && O_NOATIME != 0
60 uid_t DirectoryIterator::euid = geteuid();
61 #endif
63 magic_t DirectoryIterator::magic_cookie = NULL;
65 void
66 DirectoryIterator::call_stat()
68 build_path();
69 int retval;
70 if (fd >= 0) {
71 retval = fstat(fd, &statbuf);
72 #ifdef HAVE_LSTAT
73 } else if (!follow_symlinks) {
74 retval = lstat(path.c_str(), &statbuf);
75 #endif
76 } else {
77 retval = stat(path.c_str(), &statbuf);
79 if (retval == -1) {
80 if (errno == ENOENT || errno == ENOTDIR)
81 throw FileNotFound();
82 if (errno == EACCES)
83 throw string(strerror(errno));
84 // Commit changes to files processed so far.
85 throw CommitAndExit("Can't stat", path, errno);
89 void
90 DirectoryIterator::build_path()
92 if (path.length() == path_len) {
93 path += '/';
94 path += leafname();
98 void
99 DirectoryIterator::start(const std::string & path_)
101 if (dir) closedir(dir);
102 path = path_;
103 path_len = path.length();
104 dir = opendir(path.c_str());
105 if (dir == NULL) {
106 if (errno == ENOENT || errno == ENOTDIR)
107 throw FileNotFound();
108 if (errno == EACCES)
109 throw string(strerror(errno));
110 // Commit changes to files processed so far.
111 throw CommitAndExit("Can't open directory", path, errno);
115 void
116 DirectoryIterator::next_failed() const
118 // The Linux getdents() syscall (which readdir uses internally) is
119 // documented as being able to return ENOENT and ENOTDIR. Also,
120 // EACCES has been observed here on CIFS mounts.
121 if (errno == ENOENT || errno == ENOTDIR)
122 throw FileNotFound();
123 if (errno == EACCES)
124 throw string(strerror(errno));
125 throw CommitAndExit("Can't read next entry from directory", path, errno);
128 string
129 DirectoryIterator::get_magic_mimetype()
131 if (rare(magic_cookie == NULL)) {
132 #ifdef MAGIC_MIME_TYPE
133 magic_cookie = magic_open(MAGIC_SYMLINK|MAGIC_MIME_TYPE|MAGIC_ERROR);
134 #else
135 // MAGIC_MIME_TYPE was added in 4.22, released 2007-12-27. If we don't
136 // have it then use MAGIC_MIME instead and trim any encoding off below.
137 magic_cookie = magic_open(MAGIC_SYMLINK|MAGIC_MIME|MAGIC_ERROR);
138 #endif
139 if (magic_cookie == NULL) {
140 // Commit changes to files processed so far.
141 throw CommitAndExit("Failed to initialise the file magic library",
142 errno);
144 if (magic_load(magic_cookie, NULL) == -1) {
145 // Commit changes to files processed so far.
146 const char * err = magic_error(magic_cookie);
147 throw CommitAndExit("Failed to load the file magic database", err);
151 const char * res = NULL;
152 // Prior to 5.15, magic_descriptor() closed the fd passed, so avoid it.
153 #if defined MAGIC_VERSION && MAGIC_VERSION - 0 >= 515
154 if (fd >= 0) {
155 if (lseek(fd, 0, SEEK_SET) == 0)
156 res = magic_descriptor(magic_cookie, fd);
157 } else
158 #endif
160 build_path();
161 res = magic_file(magic_cookie, path.c_str());
163 if (!res) {
164 const char * err = magic_error(magic_cookie);
165 if (rare(err)) {
166 int eno = magic_errno(magic_cookie);
167 if (eno == ENOENT || eno == ENOTDIR)
168 throw FileNotFound();
169 string m("Failed to use magic on file: ");
170 m += err;
171 throw m;
173 return string();
176 // Sometimes libmagic returns this string instead of a mime-type for some
177 // Microsoft documents, so pick a suitable MIME content-type based on the
178 // extension. Newer versions seem to return "application/CDFV2-corrupt"
179 // instead for this case (on Debian, file 5.11 gives the former and file
180 // 5.18 the latter).
181 #define COMPOSITE_DOC "Composite Document File V2 Document"
182 if (strncmp(res, COMPOSITE_DOC, sizeof(COMPOSITE_DOC) - 1) == 0 ||
183 strcmp(res, "application/CDFV2-corrupt") == 0) {
184 // Default to something self-explanatory.
185 res = "application/x-compound-document-file";
186 const char * leaf = leafname();
187 const char * ext = strrchr(leaf, '.');
188 if (ext && strlen(++ext) == 3) {
189 char e[3];
190 for (int i = 0; i != 3; ++i) {
191 if (ext[i] <= 'Z' && ext[i] >= 'A')
192 e[i] = ext[i] + ('a' - 'A');
193 else
194 e[i] = ext[i];
196 switch (e[0]) {
197 case 'd':
198 if (e[1] == 'o')
199 res = "application/msword";
200 break;
201 case 'm':
202 if (e[1] == 's' && e[2] == 'g')
203 res = "application/vnd.ms-outlook";
204 break;
205 case 'p':
206 if (e[1] == 'p' || e[1] == 'o')
207 res = "application/vnd.ms-powerpoint";
208 else if (e[1] == 'u' && e[2] == 'b')
209 res = "application/x-mspublisher";
210 break;
211 case 'x':
212 if (e[1] == 'l')
213 res = "application/vnd.ms-excel";
214 break;
215 case 'w':
216 if (e[1] == 'p' && e[2] != 'd')
217 res = "application/vnd.ms-works";
218 break;
221 } else {
222 #ifndef MAGIC_MIME_TYPE
223 // Discard any encoding returned.
224 char * spc = strchr(res, ' ');
225 if (spc)
226 *spc = '\0';
227 #endif
230 return res;