[ci] Fix netbsd job to upgrade existing packages
[xapian.git] / xapian-applications / omega / index_file.h
blobf5c6d2a34d8a31167078d72430dc548ec2dd6414
/** @file
 * @brief Handle indexing a document from a file
 */
/* Copyright 1999,2000,2001 BrightStation PLC
 * Copyright 2001,2005 James Aylett
 * Copyright 2001,2002 Ananova Ltd
 * Copyright 2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2017,2019 Olly Betts
 * Copyright 2019 Bruno Baruffaldi
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
 * USA
 */
26 #ifndef OMEGA_INCLUDED_INDEX_FILE_H
27 #define OMEGA_INCLUDED_INDEX_FILE_H
29 #include <sys/types.h>
30 #include <map>
31 #include <string>
32 #include <xapian.h>
34 class Worker;
35 class DirectoryIterator;
/// Bit values for skip-related flags (each enumerator is a distinct bit).
///
/// NOTE(review): presumably OR-ed together and passed as the `flags`
/// argument of skip() - confirm against the callers.
enum skip_flags {
    SKIP_VERBOSE_ONLY = 0x01,
    SKIP_SHOW_FILENAME = 0x02
};
/// Choices for how a document with an empty body is handled.
///
/// NOTE(review): the meaning of each choice is inferred from its name; the
/// code acting on these values is not in this header.
enum empty_body_type {
    EMPTY_BODY_WARN,
    EMPTY_BODY_INDEX,
    EMPTY_BODY_SKIP
};
/// Choices for how duplicates are handled.
///
/// NOTE(review): the meaning of each choice is inferred from its name; the
/// code acting on these values is not in this header.
enum dup_action_type {
    DUP_SKIP,
    DUP_CHECK_LAZILY
};
// Commands which take a filename as the last argument, and output UTF-8
// text or some other mime type are common, so we handle these with a std::map.
struct Filter {
    /// Command string; left empty by the default and Worker* constructors.
    std::string cmd;

    /// Output type supplied to the constructor; empty if none was given.
    std::string output_type;

    /// Output character set supplied to the constructor; empty if none given.
    std::string output_charset;

    /// Bit values which may be OR-ed together to form @a flags.
    enum {
	USE_SHELL = 1,
	PIPE_IN = 2,
	PIPE_DEV_STDIN = 4,
	SEEK_DEV_STDIN = 8
    };

    /// Combination of the bit values above (0 means none set).
    unsigned flags = 0;

    /** Set if this is a mapping for a worker sub-process. */
    // The elaborated type specifier means this struct doesn't rely on a
    // forward declaration of Worker having been seen earlier.
    class Worker* worker = nullptr;

    /// Construct with empty strings, no flags and no worker.
    Filter() = default;

    /// Construct from a command and optional flags.
    explicit Filter(const std::string& cmd_, unsigned flags_ = 0)
	: cmd(cmd_), flags(flags_) { }

    /// Construct from a command, its output type and optional flags.
    Filter(const std::string& cmd_, const std::string& output_type_,
	   unsigned flags_ = 0)
	: cmd(cmd_), output_type(output_type_), flags(flags_) { }

    /// Construct from a command, output type, output charset and optional
    /// flags.
    Filter(const std::string& cmd_, const std::string& output_type_,
	   const std::string& output_charset_,
	   unsigned flags_ = 0)
	: cmd(cmd_), output_type(output_type_),
	  output_charset(output_charset_), flags(flags_) { }

    /// Construct a mapping for a worker; cmd stays empty and flags stays 0.
    explicit Filter(Worker* worker_) : worker(worker_) { }

    /// @return true if USE_SHELL is set in flags.
    bool use_shell() const { return (flags & USE_SHELL) != 0; }

    /** @return true if a flag requesting input on stdin is set.
     *
     *  Without HAVE_DEV_STDIN only PIPE_IN counts; with it, PIPE_DEV_STDIN
     *  and SEEK_DEV_STDIN count too.
     */
    bool input_on_stdin() const {
#ifdef HAVE_DEV_STDIN
	return (flags & (PIPE_IN | PIPE_DEV_STDIN | SEEK_DEV_STDIN)) != 0;
#else
	return (flags & PIPE_IN) != 0;
#endif
    }

    /** @return true if PIPE_DEV_STDIN or SEEK_DEV_STDIN is set and
     *  HAVE_DEV_STDIN is defined; always false otherwise.
     */
    bool dev_stdin() const {
#ifdef HAVE_DEV_STDIN
	return (flags & (PIPE_DEV_STDIN | SEEK_DEV_STDIN)) != 0;
#else
	return false;
#endif
    }
};
92 extern std::map<std::string, Filter> commands;
94 inline void
95 index_library(const std::string& type, Worker* worker)
97 commands[type] = Filter(worker);
100 inline void
101 index_command(const std::string& type, const Filter& filter)
103 commands[type] = filter;
106 inline void
107 index_command(const char* type, const Filter& filter)
109 commands[type] = filter;
/** Handle a file which is being skipped.
 *
 *  NOTE(review): only the declaration is visible in this header; the summary
 *  above and the parameter roles are inferred from the names - confirm
 *  against the definition.  @a flags presumably takes skip_flags values.
 */
void
skip(const std::string& urlterm, const std::string& context,
     const std::string& msg,
     off_t size, time_t last_mod, unsigned flags = 0);
/// Call index_command() to set up the default command filters.
void
index_add_default_filters();
/// Call to set up the default libraries.
void
index_add_default_libraries();
125 /// Initialise.
126 void
127 index_init(const std::string& dbpath, const Xapian::Stem& stemmer,
128 const std::string& root_,
129 const std::string& site_term_, const std::string& host_term_,
130 empty_body_type empty_body_, dup_action_type dup_action_,
131 size_t sample_size_, size_t title_size_,
132 size_t max_ext_len_,
133 bool overwrite, bool retry_failed_,
134 bool delete_removed_documents, bool verbose_, bool use_ctime_,
135 bool spelling, bool ignore_exclusions_, bool description_as_sample,
136 bool date_terms);
/// NOTE(review): presumably removes the record of a failed indexing attempt
/// for @a urlterm - inferred from the name; confirm against the definition.
void
index_remove_failed_entry(const std::string& urlterm);
141 void
142 index_add_document(const std::string& urlterm, time_t last_altered,
143 Xapian::docid did, const Xapian::Document& doc);
145 /// Index a file into the database.
146 void
147 index_mimetype(const std::string& file,
148 const std::string& urlterm,
149 const std::string& url,
150 const std::string& ext,
151 std::string mimetype,
152 DirectoryIterator& d,
153 std::string pathterm,
154 std::string record);
/// Delete any previously indexed documents we haven't seen.
void
index_handle_deletion();
/// Commit any pending changes.
void
index_commit();
/// Clean up and release any resources, etc.
void
index_done();
165 #endif // OMEGA_INCLUDED_INDEX_FILE_H