Avoid using the shell to run most external commands
[xapian.git] / xapian-applications / omega / index_file.h
blobc0402d4c1394e64b1658af019d4744f164db4b1d
1 /** @file index_file.h
2 * @brief Handle indexing a document from a file
3 */
4 /* Copyright 1999,2000,2001 BrightStation PLC
5 * Copyright 2001,2005 James Aylett
6 * Copyright 2001,2002 Ananova Ltd
7 * Copyright 2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015 Olly Betts
9 * This program is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU General Public License as
11 * published by the Free Software Foundation; either version 2 of the
12 * License, or (at your option) any later version.
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
22 * USA
 */
25 #ifndef OMEGA_INCLUDED_INDEX_FILE_H
26 #define OMEGA_INCLUDED_INDEX_FILE_H
28 #include <sys/types.h>
29 #include <map>
30 #include <string>
31 #include <xapian.h>
33 class DirectoryIterator;
/// Bit flags (distinct bits, so they can be OR-ed together).
//
//  NOTE(review): presumably these are what the `flags` argument of skip()
//  accepts - confirm against the implementation.
enum skip_flags {
    SKIP_VERBOSE_ONLY = 0x01,
    SKIP_SHOW_FILENAME = 0x02
};
/// Selector passed to index_init() as `empty_body_`.
//
//  NOTE(review): going by the names, this chooses between warning about,
//  indexing, or skipping a document with an empty body - confirm against the
//  implementation.
enum empty_body_type {
    EMPTY_BODY_WARN,
    EMPTY_BODY_INDEX,
    EMPTY_BODY_SKIP
};
/// Selector passed to index_init() as `dup_action_`.
//
//  NOTE(review): going by the names, this chooses how duplicate documents are
//  handled (skip, lazy check, paranoid check) - confirm against the
//  implementation.
enum dup_action_type {
    DUP_SKIP,
    DUP_CHECK_LAZILY,
    DUP_CHECK_PARANOID
};
// Commands which take a filename as the last argument, and output UTF-8
// text or some other mime type are common, so we handle these with a std::map.
//
// A Filter describes one such command.
//
// The `const char *` overloads exist because, without them, a call such as
// Filter("cmd", "text/html") with string literals selects the
// (cmd, bool use_shell) constructor: pointer-to-bool is a standard conversion
// and so beats the user-defined conversion to std::string.  The output type
// (or, for the three argument form, the output charset) would then be
// silently discarded.
struct Filter {
    /// The command to run.
    std::string cmd;

    /// Type of the command's output (empty if none was specified).
    std::string output_type;

    /// Character set of the command's output (empty if none was specified).
    std::string output_charset;

    /// True if the command should NOT be run via the shell.
    bool no_shell;

    Filter()
        : cmd(), output_type(), output_charset(), no_shell(false) { }

    explicit Filter(const std::string & cmd_, bool use_shell = true)
        : cmd(cmd_), output_type(), output_charset(),
          no_shell(!use_shell) { }

    Filter(const std::string & cmd_, const std::string & output_type_,
           bool use_shell = true)
        : cmd(cmd_), output_type(output_type_), output_charset(),
          no_shell(!use_shell) { }

    /// As above, but matches a C string output type exactly (a NULL pointer
    /// is treated as an empty string).
    Filter(const std::string & cmd_, const char * output_type_,
           bool use_shell = true)
        : cmd(cmd_), output_type(output_type_ ? output_type_ : ""),
          output_charset(), no_shell(!use_shell) { }

    Filter(const std::string & cmd_, const std::string & output_type_,
           const std::string & output_charset_,
           bool use_shell = true)
        : cmd(cmd_), output_type(output_type_),
          output_charset(output_charset_), no_shell(!use_shell) { }

    /// C string variants of the (cmd, output_type, output_charset) form; NULL
    /// pointers are treated as empty strings.
    Filter(const std::string & cmd_, const char * output_type_,
           const char * output_charset_,
           bool use_shell = true)
        : cmd(cmd_), output_type(output_type_ ? output_type_ : ""),
          output_charset(output_charset_ ? output_charset_ : ""),
          no_shell(!use_shell) { }

    Filter(const std::string & cmd_, const std::string & output_type_,
           const char * output_charset_,
           bool use_shell = true)
        : cmd(cmd_), output_type(output_type_),
          output_charset(output_charset_ ? output_charset_ : ""),
          no_shell(!use_shell) { }

    Filter(const std::string & cmd_, const char * output_type_,
           const std::string & output_charset_,
           bool use_shell = true)
        : cmd(cmd_), output_type(output_type_ ? output_type_ : ""),
          output_charset(output_charset_), no_shell(!use_shell) { }

    /// Inverse of the stored no_shell flag.
    bool use_shell() const { return !no_shell; }
};
66 extern std::map<std::string, Filter> commands;
68 inline void
69 index_command(const std::string & type, const Filter & filter)
71 commands[type] = filter;
74 inline void
75 index_command(const char * type, const Filter & filter)
77 commands[type] = filter;
/// Defined elsewhere.
//
//  NOTE(review): presumably records/reports that the document identified by
//  @a urlterm was skipped, with @a msg as the reason, and @a flags a
//  combination of skip_flags values - confirm against the implementation.
void
skip(const std::string & urlterm, const std::string & context,
     const std::string & msg,
     off_t size, time_t last_mod, unsigned flags = 0);
/// Call index_command() to set up the default command filters.
void
index_add_default_filters();
89 /// Initialise.
90 void
91 index_init(const std::string & dbpath, const Xapian::Stem & stemmer,
92 const std::string & root_, const std::string & site_term_,
93 const std::string & host_term_,
94 empty_body_type empty_body_, dup_action_type dup_action_,
95 size_t sample_size_, size_t title_size_, size_t max_ext_len_,
96 bool overwrite, bool retry_failed_,
97 bool delete_removed_documents, bool verbose_, bool use_ctime_,
98 bool spelling, bool ignore_exclusions_);
100 void
101 index_add_document(const std::string & urlterm, time_t last_altered,
102 Xapian::docid did, const Xapian::Document & doc);
104 /// Index a file into the database.
105 void
106 index_mimetype(const std::string & file, const std::string & urlterm,
107 const std::string & url,
108 const std::string & ext,
109 const std::string &mimetype, DirectoryIterator &d,
110 Xapian::Document &doc,
111 std::string record);
/// Delete any previously indexed documents we haven't seen.
void
index_handle_deletion();
/// Commit any pending changes.
void
index_commit();
/// Clean up and release any resources, etc.
void
index_done();
122 #endif // OMEGA_INCLUDED_INDEX_FILE_H