3 * Copyright 1999,2000,2001 BrightStation PLC
4 * Copyright 2001 Sam Liddicott
5 * Copyright 2001,2002 Ananova Ltd
6 * Copyright 2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2014,2015,2017,2018 Olly Betts
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License as
10 * published by the Free Software Foundation; either version 2 of the
11 * License, or (at your option) any later version.
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
38 #include "safeerrno.h"
42 #include "commonhelp.h"
45 #include "myhtmlparse.h"
47 #include "stringutils.h"
49 #include "utf8truncate.h"
53 #include "gnu_getopt.h"
57 #define PROG_NAME "scriptindex"
58 #define PROG_DESC "index arbitrary data as described by an index script"
66 prefix_needs_colon(const string
& prefix
, unsigned ch
)
68 if (!C_isupper(ch
) && ch
!= ':') return false;
69 string::size_type len
= prefix
.length();
70 return (len
> 1 && prefix
[len
- 1] != ':');
// Printable names of the index actions.  DUMP_ACTION below and the
// "has no effect" diagnostics index this array with an Action::type value,
// so the entries have to stay in the same order as that enum.
// NOTE(review): Action::BAD and Action::NEW are used elsewhere in this file
// but no names for them are visible in this listing - confirm that the array
// and the enum still line up.
73 const char * action_names
[] = {
75 "boolean", "date", "field", "hash", "hextobin", "index", "indexnopos",
76 "load", "lower", "parsedate", "spell", "truncate", "unhtml", "unique",
77 "value", "valuenumeric", "valuepacked", "weight"
// Debugging aid: print action A to stdout as "name(string_arg,num_arg)".
81 #define DUMP_ACTION(A) cout << action_names[(A).get_action()] << "(" << (A).get_string_arg() << "," << (A).get_num_arg() << ")" << endl
87 BOOLEAN
, DATE
, FIELD
, HASH
, HEXTOBIN
, INDEX
, INDEXNOPOS
, LOAD
, LOWER
,
88 PARSEDATE
, SPELL
, TRUNCATE
, UNHTML
, UNIQUE
, VALUE
,
89 VALUENUMERIC
, VALUEPACKED
, WEIGHT
96 Action(type action_
) : action(action_
), num_arg(0) { }
97 Action(type action_
, const string
& arg
)
98 : action(action_
), string_arg(arg
) {
99 num_arg
= atoi(string_arg
.c_str());
101 Action(type action_
, const string
& arg
, int num
)
102 : action(action_
), num_arg(num
), string_arg(arg
) { }
103 type
get_action() const { return action
; }
104 int get_num_arg() const { return num_arg
; }
105 void set_num_arg(int num
) { num_arg
= num
; }
106 const string
& get_string_arg() const { return string_arg
; }
/** Warn on stderr that an index action has no effect.
 *
 *  The warning has the form "FILE:LINE[:POS]: Warning: Index action 'X' has
 *  no effect".  The first time this function is called it also emits a
 *  follow-up note that actions are executed from left to right; later calls
 *  don't repeat the note.
 *
 *  @param file    Name of the index script, used as the message prefix.
 *  @param line    Line number within the index script.
 *  @param pos     Position within the line, or std::string::npos to leave
 *		   the position out of the message.
 *  @param action  Name of the action which has no effect.
 */
static void
report_useless_action(const std::string &file, size_t line, size_t pos,
		      const std::string &action)
{
    std::cerr << file << ':' << line;
    if (pos != std::string::npos) std::cerr << ':' << pos;
    std::cerr << ": Warning: Index action '" << action << "' has no effect"
	      << std::endl;

    // The explanatory note is only worth showing once per run, however many
    // useless actions get reported.
    static bool given_left_to_right_warning = false;
    if (!given_left_to_right_warning) {
	given_left_to_right_warning = true;
	std::cerr << file << ':' << line
		  << ": Warning: Note that actions are executed from left to right"
		  << std::endl;
    }
}
126 static map
<string
, vector
<Action
> > index_spec
;
129 parse_index_script(const string
&filename
)
131 ifstream
script(filename
.c_str());
132 if (!script
.is_open()) {
133 cerr
<< filename
<< ": " << strerror(errno
) << endl
;
138 bool had_unique
= false;
139 while (getline(script
, line
)) {
141 vector
<string
> fields
;
142 vector
<Action
> actions
;
143 string::const_iterator i
, j
;
144 const string
&s
= line
;
145 i
= find_if(s
.begin(), s
.end(), [](char ch
) { return !C_isspace(ch
); });
146 if (i
== s
.end() || *i
== '#') continue;
148 if (!C_isalnum(*i
)) {
149 cerr
<< filename
<< ':' << line_no
150 << ": field name must start with alphanumeric" << endl
;
153 j
= find_if(i
, s
.end(),
154 [](char ch
) { return !C_isalnum(ch
) && ch
!= '_'; });
155 fields
.push_back(string(i
, j
));
156 i
= find_if(j
, s
.end(), [](char ch
) { return !C_isspace(ch
); });
157 if (i
== s
.end()) break;
160 i
= find_if(i
, s
.end(), [](char ch
) { return !C_isspace(ch
); });
164 cerr
<< filename
<< ':' << line_no
165 << ": bad character '" << *j
<< "' in fieldname" << endl
;
169 Xapian::termcount weight
= 1;
170 size_t useless_weight_pos
= string::npos
;
171 map
<string
, Action::type
> boolmap
;
173 while (j
!= s
.end()) {
174 i
= find_if(j
, s
.end(), [](char ch
) { return !C_isalnum(ch
); });
175 string
action(s
, j
- s
.begin(), i
- j
);
176 Action::type code
= Action::BAD
;
177 unsigned min_args
= 0, max_args
= 0;
178 bool takes_integer_argument
= false;
179 if (!action
.empty()) {
182 if (action
== "boolean") {
183 code
= Action::BOOLEAN
;
188 if (action
== "date") {
190 min_args
= max_args
= 1;
194 if (action
== "field") {
195 code
= Action::FIELD
;
200 if (action
== "hash") {
203 takes_integer_argument
= true;
204 } else if (action
== "hextobin") {
205 code
= Action::HEXTOBIN
;
209 if (action
== "index") {
210 code
= Action::INDEX
;
212 } else if (action
== "indexnopos") {
213 code
= Action::INDEXNOPOS
;
218 if (action
== "lower") {
219 code
= Action::LOWER
;
220 } else if (action
== "load") {
225 if (action
== "parsedate") {
226 code
= Action::PARSEDATE
;
227 min_args
= max_args
= 1;
231 if (action
== "spell") {
232 code
= Action::SPELL
;
236 if (action
== "truncate") {
237 code
= Action::TRUNCATE
;
238 min_args
= max_args
= 1;
239 takes_integer_argument
= true;
243 if (action
== "unhtml") {
244 code
= Action::UNHTML
;
245 } else if (action
== "unique") {
246 code
= Action::UNIQUE
;
247 min_args
= max_args
= 1;
251 if (action
== "value") {
252 code
= Action::VALUE
;
253 min_args
= max_args
= 1;
254 takes_integer_argument
= true;
255 } else if (action
== "valuenumeric") {
256 code
= Action::VALUENUMERIC
;
257 min_args
= max_args
= 1;
258 takes_integer_argument
= true;
259 } else if (action
== "valuepacked") {
260 code
= Action::VALUEPACKED
;
261 min_args
= max_args
= 1;
262 takes_integer_argument
= true;
266 if (action
== "weight") {
267 code
= Action::WEIGHT
;
268 min_args
= max_args
= 1;
269 takes_integer_argument
= true;
274 if (code
== Action::BAD
) {
275 cerr
<< filename
<< ':' << line_no
276 << ": Unknown index action '" << action
<< "'" << endl
;
279 auto i_after_action
= i
;
280 i
= find_if(i
, s
.end(), [](char ch
) { return !C_isspace(ch
); });
282 if (i
!= s
.end() && *i
== '=') {
283 if (i
!= i_after_action
) {
284 cerr
<< filename
<< ':' << line_no
285 << ": warning: putting spaces between the action and "
286 "'=' is deprecated." << endl
;
290 cerr
<< filename
<< ':' << line_no
291 << ": Index action '" << action
292 << "' doesn't take an argument" << endl
;
297 j
= find_if(i
, s
.end(), [](char ch
) { return !C_isspace(ch
); });
299 cerr
<< filename
<< ':' << line_no
300 << ": warning: putting spaces between '=' and the "
301 "argument is deprecated." << endl
;
306 if (j
!= s
.end() && *j
== '"') {
309 i
= find(j
, s
.end(), '"');
311 cerr
<< filename
<< ':' << line_no
312 << ": No closing quote" << endl
;
315 vals
.emplace_back(j
, i
);
317 if (i
== s
.end() || C_isspace(*i
)) break;
319 cerr
<< filename
<< ':' << line_no
320 << ": Unexpected character '" << *i
321 << "' after closing quote" << endl
;
325 } else if (max_args
> 1) {
326 // Unquoted argument, split on comma.
327 i
= find_if(j
, s
.end(),
329 return C_isspace(ch
) || ch
== ',';
331 vals
.emplace_back(j
, i
);
332 if (*i
!= ',') break;
335 // Unquoted argument, including any commas.
336 i
= find_if(j
, s
.end(),
337 [](char ch
) { return C_isspace(ch
); });
338 vals
.emplace_back(j
, i
);
342 if (vals
.size() == max_args
) {
343 cerr
<< filename
<< ':' << line_no
344 << ": Index action '" << action
345 << "' takes at most " << max_args
<< " arguments"
351 if (vals
.size() < min_args
) {
352 if (min_args
== max_args
) {
353 cerr
<< filename
<< ':' << line_no
354 << ": Index action '" << action
355 << "' requires " << min_args
<< " arguments"
359 cerr
<< filename
<< ':' << line_no
360 << ": Index action '" << action
361 << "' requires at least " << min_args
<< " arguments"
371 if (takes_integer_argument
) {
372 if (val
.find('.') != string::npos
) {
373 cerr
<< filename
<< ':' << line_no
374 << ": Warning: Index action '" << action
375 << "' takes an integer argument" << endl
;
380 case Action::INDEXNOPOS
:
381 actions
.emplace_back(code
, val
, weight
);
382 useless_weight_pos
= string::npos
;
385 // We don't push an Action for WEIGHT - instead we
386 // store it ready to use in the INDEX and INDEXNOPOS
388 weight
= atoi(val
.c_str());
389 if (useless_weight_pos
!= string::npos
) {
390 report_useless_action(filename
, line_no
,
391 useless_weight_pos
, action
);
393 useless_weight_pos
= j
- s
.begin();
395 case Action::TRUNCATE
:
396 if (!actions
.empty() &&
397 actions
.back().get_action() == Action::LOAD
) {
398 /* Turn "load truncate=n" into "load" with
399 * num_arg n, so that we don't needlessly
400 * allocate memory and read data we're just
406 actions
.emplace_back(code
, val
);
410 cerr
<< filename
<< ':' << line_no
411 << ": Index action 'unique' used more than "
416 if (boolmap
.find(val
) == boolmap
.end())
417 boolmap
[val
] = Action::UNIQUE
;
418 actions
.emplace_back(code
, val
);
421 actions
.emplace_back(code
, val
);
422 auto& obj
= actions
.back();
423 auto max_length
= obj
.get_num_arg();
424 if (max_length
< 6) {
425 cerr
<< filename
<< ':' << line_no
426 << ": Index action 'hash' takes an integer "
427 "argument which must be at least 6" << endl
;
432 case Action::BOOLEAN
:
433 boolmap
[val
] = Action::BOOLEAN
;
436 actions
.emplace_back(code
, val
);
438 i
= find_if(i
, s
.end(), [](char ch
) { return !C_isspace(ch
); });
441 if (min_args
== max_args
) {
442 cerr
<< filename
<< ':' << line_no
443 << ": Index action '" << action
444 << "' requires " << min_args
<< " arguments"
448 cerr
<< filename
<< ':' << line_no
449 << ": Index action '" << action
450 << "' requires at least " << min_args
<< " arguments"
454 if (code
== Action::INDEX
|| code
== Action::INDEXNOPOS
) {
455 useless_weight_pos
= string::npos
;
456 actions
.emplace_back(code
, "", weight
);
457 } else if (code
== Action::HASH
) {
458 actions
.emplace_back(code
, "", MAX_SAFE_TERM_LENGTH
- 1);
460 actions
.emplace_back(code
);
466 if (useless_weight_pos
!= string::npos
) {
467 report_useless_action(filename
, line_no
, useless_weight_pos
,
471 while (!actions
.empty()) {
473 Action::type action
= actions
.back().get_action();
476 case Action::HEXTOBIN
:
478 case Action::PARSEDATE
:
480 case Action::TRUNCATE
:
483 report_useless_action(filename
, line_no
, string::npos
,
484 action_names
[action
]);
493 map
<string
, Action::type
>::const_iterator boolpfx
;
494 for (boolpfx
= boolmap
.begin(); boolpfx
!= boolmap
.end(); ++boolpfx
) {
495 if (boolpfx
->second
== Action::UNIQUE
) {
496 cerr
<< filename
<< ':' << line_no
497 << ": Warning: Index action 'unique=" << boolpfx
->first
498 << "' without 'boolean=" << boolpfx
->first
<< "'" << endl
;
499 static bool given_doesnt_imply_boolean_warning
= false;
500 if (!given_doesnt_imply_boolean_warning
) {
501 given_doesnt_imply_boolean_warning
= true;
502 cerr
<< filename
<< ':' << line_no
503 << ": Warning: Note 'unique' doesn't implicitly add "
504 "a boolean term" << endl
;
509 vector
<string
>::const_iterator field
;
510 for (field
= fields
.begin(); field
!= fields
.end(); ++field
) {
511 vector
<Action
> &v
= index_spec
[*field
];
513 if (fields
.size() == 1) {
514 // Optimise common case where there's only one fieldname
515 // for a list of actions.
516 v
= std::move(actions
);
521 v
.emplace_back(Action::NEW
);
522 v
.insert(v
.end(), actions
.begin(), actions
.end());
527 if (index_spec
.empty()) {
528 cerr
<< filename
<< ": No rules found in index script" << endl
;
534 index_file(const char *fname
, istream
&stream
,
535 Xapian::WritableDatabase
&database
, Xapian::TermGenerator
&indexer
)
539 while (!stream
.eof() && getline(stream
, line
)) {
541 Xapian::Document doc
;
542 indexer
.set_document(doc
);
543 Xapian::docid docid
= 0;
544 map
<string
, list
<string
> > fields
;
545 bool seen_content
= false;
546 while (!line
.empty()) {
547 // Cope with files from MS Windows (\r\n end of lines).
548 // Trim multiple \r characters, since that seems the best way
549 // to handle that case.
550 string::size_type last
= line
.find_last_not_of('\r');
551 if (last
== string::npos
) break;
552 line
.resize(last
+ 1);
554 string::size_type eq
= line
.find('=');
555 if (eq
== string::npos
&& !line
.empty()) {
556 cerr
<< fname
<< ':' << line_no
<< ": expected = somewhere "
557 "in this line" << endl
;
558 // FIXME: die or what?
560 string
field(line
, 0, eq
);
561 string
value(line
, eq
+ 1, string::npos
);
562 while (getline(stream
, line
)) {
564 if (line
.empty() || line
[0] != '=') break;
565 // Cope with files from MS Windows (\r\n end of lines).
566 // Trim multiple \r characters, since that seems the best way
567 // to handle that case.
568 last
= line
.find_last_not_of('\r');
569 // line[0] == '=', so last != string::npos.
570 // Replace the '=' with a '\n' so we don't have to use substr.
572 line
.resize(last
+ 1);
576 // Default to not indexing spellings.
577 indexer
.set_flags(Xapian::TermGenerator::flags(0));
579 const vector
<Action
> &v
= index_spec
[field
];
580 string old_value
= value
;
581 vector
<Action
>::const_iterator i
;
582 bool this_field_is_content
= true;
583 for (i
= v
.begin(); i
!= v
.end(); ++i
) {
584 switch (i
->get_action()) {
589 // We're processing the same field again - give it a
591 this_field_is_content
= true;
594 if (!value
.empty()) {
595 string f
= i
->get_string_arg();
596 if (f
.empty()) f
= field
;
597 // replace newlines with spaces
599 string::size_type j
= 0;
600 while ((j
= s
.find('\n', j
)) != string::npos
)
602 fields
[f
].push_back(s
);
606 indexer
.index_text(value
,
608 i
->get_string_arg());
610 case Action::INDEXNOPOS
:
611 // No positional information so phrase searching
612 // won't work. However, the database will use much
614 indexer
.index_text_without_positions(value
,
616 i
->get_string_arg());
618 case Action::BOOLEAN
: {
619 // Do nothing if there's no text.
620 if (value
.empty()) break;
622 string term
= i
->get_string_arg();
623 if (prefix_needs_colon(term
, value
[0])) term
+= ':';
626 doc
.add_boolean_term(term
);
630 unsigned int max_length
= i
->get_num_arg();
631 if (value
.length() > max_length
)
632 value
= hash_long_term(value
, max_length
);
635 case Action::HEXTOBIN
: {
636 size_t len
= value
.length();
638 cerr
<< "hextobin: input must have even length"
642 output
.reserve(len
/ 2);
643 for (size_t j
= 0; j
< len
; j
+= 2) {
645 char b
= value
[j
+ 1];
646 if (!C_isxdigit(a
) || !C_isxdigit(b
)) {
647 cerr
<< "hextobin: input must be all hex "
651 char r
= (hex_digit(a
) << 4) | hex_digit(b
);
654 value
= std::move(output
);
660 value
= Xapian::Unicode::tolower(value
);
663 bool truncated
= false;
664 // FIXME: Use NOATIME if we own the file or are root.
665 if (!load_file(value
, i
->get_num_arg(), NOCACHE
,
667 cerr
<< "Couldn't load file '" << value
<< "': "
668 << strerror(errno
) << endl
;
671 if (!truncated
) break;
674 case Action::TRUNCATE
:
675 utf8_truncate(value
, i
->get_num_arg());
678 indexer
.set_flags(indexer
.FLAG_SPELLING
);
680 case Action::UNHTML
: {
683 // Default HTML character set is latin 1, though
684 // not specifying one is deprecated these days.
685 p
.parse_html(value
, "iso-8859-1", false);
686 } catch (const string
& newcharset
) {
688 p
.parse_html(value
, newcharset
, true);
690 if (p
.indexing_allowed
)
696 case Action::UNIQUE
: {
697 // If there's no text, just issue a warning.
699 cerr
<< fname
<< ':' << line_no
700 << ": Ignoring UNIQUE action on empty text"
705 // Ensure that the value of this field is unique.
706 // If a record already exists with the same value,
707 // it will be replaced with the new record.
709 // Unique fields aren't considered content - if
710 // there are no other fields in the document, the
711 // document is to be deleted.
712 this_field_is_content
= false;
714 // Argument is the prefix to add to the field value
715 // to get the unique term.
716 string t
= i
->get_string_arg();
717 if (prefix_needs_colon(t
, value
[0])) t
+= ':';
721 Xapian::PostingIterator p
= database
.postlist_begin(t
);
722 if (p
!= database
.postlist_end(t
)) {
725 } catch (const Xapian::Error
&e
) {
726 // Hmm, what happened?
727 cerr
<< "Caught exception in UNIQUE!" << endl
;
728 cerr
<< "E: " << e
.get_description() << endl
;
736 doc
.add_value(i
->get_num_arg(), value
);
738 case Action::VALUENUMERIC
: {
739 if (value
.empty()) break;
741 double dbl
= strtod(value
.c_str(), &end
);
743 cerr
<< fname
<< ':' << line_no
<< ": Warning: "
744 "Trailing characters in VALUENUMERIC: '"
745 << value
<< "'" << endl
;
747 doc
.add_value(i
->get_num_arg(),
748 Xapian::sortable_serialise(dbl
));
751 case Action::VALUEPACKED
: {
753 if (value
.empty() || !C_isdigit(value
[0])) {
754 // strtoul() accepts leading whitespace and negated
755 // values, neither of which we want to allow.
760 word
= strtoul(value
.c_str(), &q
, 10);
761 if (!errno
&& *q
!= '\0') {
762 // Trailing characters after converted value.
767 cerr
<< fname
<< ':' << line_no
<< ": Warning: "
768 "valuepacked \"" << value
<< "\" ";
769 if (errno
== ERANGE
) {
770 cerr
<< "out of range";
772 cerr
<< "not an unsigned integer";
776 int valueslot
= i
->get_num_arg();
777 doc
.add_value(valueslot
, int_to_binary_string(word
));
781 const string
& type
= i
->get_string_arg();
783 if (type
== "unix") {
784 time_t t
= atoi(value
.c_str());
785 struct tm
*tm
= localtime(&t
);
786 int y
= tm
->tm_year
+ 1900;
787 int m
= tm
->tm_mon
+ 1;
788 yyyymmdd
= date_to_string(y
, m
, tm
->tm_mday
);
789 } else if (type
== "yyyymmdd") {
790 if (value
.length() == 8) yyyymmdd
= value
;
792 if (yyyymmdd
.empty()) break;
794 doc
.add_boolean_term("D" + yyyymmdd
);
797 doc
.add_boolean_term("M" + yyyymmdd
);
800 doc
.add_boolean_term("Y" + yyyymmdd
);
803 case Action::PARSEDATE
: {
804 string dateformat
= i
->get_string_arg();
806 memset(&tm
, 0, sizeof(tm
));
807 auto ret
= strptime(value
.c_str(), dateformat
.c_str(), &tm
);
809 cerr
<< fname
<< ':' << line_no
<< ": Warning: "
810 "\"" << value
<< "\" doesn't match format "
811 "\"" << dateformat
<< '\"' << endl
;
816 cerr
<< fname
<< ':' << line_no
<< ": Warning: "
817 "\"" << value
<< "\" not fully matched by "
818 "format \"" << dateformat
<< "\" "
819 "(\"" << ret
<< "\" left over) but "
820 "indexing anyway" << endl
;
823 value
= str(timegm(&tm
));
827 /* Empty default case to avoid "unhandled enum value"
832 if (this_field_is_content
) seen_content
= true;
833 if (stream
.eof()) break;
836 // If we haven't seen any fields (other than unique identifiers)
837 // the document is to be deleted.
840 database
.delete_document(docid
);
841 if (verbose
) cout
<< "Del: " << docid
<< endl
;
846 for (auto&& i
: fields
) {
847 for (auto&& field_val
: i
.second
) {
855 // Put the data in the document
858 // Add the document to the database
861 database
.replace_document(docid
, doc
);
862 if (verbose
) cout
<< "Replace: " << docid
<< endl
;
864 } catch (const Xapian::Error
&e
) {
865 cerr
<< "E: " << e
.get_description() << endl
;
866 // Possibly the document was deleted by another
867 // process in the meantime...?
868 docid
= database
.add_document(doc
);
869 cerr
<< "Replace failed, adding as new: " << docid
<< endl
;
872 docid
= database
.add_document(doc
);
873 if (verbose
) cout
<< "Add: " << docid
<< endl
;
879 // Commit after each file to make sure all changes from that file make it
881 if (verbose
) cout
<< "Committing: " << endl
;
886 main(int argc
, char **argv
)
888 // If the database already exists, default to updating not overwriting.
889 int database_mode
= Xapian::DB_CREATE_OR_OPEN
;
891 Xapian::Stem
stemmer("english");
893 static const struct option longopts
[] = {
894 { "help", no_argument
, NULL
, 'h' },
895 { "version", no_argument
, NULL
, 'V' },
896 { "stemmer", required_argument
, NULL
, 's' },
897 { "overwrite", no_argument
, NULL
, 'o' },
898 { "verbose", no_argument
, NULL
, 'v' },
902 bool more
= true, show_help
= false;
904 switch (gnu_getopt_long(argc
, argv
, "vs:hV", longopts
, NULL
)) {
913 case 'V': // --version
914 print_package_info(PROG_NAME
);
916 case 'o': // --overwrite
917 database_mode
= Xapian::DB_CREATE_OR_OVERWRITE
;
924 stemmer
= Xapian::Stem(optarg
);
925 } catch (const Xapian::InvalidArgumentError
&) {
926 cerr
<< "Unknown stemming language '" << optarg
<< "'.\n";
927 cerr
<< "Available language names are: "
928 << Xapian::Stem::get_available_languages() << endl
;
937 if (show_help
|| argc
< 2) {
938 cout
<< PROG_NAME
" - " PROG_DESC
"\n"
939 "Usage: " PROG_NAME
" [OPTIONS] DATABASE INDEXER_SCRIPT [INPUT_FILE]...\n"
941 "Creates or updates a Xapian database with the data from the input files listed\n"
942 "on the command line. If no files are specified, data is read from stdin.\n"
944 "See https://xapian.org/docs/omega/scriptindex.html for documentation of the\n"
945 "format for INDEXER_SCRIPT.\n"
948 " -v, --verbose display additional messages to aid debugging\n"
949 " --overwrite create the database anew (the default is to update if\n"
950 " the database already exists)\n";
951 print_stemmer_help("");
952 print_help_and_version_help("");
953 exit(show_help
? 0 : 1);
956 parse_index_script(argv
[1]);
958 // Open the database. If another process is currently updating the
959 // database, wait for the lock to become available.
960 auto flags
= database_mode
| Xapian::DB_RETRY_LOCK
;
961 Xapian::WritableDatabase
database(argv
[0], flags
);
963 Xapian::TermGenerator indexer
;
964 indexer
.set_stemmer(stemmer
);
965 // Set the database for spellings to be added to by the "spell" action.
966 indexer
.set_database(database
);
974 index_file("<stdin>", cin
, database
, indexer
);
976 // Read file(s) listed on the command line.
977 for (int i
= 2; i
< argc
; ++i
) {
978 ifstream
stream(argv
[i
]);
980 index_file(argv
[i
], stream
, database
, indexer
);
982 cerr
<< "Can't open file " << argv
[i
] << endl
;
987 cout
<< "records (added, replaced, deleted) = (" << addcount
<< ", "
988 << repcount
<< ", " << delcount
<< ")" << endl
;
989 } catch (const Xapian::Error
&error
) {
990 cerr
<< "Exception: " << error
.get_description() << endl
;
992 } catch (const std::bad_alloc
&) {
993 cerr
<< "Exception: std::bad_alloc" << endl
;
996 cerr
<< "Unknown Exception" << endl
;