1 /** @file xapian-letor-update.cc
2 * @brief Update statistics in user meta-data used by letor module.
4 /* Copyright (C) 2011 Parth Gupta
5 * Copyright (C) 2012,2015 Olly Betts
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License as
9 * published by the Free Software Foundation; either version 2 of the
10 * License, or (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
30 #include "gnu_getopt.h"
35 #define PROG_NAME "xapian-letor-update"
36 #define PROG_DESC "Update statistics in user meta-data used by letor module"
41 cout
<< "Usage: " PROG_NAME
" [OPTIONS] 'QUERY'\n"
42 " -d, --db=DIRECTORY database to update stats for\n"
43 " -h, --help display this help and exit\n"
44 " -v, --version output version information and exit\n";
48 main(int argc
, char **argv
)
50 const char * opts
= "d:hv";
51 static const struct option long_opts
[] = {
52 { "db", required_argument
, 0, 'd' },
53 { "help", no_argument
, 0, 'h' },
54 { "version", no_argument
, 0, 'v' },
58 Xapian::WritableDatabase db
;
62 while ((c
= gnu_getopt_long(argc
, argv
, opts
, long_opts
, 0)) != -1) {
65 db
= Xapian::WritableDatabase(optarg
, Xapian::DB_OPEN
);
69 cout
<< PROG_NAME
" - " PACKAGE_STRING
<< endl
;
72 cout
<< PROG_NAME
" - " PROG_DESC
"\n\n";
75 case ':': // missing parameter
76 case '?': // unknown option
82 if (!have_db
|| argc
- optind
!= 1) {
87 // Compute the collection-length statistics (title, body and whole) that
88 // Letor's feature calculations rely on, and store them as user metadata.
90 Xapian::termcount total_title_len
= 0;
91 Xapian::TermIterator t
;
92 for (t
= db
.allterms_begin("S"); t
!= db
.allterms_end("S"); ++t
) {
93 total_title_len
+= db
.get_collection_freq(*t
);
96 Xapian::totallength total_len
= db
.get_total_length();
98 db
.set_metadata("collection_len_title", str(total_title_len
));
99 db
.set_metadata("collection_len_body", str(total_len
- total_title_len
));
100 db
.set_metadata("collection_len_whole", str(total_len
));
102 } catch (const Xapian::Error
& e
) {
103 cout
<< e
.get_description() << endl
;