Rename Collapser::entries() to get_entries()
[xapian.git] / xapian-letor / bin / xapian-letor-update.cc
blob6690f4a6c0a5a77848e39a1e968f5c5cb5301de6
1 /** @file xapian-letor-update.cc
2 * @brief Update statistics in user meta-data used by letor module.
3 */
4 /* Copyright (C) 2011 Parth Gupta
5 * Copyright (C) 2012,2015 Olly Betts
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License as
9 * published by the Free Software Foundation; either version 2 of the
10 * License, or (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
20 * USA
23 #include <config.h>
25 #include <xapian.h>
27 #include <cstdlib>
28 #include <iostream>
30 #include "gnu_getopt.h"
31 #include "str.h"
33 using namespace std;
35 #define PROG_NAME "xapian-letor-update"
36 #define PROG_DESC "Update statistics in user meta-data used by letor module"
38 static void
39 show_usage()
41 cout << "Usage: " PROG_NAME " [OPTIONS] 'QUERY'\n"
42 " -d, --db=DIRECTORY database to update stats for\n"
43 " -h, --help display this help and exit\n"
44 " -v, --version output version information and exit\n";
47 int
48 main(int argc, char **argv)
49 try {
50 const char * opts = "d:hv";
51 static const struct option long_opts[] = {
52 { "db", required_argument, 0, 'd' },
53 { "help", no_argument, 0, 'h' },
54 { "version", no_argument, 0, 'v' },
55 { NULL, 0, 0, 0}
58 Xapian::WritableDatabase db;
59 bool have_db = false;
61 int c;
62 while ((c = gnu_getopt_long(argc, argv, opts, long_opts, 0)) != -1) {
63 switch (c) {
64 case 'd':
65 db = Xapian::WritableDatabase(optarg, Xapian::DB_OPEN);
66 have_db = true;
67 break;
68 case 'v':
69 cout << PROG_NAME " - " PACKAGE_STRING << endl;
70 exit(0);
71 case 'h':
72 cout << PROG_NAME " - " PROG_DESC "\n\n";
73 show_usage();
74 exit(0);
75 case ':': // missing parameter
76 case '?': // unknown option
77 show_usage();
78 exit(1);
82 if (!have_db || argc - optind != 1) {
83 show_usage();
84 exit(1);
87 // Calculate some extra collection statistics used to calculate features
88 // used by Letor, and store them as user metadata.
90 Xapian::termcount total_title_len = 0;
91 Xapian::TermIterator t;
92 for (t = db.allterms_begin("S"); t != db.allterms_end("S"); ++t) {
93 total_title_len += db.get_collection_freq(*t);
96 Xapian::totallength total_len = db.get_total_length();
98 db.set_metadata("collection_len_title", str(total_title_len));
99 db.set_metadata("collection_len_body", str(total_len - total_title_len));
100 db.set_metadata("collection_len_whole", str(total_len));
101 db.commit();
102 } catch (const Xapian::Error & e) {
103 cout << e.get_description() << endl;
104 exit(1);