Add README
[xapian-trec.git] / xapian-trec.cc
blob82789ca24dfafbe6783579ff002ae221655f035b
1 /*
2 ** xapian-trec.cc
3 **
4 ** AM 8/11/2006
5 ** An example TREC search mechanism using Xapian
6 **
7 */
9 #include <xapian.h>
10 #include <iostream>
11 #include <fstream>
12 #include <string>
13 #include <vector>
14 #include "split.h"
16 using namespace Xapian;
17 using namespace std;
19 int main (int argc, char *argv[]) {
21 // There must be four command line arguments passed in the order:
22 // database_name, query file name, results file name, db type, run type,
23 if (argc < 5) {
24 cerr << "ERROR: Insufficient arguments passed to program\n";
25 cerr << "USAGE: xapian-trec <database> <query file> <results file> <run id> \n";
26 exit(0);
27 } // END if
29 // Catch any Error exceptions thrown
30 try {
32 /* set up xapian search and term handling */
33 Database db(argv[1]);
34 Enquire enquire(db);
35 Stem stemmer("english");
37 /* open files for reading and writing, and items to use/record during search */
38 std::ifstream query_file( argv[2]);
39 if(!query_file) cerr << "main) Can't open the query file" << argv[2] << '\n';
40 std::ofstream results_file( argv[3]);
41 if(!results_file) cerr << "main) Can't open the results file" << argv[3] << '\n';
42 string run_id( argv[4] );
44 // iterate through query file until none left
45 while(!query_file.eof()) {
46 string topic_no; // the topic number of a given query
48 /* get the line */
49 char ch;
50 string line;
51 while( query_file.good() )
52 line += (char) query_file.get();
54 /* get the topic no, and terms from the line */
55 vector<string> terms;
56 split(line, ' ', terms );
58 /*get the topic number */
59 topic_no = terms[0];
60 vector<string>::const_iterator termiter = terms.begin();
61 termiter++;
63 /*get the terms for querying */
64 vector<string> stemmed_terms;
65 for( ; termiter != terms.end(); termiter++)
66 stemmed_terms.push_back(stemmer(*termiter));
68 /* create the query and get the results for it */
69 Query query(Query::OP_OR, stemmed_terms.begin(), stemmed_terms.end());
70 enquire.set_query(query);
71 MSet matches = enquire.get_mset(0, 1000 );
73 // get the results and save them to the file
74 int next=0;
75 for ( MSetIterator i = matches.begin(); i != matches.end(); ++i ) {
76 results_file << topic_no << "Q0" << i.get_document().get_data() << next << i.get_weight() << run_id << '\n';
77 next++;
78 } // END for
80 } /* END while */
82 } catch (const Error &error) {
83 cout << "Exception: " << error.get_msg() << endl;
84 exit(1);
85 } // END try/catch
87 } // END main