1 /* trec_search.cc: Example batch search for TREC experiments
3 * ----START-LICENCE----
4 * Copyright 1999,2000,2001 BrightStation PLC
5 * Copyright 2003 Olly Betts
6 * Copyright 2003 Andy MacFarlane, City University
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License as
10 * published by the Free Software Foundation; either version 2 of the
11 * License, or (at your option) any later version.
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
22 * -----END-LICENCE-----
25 #include "config_file.h"
36 using namespace Xapian
;
// load_query: read one line from `queryfile`, split it on spaces, and build
// an OR query from its stemmed, non-stop-word tokens.
//
// Parameters (as declared in the signature below):
//   queryfile - input stream the query line is read from.
//   topicno   - output; assigned from a token via atoi() while the
//               `found_topicno` flag is still false.
//   sw_store  - stop word store handed to IsStopWord().
//               NOTE(review): taken by value, so it is copied on every call;
//               whether that copy is costly depends on SW_STORE's definition,
//               which is not visible here.
//   query     - Xapian::Query reference; the statement that assigns to it is
//               not visible in this excerpt.
//   stemmer   - applied to every token that is kept as a query term.
//
// Returns 0 when `queryfile` is already at end-of-file on entry. The other
// return paths are not visible in this excerpt.
39 int load_query( std::ifstream
& queryfile
, int & topicno
, SW_STORE sw_store
, Xapian::Query
& query
, Xapian::Stem
& stemmer
) {
40 // load a query and record its terms
// Bail out early if a previous read already hit end-of-file.
42 if( queryfile
.eof() ) return 0;
// Read the next raw line (the declaration of `line` is not visible here).
46 getline(queryfile
,line
);
// Overwrite the last character of the line with a NUL byte. This does not
// shorten the string; the NUL stays as the final character.
// NOTE(review): presumably meant to drop a trailing delimiter - confirm.
// NOTE(review): if `line` is empty, size()-1 wraps around and the index is
// out of range - confirm the query file never contains blank lines.
47 line
[line
.size()-1] ='\0';
// Break the line into space-separated tokens, collected in `data`.
49 split(line
, ' ', data
);
// Terms that will make up the query.
50 vector
<string
> terms
;
51 for( vector
<string
>::const_iterator start
= data
.begin(); start
!= data
.end(); start
++ ) {
// While the topic number has not been seen yet, parse this token as the
// topic number. Where `found_topicno` gets set is not visible here.
53 if( !found_topicno
) {
54 topicno
= atoi( start
->c_str());
// Otherwise keep the token only if it is not a stop word.
// NOTE(review): the C-style cast drops const from c_str(); presumably
// IsStopWord() does not modify its argument - confirm.
56 } else if(!IsStopWord( sw_store
, (char *) start
->c_str() )) {
// Stem the token, echo it to stdout, and record it as a query term.
57 queryword
= stemmer(*start
);
58 cout
<< "Queryword: = " << queryword
<< endl
;
59 terms
.push_back(queryword
);
64 // make the query with the terms
// Combine all collected terms with the OR operator.
65 Xapian::Query
consquery(Xapian::Query::OP_OR
, terms
.begin(), terms
.end());
// main: batch search driver. Loads the experiment configuration named by
// argv[1], opens the database plus the query, results and transaction files,
// then runs each query from the query file, writing one results line per match
// and one timing line per query. Finally prints total and average query time.
//
// Several declarations used below (config, sw_store, query, count, len) and
// the opening of the try block are not visible in this excerpt.
72 int main(int argc
, char **argv
)
74 // Simplest possible options parsing: we just require two or more
// Usage message; the condition guarding it is not visible here.
77 cout
<< "usage: " << argv
[0] << " <config file>" << endl
;
81 // Catch any Xapian::Error exceptions thrown
83 // load the TREC experiment configuration file
85 config
.setup_config( string(argv
[1]) );
86 config
.check_search_config();
// Stemmer for the language named in the configuration.
87 Xapian::Stem
stemmer( config
.get_language() );
88 struct timeval start_time
, finish_time
, timelapse
; /* timing variables */
// Open the database at the configured path.
// NOTE(review): Xapian::Flint::open is specific to the Flint backend, which
// later Xapian releases no longer provide - confirm the targeted Xapian
// version, or whether Xapian::Database(path) would do.
91 Xapian::Database
db(Xapian::Flint::open(config
.get_db().c_str()));
93 // Start an enquire session
94 Xapian::Enquire
enquire(db
);
// NOTE(review): none of the three streams opened below is visibly checked
// for a successful open - confirm whether a check exists on lines not shown.
96 // open the query file
97 std::ifstream
queryfile( config
.get_queryfile().c_str() );
99 // open the results file
100 std::ofstream
resultsfile( config
.get_resultsfile().c_str() );
102 // open the transaction file
103 std::ofstream
transfile( config
.get_transfile().c_str() );
105 // load the stop word list
107 Read_SW_File( (char *) config
.get_stopsfile().c_str(), &sw_store
);
109 // count of no queries done
// Running sum of per-query processing times.
113 float total_qp_time
= 0.0;
115 // process the queries
// NOTE(review): eof() only becomes true after a read has reached end-of-file,
// so the loop body also re-checks it after load_query() below.
116 while( !queryfile
.eof() ) {
118 int topicno
; // topic number for the query
121 // Build the query object
123 int gotquery
= load_query( queryfile
, topicno
, sw_store
, query
, stemmer
);
// Only run the query if one was loaded and the read did not reach EOF.
// NOTE(review): a final query line without a trailing newline sets eofbit
// during getline, so it appears that query would be skipped - confirm.
125 if(gotquery
&& !queryfile
.eof()) {
// Start timing this query.
128 gettimeofday( &start_time
, 0 );
130 cout
<< "Running " << topicno
<< ", query = [" << query
.get_description() << "] getting " << config
.get_noresults() << " docs" << endl
;
132 // Give the query object to the enquire session
133 enquire
.set_query(query
);
135 // Get the top n results of the query
136 Xapian::MSet matches
= enquire
.get_mset( 0, config
.get_noresults() );
138 // record the number of matches made in this query
139 //int queryweightings = enquire.get_totalweightings();
140 //cout << "W's) for this query is -> " << queryweightings << endl;
142 // Display the results cout << matches.size() << " results found" << endl;
// Progress message whenever `count` is a multiple of 1000; where `count` is
// declared and incremented is not visible here.
144 if( (count
% 1000) == 0 ) cout
<< "QUERIES PROCESSED) " << count
<< endl
;
146 // record the results in a 'trec.log' file
// One line per match: topic number, the literal "Q0", the document data,
// rank, weight, and the configured run name.
// NOTE(review): endl flushes resultsfile after every match; '\n' would avoid
// the per-line flush if that is not required.
147 for (Xapian::MSetIterator i
= matches
.begin(); i
!= matches
.end(); i
++) {
148 Xapian::Document doc
= i
.get_document();
149 resultsfile
<< topicno
<< " Q0 " << doc
.get_data() << " " << i
.get_rank() << " " <<
150 i
.get_weight() << " " << config
.get_runname() << endl
;
// Stop timing; diff_time() stores finish - start in `timelapse`, and
// time_real() converts it to the float added to the running total.
155 gettimeofday( &finish_time
, 0 );
156 diff_time( finish_time
, start_time
, &timelapse
);
157 float qp_time
= time_real( timelapse
);
158 total_qp_time
+= qp_time
;
// Write "topic,time,len" to the transaction file and echo it to stdout.
// `len` is defined on lines not visible in this excerpt.
159 transfile
<< topicno
<< "," << qp_time
<< "," << len
<< endl
;
160 cout
<< topicno
<< "," << qp_time
<< "," << len
<< endl
;
166 // print the total time, and average time per query
// NOTE(review): if no query was processed, `count` may be 0 and the average
// is then not a meaningful number - confirm count > 0 or guard the division.
168 float avg_qp_time
= total_qp_time
/(float) count
;
169 cout
<< "Average query time for " << count
<< " Queries is " <<
170 avg_qp_time
<< " secs, took a total of " << total_qp_time
<< " secs" << endl
;
// Report any Xapian error on stdout; the matching try is not visible here.
172 } catch( const Xapian::Error
&error
) {
173 cout
<< "Exception: " << error
.get_msg() << endl
;
174 } // END try/catch block