1 /** @file weightinternal.cc
2 * @brief Xapian::Weight::Internal class, holding database and term statistics.
4 /* Copyright (C) 2007 Lemur Consulting Ltd
5 * Copyright (C) 2009,2010,2011,2012,2013,2014,2015,2017 Olly Betts
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License as
9 * published by the Free Software Foundation; either version 2 of the
10 * License, or (at your option) any later version.
12 * This program is distributed in the hope that it will be useful
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
24 #include "weightinternal.h"
26 #include "xapian/enquire.h"
29 #include "api/rsetinternal.h"
31 #include "api/termlist.h"
/** Build a textual summary of this TermFreqs object.
 *
 *  The visible code starts the text with "TermFreqs(termfreq=" and then
 *  appends, in order, the values of termfreq, reltermfreq, collfreq and
 *  max_part, each converted with str() and introduced by a ", name=" label.
 *
 *  NOTE(review): the end of this function is not visible in this excerpt;
 *  presumably it closes the parenthesis and returns desc - confirm.
 */
39 TermFreqs::get_description() const {
40 string
desc("TermFreqs(termfreq=");
41 desc
+= str(termfreq
);
42 desc
+= ", reltermfreq=";
43 desc
+= str(reltermfreq
);
44 desc
+= ", collfreq=";
45 desc
+= str(collfreq
);
46 desc
+= ", max_part=";
47 desc
+= str(max_part
);
/** Merge the statistics held by another Weight::Internal into this one.
 *
 *  @param inc  The statistics to add; it is only read, never modified.
 *
 *  The four scalar totals (total_length, collection_size, rset_size and
 *  total_term_count) are summed member by member, then every entry of
 *  inc.termfreqs is added to the entry with the same term in termfreqs.
 *
 *  NOTE(review): the return statement is not visible in this excerpt.
 */
55 Weight::Internal::operator+=(const Weight::Internal
& inc
)
57 #ifdef XAPIAN_ASSERTIONS
// The statements guarded by XAPIAN_ASSERTIONS are not visible in this excerpt.
61 total_length
+= inc
.total_length
;
62 collection_size
+= inc
.collection_size
;
63 rset_size
+= inc
.rset_size
;
64 total_term_count
+= inc
.total_term_count
;
66 // Add termfreqs and reltermfreqs
67 map
<string
, TermFreqs
>::const_iterator i
;
68 for (i
= inc
.termfreqs
.begin(); i
!= inc
.termfreqs
.end(); ++i
) {
// Indexing termfreqs by the term combines the incoming TermFreqs with any
// existing entry via TermFreqs' own += (assuming `map` is std::map, a term
// seen only in inc gets a fresh entry first - confirm).
69 termfreqs
[i
->first
] += i
->second
;
/** Add the statistics of one sub-database (and the relevance set) to the
 *  running totals held by this object.
 *
 *  @param subdb  Sub-database queried for its total length, document count,
 *                per-term frequencies and per-document term lists.
 *  @param rset   Relevance set; its size is added to rset_size and, when it
 *                has an internal object, its documents are scanned to count
 *                how many of them contain each term in termfreqs.
 *
 *  NOTE(review): several original lines are not visible in this excerpt
 *  (the assertion-only statements, the statement controlled by the
 *  `if (!rset.internal.get())` test, and whatever uses `ret`).
 */
75 Weight::Internal::accumulate_stats(const Xapian::Database::Internal
&subdb
,
76 const Xapian::RSet
&rset
)
78 #ifdef XAPIAN_ASSERTIONS
// The statements guarded by XAPIAN_ASSERTIONS are not visible in this excerpt.
82 total_length
+= subdb
.get_total_length();
83 collection_size
+= subdb
.get_doccount();
84 rset_size
+= rset
.size();
// NOTE(review): this adds (document count * total length), whereas
// total_length above adds get_total_length() on its own. The product would
// be the expected form if the second factor were an average length -
// confirm that multiplying by the total length is intended.
86 total_term_count
+= subdb
.get_doccount() * subdb
.get_total_length();
87 Xapian::TermIterator t
;
// For every unique term of the query, fetch this sub-database's term
// frequency and collection frequency and add them to the term's entry.
88 for (t
= query
.get_unique_terms_begin(); t
!= Xapian::TermIterator(); ++t
) {
89 const string
& term
= *t
;
91 Xapian::doccount sub_tf
;
92 Xapian::termcount sub_cf
;
93 subdb
.get_freqs(term
, &sub_tf
, &sub_cf
);
94 TermFreqs
& tf
= termfreqs
[term
];
95 tf
.termfreq
+= sub_tf
;
96 tf
.collfreq
+= sub_cf
;
// Test for a relevance set without an internal object; the statement this
// test controls is not visible here (presumably an early return - confirm).
99 if (!rset
.internal
.get())
// Visit each document id stored in the relevance set.
102 for (Xapian::docid did
: rset
.internal
->docs
) {
104 // The query is likely to contain far fewer terms than the documents,
105 // and we can skip the document's termlist, so look for each query term
// in that document's term list.
107 unique_ptr
<TermList
> tl(subdb
.open_term_list(did
));
108 map
<string
, TermFreqs
>::iterator i
;
109 for (i
= termfreqs
.begin(); i
!= termfreqs
.end(); ++i
) {
110 const string
& term
= i
->first
;
// Advance the document's term list to this term; how `ret` is used is not
// visible in this excerpt.
111 TermList
* ret
= tl
->skip_to(term
);
// The term list is now positioned on exactly this term, so count the
// document towards the term's reltermfreq.
116 if (term
== tl
->get_termname())
117 ++i
->second
.reltermfreq
;
/** Build a textual summary of the statistics held by this object.
 *
 *  The visible code starts the text with "Weight::Internal(totlen=" and
 *  appends total_length, collection_size, rset_size and total_term_count
 *  (each converted with str()), then the finalised flag, then a
 *  "termfreqs={" section built by looping over the termfreqs entries.
 *
 *  NOTE(review): this excerpt ends inside the final loop, so the separator
 *  handling, the closing text and the return statement are not visible.
 */
123 Weight::Internal::get_description() const
125 string desc
= "Weight::Internal(totlen=";
126 desc
+= str(total_length
);
127 desc
+= ", collection_size=";
128 desc
+= str(collection_size
);
129 desc
+= ", rset_size=";
130 desc
+= str(rset_size
);
131 desc
+= ", total_term_count=";
132 desc
+= str(total_term_count
);
133 #ifdef XAPIAN_ASSERTIONS
// NOTE(review): the finalised flag appears to be reported only in
// XAPIAN_ASSERTIONS builds; the matching #endif is not visible - confirm.
136 desc
+= ", finalised=";
137 desc
+= str(finalised
);
139 desc
+= ", termfreqs={";
140 map
<string
, TermFreqs
>::const_iterator i
;
141 for (i
= termfreqs
.begin(); i
!= termfreqs
.end(); ++i
) {
// Every entry except the first takes this branch; the statement it controls
// is not visible here (presumably it appends a separator - confirm).
142 if (i
!= termfreqs
.begin())
// Append the description of this entry's TermFreqs value.
146 desc
+= i
->second
.get_description();