myhtmlparse.cc: Remove unused header.
[xapian.git] / xapian-core / api / omdocument.cc
blob63bf005eeb80bb40d75941dcfb6d60cf36c12125
1 /* omdocument.cc: implementation of Xapian::Document and its internals
3 * Copyright 1999,2000,2001 BrightStation PLC
4 * Copyright 2002 Ananova Ltd
5 * Copyright 2003,2004,2006,2007,2008,2009,2011,2013,2014 Olly Betts
6 * Copyright 2009 Lemur Consulting Ltd
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License as
10 * published by the Free Software Foundation; either version 2 of the
11 * License, or (at your option) any later version.
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
21 * USA
24 #include <config.h>
26 #include <xapian/document.h>
28 #include "backends/document.h"
29 #include "documentvaluelist.h"
30 #include "maptermlist.h"
31 #include "net/serialise.h"
32 #include "str.h"
33 #include "unicode/description_append.h"
35 #include <xapian/error.h>
36 #include <xapian/types.h>
37 #include <xapian/valueiterator.h>
39 #include <algorithm>
40 #include <string>
42 using namespace std;
44 namespace Xapian {
46 // implementation of Document
48 Document::Document(Document::Internal *internal_) : internal(internal_)
52 Document::Document() : internal(new Xapian::Document::Internal)
56 string
57 Document::get_value(Xapian::valueno slot) const
59 LOGCALL(API, string, "Document::get_value", slot);
60 RETURN(internal->get_value(slot));
63 string
64 Document::get_data() const
66 LOGCALL(API, string, "Document::get_data", NO_ARGS);
67 RETURN(internal->get_data());
70 void
71 Document::set_data(const string &data)
73 LOGCALL_VOID(API, "Document::set_data", data);
74 internal->set_data(data);
77 void
78 Document::operator=(const Document &other)
80 // pointers are reference counted.
81 internal = other.internal;
84 Document::Document(const Document &other)
85 : internal(other.internal)
89 Document::~Document()
93 string
94 Document::get_description() const
96 return "Document(" + internal->get_description() + ")";
99 void
100 Document::add_value(Xapian::valueno slot, const string &value)
102 LOGCALL_VOID(API, "Document::add_value", slot | value);
103 internal->add_value(slot, value);
106 void
107 Document::remove_value(Xapian::valueno slot)
109 LOGCALL_VOID(API, "Document::remove_value", slot);
110 internal->remove_value(slot);
113 void
114 Document::clear_values()
116 LOGCALL_VOID(API, "Document::clear_values", NO_ARGS);
117 internal->clear_values();
120 void
121 Document::add_posting(const string & tname,
122 Xapian::termpos tpos,
123 Xapian::termcount wdfinc)
125 LOGCALL_VOID(API, "Document::add_posting", tname | tpos | wdfinc);
126 if (tname.empty()) {
127 throw InvalidArgumentError("Empty termnames aren't allowed.");
129 internal->add_posting(tname, tpos, wdfinc);
132 void
133 Document::add_term(const string & tname, Xapian::termcount wdfinc)
135 LOGCALL_VOID(API, "Document::add_term", tname | wdfinc);
136 if (tname.empty()) {
137 throw InvalidArgumentError("Empty termnames aren't allowed.");
139 internal->add_term(tname, wdfinc);
142 void
143 Document::remove_posting(const string & tname, Xapian::termpos tpos,
144 Xapian::termcount wdfdec)
146 LOGCALL_VOID(API, "Document::remove_posting", tname | tpos | wdfdec);
147 if (tname.empty()) {
148 throw InvalidArgumentError("Empty termnames aren't allowed.");
150 internal->remove_posting(tname, tpos, wdfdec);
153 void
154 Document::remove_term(const string & tname)
156 LOGCALL_VOID(API, "Document::remove_term", tname);
157 internal->remove_term(tname);
160 void
161 Document::clear_terms()
163 LOGCALL_VOID(API, "Document::clear_terms", NO_ARGS);
164 internal->clear_terms();
167 Xapian::termcount
168 Document::termlist_count() const {
169 LOGCALL(API, Xapian::termcount, "Document::termlist_count", NO_ARGS);
170 RETURN(internal->termlist_count());
173 TermIterator
174 Document::termlist_begin() const
176 LOGCALL(API, TermIterator, "Document::termlist_begin", NO_ARGS);
177 RETURN(TermIterator(internal->open_term_list()));
180 Xapian::termcount
181 Document::values_count() const {
182 LOGCALL(API, Xapian::termcount, "Document::values_count", NO_ARGS);
183 RETURN(internal->values_count());
186 ValueIterator
187 Document::values_begin() const
189 LOGCALL(API, ValueIterator, "Document::values_begin", NO_ARGS);
190 // Calling values_count() has the side effect of making sure that they have
191 // been read into the std::map "values" member of internal.
192 if (internal->values_count() == 0) RETURN(ValueIterator());
193 RETURN(ValueIterator(new DocumentValueList(internal)));
196 docid
197 Document::get_docid() const
199 LOGCALL(API, docid, "Document::get_docid", NO_ARGS);
200 RETURN(internal->get_docid());
203 std::string
204 Document::serialise() const
206 LOGCALL(API, std::string, "Document::serialise", NO_ARGS);
207 RETURN(serialise_document(*this));
210 Document
211 Document::unserialise(const std::string &s)
213 LOGCALL_STATIC(API, Document, "Document::unserialise", s);
214 RETURN(unserialise_document(s));
219 /////////////////////////////////////////////////////////////////////////////
221 void
222 OmDocumentTerm::add_position(Xapian::termpos tpos)
224 LOGCALL_VOID(DB, "OmDocumentTerm::add_position", tpos);
226 // We generally expect term positions to be added in approximately
227 // increasing order, so check the end first
228 if (positions.empty() || tpos > positions.back()) {
229 positions.push_back(tpos);
230 return;
233 // Search for the position the term occurs at. Use binary chop to
234 // search, since this is a sorted list.
235 vector<Xapian::termpos>::iterator i;
236 i = lower_bound(positions.begin(), positions.end(), tpos);
237 if (i == positions.end() || *i != tpos) {
238 positions.insert(i, tpos);
242 void
243 OmDocumentTerm::remove_position(Xapian::termpos tpos)
245 LOGCALL_VOID(DB, "OmDocumentTerm::remove_position", tpos);
247 // Search for the position the term occurs at. Use binary chop to
248 // search, since this is a sorted list.
249 vector<Xapian::termpos>::iterator i;
250 i = lower_bound(positions.begin(), positions.end(), tpos);
251 if (i == positions.end() || *i != tpos) {
252 throw Xapian::InvalidArgumentError("Position " + str(tpos) +
253 " not in list, can't remove");
255 positions.erase(i);
258 string
259 OmDocumentTerm::get_description() const
261 string description;
262 description = "OmDocumentTerm(wdf = ";
263 description += str(wdf);
264 description += ", positions[";
265 description += str(positions.size());
266 description += "])";
267 return description;
270 string
271 Xapian::Document::Internal::get_value(Xapian::valueno slot) const
273 if (values_here) {
274 map<Xapian::valueno, string>::const_iterator i;
275 i = values.find(slot);
276 if (i == values.end()) return string();
277 return i->second;
279 if (!database.get()) return string();
280 return do_get_value(slot);
283 string
284 Xapian::Document::Internal::get_data() const
286 LOGCALL(DB, string, "Xapian::Document::Internal::get_data", NO_ARGS);
287 if (data_here) RETURN(data);
288 if (!database.get()) RETURN(string());
289 RETURN(do_get_data());
292 void
293 Xapian::Document::Internal::set_data(const string &data_)
295 data = data_;
296 data_here = true;
299 TermList *
300 Xapian::Document::Internal::open_term_list() const
302 LOGCALL(DB, TermList *, "Document::Internal::open_term_list", NO_ARGS);
303 if (terms_here) {
304 RETURN(new MapTermList(terms.begin(), terms.end()));
306 if (!database.get()) RETURN(NULL);
307 RETURN(database->open_term_list(did));
310 void
311 Xapian::Document::Internal::add_value(Xapian::valueno slot, const string &value)
313 need_values();
314 if (!value.empty()) {
315 values[slot] = value;
316 } else {
317 // Empty values aren't stored, but replace any existing value by
318 // removing it.
319 values.erase(slot);
323 void
324 Xapian::Document::Internal::remove_value(Xapian::valueno slot)
326 need_values();
327 map<Xapian::valueno, string>::iterator i = values.find(slot);
328 if (i == values.end()) {
329 throw Xapian::InvalidArgumentError("Value #" + str(slot) +
330 " is not present in document, in "
331 "Xapian::Document::Internal::remove_value()");
333 values.erase(i);
336 void
337 Xapian::Document::Internal::clear_values()
339 values.clear();
340 values_here = true;
343 void
344 Xapian::Document::Internal::add_posting(const string & tname, Xapian::termpos tpos,
345 Xapian::termcount wdfinc)
347 need_terms();
348 positions_modified = true;
350 map<string, OmDocumentTerm>::iterator i;
351 i = terms.find(tname);
352 if (i == terms.end()) {
353 OmDocumentTerm newterm(wdfinc);
354 newterm.add_position(tpos);
355 terms.insert(make_pair(tname, newterm));
356 } else {
357 i->second.add_position(tpos);
358 if (wdfinc) i->second.inc_wdf(wdfinc);
362 void
363 Xapian::Document::Internal::add_term(const string & tname, Xapian::termcount wdfinc)
365 need_terms();
367 map<string, OmDocumentTerm>::iterator i;
368 i = terms.find(tname);
369 if (i == terms.end()) {
370 OmDocumentTerm newterm(wdfinc);
371 terms.insert(make_pair(tname, newterm));
372 } else {
373 if (wdfinc) i->second.inc_wdf(wdfinc);
377 void
378 Xapian::Document::Internal::remove_posting(const string & tname,
379 Xapian::termpos tpos,
380 Xapian::termcount wdfdec)
382 need_terms();
384 map<string, OmDocumentTerm>::iterator i;
385 i = terms.find(tname);
386 if (i == terms.end()) {
387 throw Xapian::InvalidArgumentError("Term '" + tname +
388 "' is not present in document, in "
389 "Xapian::Document::Internal::remove_posting()");
391 i->second.remove_position(tpos);
392 if (wdfdec) i->second.dec_wdf(wdfdec);
393 positions_modified = true;
396 void
397 Xapian::Document::Internal::remove_term(const string & tname)
399 need_terms();
400 map<string, OmDocumentTerm>::iterator i;
401 i = terms.find(tname);
402 if (i == terms.end()) {
403 throw Xapian::InvalidArgumentError("Term '" + tname +
404 "' is not present in document, in "
405 "Xapian::Document::Internal::remove_term()");
407 positions_modified = !i->second.positions.empty();
408 terms.erase(i);
411 void
412 Xapian::Document::Internal::clear_terms()
414 terms.clear();
415 terms_here = true;
416 // Assume there was a term with positions for now.
417 // FIXME: may be worth checking...
418 positions_modified = true;
421 Xapian::termcount
422 Xapian::Document::Internal::termlist_count() const
424 if (!terms_here) {
425 // How equivalent is this line to the rest?
426 // return database.get() ? database->open_term_list(did)->get_approx_size() : 0;
427 need_terms();
429 Assert(terms_here);
430 return terms.size();
433 void
434 Xapian::Document::Internal::need_terms() const
436 if (terms_here) return;
437 if (database.get()) {
438 Xapian::TermIterator t(database->open_term_list(did));
439 Xapian::TermIterator tend(NULL);
440 for ( ; t != tend; ++t) {
441 Xapian::PositionIterator p = t.positionlist_begin();
442 OmDocumentTerm term(t.get_wdf());
443 for ( ; p != t.positionlist_end(); ++p) {
444 term.add_position(*p);
446 terms.insert(make_pair(*t, term));
449 terms_here = true;
452 Xapian::valueno
453 Xapian::Document::Internal::values_count() const
455 LOGCALL(DB, Xapian::valueno, "Document::Internal::values_count", NO_ARGS);
456 need_values();
457 Assert(values_here);
458 RETURN(values.size());
461 string
462 Xapian::Document::Internal::get_description() const
464 string description = "Xapian::Document::Internal(";
466 if (data_here) description += "data='" + data + "'";
468 if (values_here) {
469 if (data_here) description += ", ";
470 description += "values[" + str(values.size()) + "]";
473 if (terms_here) {
474 if (data_here || values_here) description += ", ";
475 description += "terms[" + str(terms.size()) + "]";
478 if (database.get()) {
479 if (data_here || values_here || terms_here) description += ", ";
480 description += "doc=";
481 description += "?"; // do_get_description(); ?
484 description += ')';
486 return description;
489 void
490 Xapian::Document::Internal::need_values() const
492 if (!values_here) {
493 if (database.get()) {
494 Assert(values.empty());
495 do_get_all_values(values);
497 values_here = true;
501 Xapian::Document::Internal::~Internal()
503 if (database.get())
504 database->invalidate_doc_object(this);