1 /* omdocument.cc: implementation of the Document class and its internals
3 * Copyright 1999,2000,2001 BrightStation PLC
4 * Copyright 2002 Ananova Ltd
5 * Copyright 2003,2004,2006,2007,2008,2009,2011,2013,2014 Olly Betts
6 * Copyright 2009 Lemur Consulting Ltd
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License as
10 * published by the Free Software Foundation; either version 2 of the
11 * License, or (at your option) any later version.
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
26 #include <xapian/document.h>
28 #include "backends/document.h"
29 #include "documentvaluelist.h"
30 #include "maptermlist.h"
31 #include "net/serialise.h"
33 #include "unicode/description_append.h"
35 #include <xapian/error.h>
36 #include <xapian/types.h>
37 #include <xapian/valueiterator.h>
46 // implementation of Document
48 Document::Document(Document::Internal
*internal_
) : internal(internal_
)
52 Document::Document() : internal(new Xapian::Document::Internal
)
57 Document::get_value(Xapian::valueno slot
) const
59 LOGCALL(API
, string
, "Document::get_value", slot
);
60 RETURN(internal
->get_value(slot
));
64 Document::get_data() const
66 LOGCALL(API
, string
, "Document::get_data", NO_ARGS
);
67 RETURN(internal
->get_data());
71 Document::set_data(const string
&data
)
73 LOGCALL_VOID(API
, "Document::set_data", data
);
74 internal
->set_data(data
);
78 Document::operator=(const Document
&other
)
80 // pointers are reference counted.
81 internal
= other
.internal
;
84 Document::Document(const Document
&other
)
85 : internal(other
.internal
)
94 Document::get_description() const
96 return "Document(" + internal
->get_description() + ")";
100 Document::add_value(Xapian::valueno slot
, const string
&value
)
102 LOGCALL_VOID(API
, "Document::add_value", slot
| value
);
103 internal
->add_value(slot
, value
);
107 Document::remove_value(Xapian::valueno slot
)
109 LOGCALL_VOID(API
, "Document::remove_value", slot
);
110 internal
->remove_value(slot
);
114 Document::clear_values()
116 LOGCALL_VOID(API
, "Document::clear_values", NO_ARGS
);
117 internal
->clear_values();
121 Document::add_posting(const string
& tname
,
122 Xapian::termpos tpos
,
123 Xapian::termcount wdfinc
)
125 LOGCALL_VOID(API
, "Document::add_posting", tname
| tpos
| wdfinc
);
127 throw InvalidArgumentError("Empty termnames aren't allowed.");
129 internal
->add_posting(tname
, tpos
, wdfinc
);
133 Document::add_term(const string
& tname
, Xapian::termcount wdfinc
)
135 LOGCALL_VOID(API
, "Document::add_term", tname
| wdfinc
);
137 throw InvalidArgumentError("Empty termnames aren't allowed.");
139 internal
->add_term(tname
, wdfinc
);
143 Document::remove_posting(const string
& tname
, Xapian::termpos tpos
,
144 Xapian::termcount wdfdec
)
146 LOGCALL_VOID(API
, "Document::remove_posting", tname
| tpos
| wdfdec
);
148 throw InvalidArgumentError("Empty termnames aren't allowed.");
150 internal
->remove_posting(tname
, tpos
, wdfdec
);
154 Document::remove_term(const string
& tname
)
156 LOGCALL_VOID(API
, "Document::remove_term", tname
);
157 internal
->remove_term(tname
);
161 Document::clear_terms()
163 LOGCALL_VOID(API
, "Document::clear_terms", NO_ARGS
);
164 internal
->clear_terms();
168 Document::termlist_count() const {
169 LOGCALL(API
, Xapian::termcount
, "Document::termlist_count", NO_ARGS
);
170 RETURN(internal
->termlist_count());
174 Document::termlist_begin() const
176 LOGCALL(API
, TermIterator
, "Document::termlist_begin", NO_ARGS
);
177 RETURN(TermIterator(internal
->open_term_list()));
181 Document::values_count() const {
182 LOGCALL(API
, Xapian::termcount
, "Document::values_count", NO_ARGS
);
183 RETURN(internal
->values_count());
187 Document::values_begin() const
189 LOGCALL(API
, ValueIterator
, "Document::values_begin", NO_ARGS
);
190 // Calling values_count() has the side effect of making sure that they have
191 // been read into the std::map "values" member of internal.
192 if (internal
->values_count() == 0) RETURN(ValueIterator());
193 RETURN(ValueIterator(new DocumentValueList(internal
)));
197 Document::get_docid() const
199 LOGCALL(API
, docid
, "Document::get_docid", NO_ARGS
);
200 RETURN(internal
->get_docid());
204 Document::serialise() const
206 LOGCALL(API
, std::string
, "Document::serialise", NO_ARGS
);
207 RETURN(serialise_document(*this));
211 Document::unserialise(const std::string
&s
)
213 LOGCALL_STATIC(API
, Document
, "Document::unserialise", s
);
214 RETURN(unserialise_document(s
));
219 /////////////////////////////////////////////////////////////////////////////
222 OmDocumentTerm::add_position(Xapian::termpos tpos
)
224 LOGCALL_VOID(DB
, "OmDocumentTerm::add_position", tpos
);
226 // We generally expect term positions to be added in approximately
227 // increasing order, so check the end first
228 if (positions
.empty() || tpos
> positions
.back()) {
229 positions
.push_back(tpos
);
233 // Search for the position the term occurs at. Use binary chop to
234 // search, since this is a sorted list.
235 vector
<Xapian::termpos
>::iterator i
;
236 i
= lower_bound(positions
.begin(), positions
.end(), tpos
);
237 if (i
== positions
.end() || *i
!= tpos
) {
238 positions
.insert(i
, tpos
);
243 OmDocumentTerm::remove_position(Xapian::termpos tpos
)
245 LOGCALL_VOID(DB
, "OmDocumentTerm::remove_position", tpos
);
247 // Search for the position the term occurs at. Use binary chop to
248 // search, since this is a sorted list.
249 vector
<Xapian::termpos
>::iterator i
;
250 i
= lower_bound(positions
.begin(), positions
.end(), tpos
);
251 if (i
== positions
.end() || *i
!= tpos
) {
252 throw Xapian::InvalidArgumentError("Position " + str(tpos
) +
253 " not in list, can't remove");
259 OmDocumentTerm::get_description() const
262 description
= "OmDocumentTerm(wdf = ";
263 description
+= str(wdf
);
264 description
+= ", positions[";
265 description
+= str(positions
.size());
271 Xapian::Document::Internal::get_value(Xapian::valueno slot
) const
274 map
<Xapian::valueno
, string
>::const_iterator i
;
275 i
= values
.find(slot
);
276 if (i
== values
.end()) return string();
279 if (!database
.get()) return string();
280 return do_get_value(slot
);
284 Xapian::Document::Internal::get_data() const
286 LOGCALL(DB
, string
, "Xapian::Document::Internal::get_data", NO_ARGS
);
287 if (data_here
) RETURN(data
);
288 if (!database
.get()) RETURN(string());
289 RETURN(do_get_data());
// Setter counterpart of get_data(); takes the new data as data_.
// NOTE(review): the body of this function is not visible in this copy of the
// file. Presumably it stores data_ and sets data_here (both read by
// get_data()) - confirm against the full source before relying on this.
293 Xapian::Document::Internal::set_data(const string
&data_
)
300 Xapian::Document::Internal::open_term_list() const
302 LOGCALL(DB
, TermList
*, "Document::Internal::open_term_list", NO_ARGS
);
304 RETURN(new MapTermList(terms
.begin(), terms
.end()));
306 if (!database
.get()) RETURN(NULL
);
307 RETURN(database
->open_term_list(did
));
311 Xapian::Document::Internal::add_value(Xapian::valueno slot
, const string
&value
)
314 if (!value
.empty()) {
315 values
[slot
] = value
;
317 // Empty values aren't stored, but replace any existing value by
324 Xapian::Document::Internal::remove_value(Xapian::valueno slot
)
327 map
<Xapian::valueno
, string
>::iterator i
= values
.find(slot
);
328 if (i
== values
.end()) {
329 throw Xapian::InvalidArgumentError("Value #" + str(slot
) +
330 " is not present in document, in "
331 "Xapian::Document::Internal::remove_value()");
// NOTE(review): only the signature of this function is visible in this copy
// of the file. Presumably it empties the `values` map - confirm against the
// full source.
337 Xapian::Document::Internal::clear_values()
344 Xapian::Document::Internal::add_posting(const string
& tname
, Xapian::termpos tpos
,
345 Xapian::termcount wdfinc
)
348 positions_modified
= true;
350 map
<string
, OmDocumentTerm
>::iterator i
;
351 i
= terms
.find(tname
);
352 if (i
== terms
.end()) {
353 OmDocumentTerm
newterm(wdfinc
);
354 newterm
.add_position(tpos
);
355 terms
.insert(make_pair(tname
, newterm
));
357 i
->second
.add_position(tpos
);
358 if (wdfinc
) i
->second
.inc_wdf(wdfinc
);
363 Xapian::Document::Internal::add_term(const string
& tname
, Xapian::termcount wdfinc
)
367 map
<string
, OmDocumentTerm
>::iterator i
;
368 i
= terms
.find(tname
);
369 if (i
== terms
.end()) {
370 OmDocumentTerm
newterm(wdfinc
);
371 terms
.insert(make_pair(tname
, newterm
));
373 if (wdfinc
) i
->second
.inc_wdf(wdfinc
);
378 Xapian::Document::Internal::remove_posting(const string
& tname
,
379 Xapian::termpos tpos
,
380 Xapian::termcount wdfdec
)
384 map
<string
, OmDocumentTerm
>::iterator i
;
385 i
= terms
.find(tname
);
386 if (i
== terms
.end()) {
387 throw Xapian::InvalidArgumentError("Term '" + tname
+
388 "' is not present in document, in "
389 "Xapian::Document::Internal::remove_posting()");
391 i
->second
.remove_position(tpos
);
392 if (wdfdec
) i
->second
.dec_wdf(wdfdec
);
393 positions_modified
= true;
397 Xapian::Document::Internal::remove_term(const string
& tname
)
400 map
<string
, OmDocumentTerm
>::iterator i
;
401 i
= terms
.find(tname
);
402 if (i
== terms
.end()) {
403 throw Xapian::InvalidArgumentError("Term '" + tname
+
404 "' is not present in document, in "
405 "Xapian::Document::Internal::remove_term()");
407 positions_modified
= !i
->second
.positions
.empty();
// NOTE(review): part of this function's body is missing from this copy of
// the file; only the positions_modified update below is visible. Presumably
// the `terms` map is emptied before this point - confirm against the full
// source.
412 Xapian::Document::Internal::clear_terms()
416 // Assume there was a term with positions for now.
417 // FIXME: may be worth checking...
// Conservatively flag positions as changed, since the cleared terms are not
// inspected for positions (see the FIXME above).
418 positions_modified
= true;
// NOTE(review): only the signature and the two comment lines below are
// visible in this copy of the file; the statements that compute the count
// are missing. The commented-out line records an alternative that would ask
// the database term list for its approximate size instead.
422 Xapian::Document::Internal::termlist_count() const
425 // How equivalent is this line to the rest?
426 // return database.get() ? database->open_term_list(did)->get_approx_size() : 0;
434 Xapian::Document::Internal::need_terms() const
436 if (terms_here
) return;
437 if (database
.get()) {
438 Xapian::TermIterator
t(database
->open_term_list(did
));
439 Xapian::TermIterator
tend(NULL
);
440 for ( ; t
!= tend
; ++t
) {
441 Xapian::PositionIterator p
= t
.positionlist_begin();
442 OmDocumentTerm
term(t
.get_wdf());
443 for ( ; p
!= t
.positionlist_end(); ++p
) {
444 term
.add_position(*p
);
446 terms
.insert(make_pair(*t
, term
));
453 Xapian::Document::Internal::values_count() const
455 LOGCALL(DB
, Xapian::valueno
, "Document::Internal::values_count", NO_ARGS
);
458 RETURN(values
.size());
462 Xapian::Document::Internal::get_description() const
464 string description
= "Xapian::Document::Internal(";
466 if (data_here
) description
+= "data='" + data
+ "'";
469 if (data_here
) description
+= ", ";
470 description
+= "values[" + str(values
.size()) + "]";
474 if (data_here
|| values_here
) description
+= ", ";
475 description
+= "terms[" + str(terms
.size()) + "]";
478 if (database
.get()) {
479 if (data_here
|| values_here
|| terms_here
) description
+= ", ";
480 description
+= "doc=";
481 description
+= "?"; // do_get_description(); ?
490 Xapian::Document::Internal::need_values() const
493 if (database
.get()) {
494 Assert(values
.empty());
495 do_get_all_values(values
);
501 Xapian::Document::Internal::~Internal()
504 database
->invalidate_doc_object(this);