Document xapian-compact --blocksize takes an argument
[xapian.git] / xapian-core / api / vectortermlist.h
blobcbb174502fa3bc913efb288112c6154c390f1a80
/** @file vectortermlist.h
 * @brief A vector-like container of terms which can be iterated.
 */
/* Copyright (C) 2011,2012 Olly Betts
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
 */
21 #ifndef XAPIAN_INCLUDED_VECTORTERMLIST_H
22 #define XAPIAN_INCLUDED_VECTORTERMLIST_H
24 #include "xapian/types.h"
26 #include "net/length.h"
27 #include "termlist.h"
29 /** This class stores a list of terms.
31 * To be memory efficient, we store the terms in a single string using a
32 * suitable simple encoding. This way the number of bytes needed will
33 * usually be the sum of the lengths of all the terms plus the number of
34 * terms. If we used std::vector<std::string> here like we used to, that
35 * would need something like an additional 30 bytes per term (30 calculated
36 * for GCC 4.x on x86_64).
38 class VectorTermList : public TermList {
39 /// The encoded terms.
40 std::string data;
42 /// Pointer to the next term's data, or NULL if we are at end.
43 const char * p;
45 /// The number of terms in the list.
46 Xapian::termcount num_terms;
48 /// The current term.
49 std::string current_term;
51 public:
52 template<typename I>
53 VectorTermList(I begin, I end) : num_terms(0)
55 // First calculate how much space we'll need so we can reserve it.
56 size_t total_size = 0;
57 I i;
58 for (i = begin; i != end; ++i) {
59 ++num_terms;
60 const std::string & s = *i;
61 total_size += s.size() + 1;
62 if (s.size() >= 255) {
63 // Not a common case, so just assume the worst case rather than
64 // trying to carefully calculate the exact size.
65 total_size += 5;
68 data.reserve(total_size);
70 // Now encode all the terms into data.
71 for (i = begin; i != end; ++i) {
72 const std::string & s = *i;
73 data += encode_length(s.size());
74 data += s;
77 p = data.data();
80 Xapian::termcount get_approx_size() const;
82 std::string get_termname() const;
84 Xapian::termcount get_wdf() const;
86 Xapian::doccount get_termfreq() const;
88 TermList * next();
90 TermList * skip_to(const std::string &);
92 bool at_end() const;
94 Xapian::termcount positionlist_count() const;
96 Xapian::PositionIterator positionlist_begin() const;
99 #endif // XAPIAN_INCLUDED_VECTORTERMLIST_H