From 11854f7ea4e1b4c8fc6f0004e5126d4ce2ee4717 Mon Sep 17 00:00:00 2001 From: Olly Betts Date: Tue, 2 Jan 2018 13:16:46 +1300 Subject: [PATCH] Reduce overheads of PostList positional data support Probably the majority of PostList objects aren't involved in phrases or OP_NEAR during the match, so avoid the overhead of a position list object when we know we won't need it. For glass, we lazily create it (so only have the overhead of a pointer), while for honey we use a special subclass to eliminate the size overhead completely. We also now avoid copying interpolative coded positional data in order to iterate it in most cases. --- xapian-core/api/leafpostlist.cc | 2 +- xapian-core/api/leafpostlist.h | 7 +- xapian-core/backends/databaseinternal.h | 3 +- xapian-core/backends/empty_database.cc | 2 +- xapian-core/backends/empty_database.h | 3 +- xapian-core/backends/glass/glass_database.cc | 43 ++--- xapian-core/backends/glass/glass_database.h | 6 +- xapian-core/backends/glass/glass_dbcheck.cc | 4 +- xapian-core/backends/glass/glass_positionlist.cc | 192 +++++++++++++------- xapian-core/backends/glass/glass_positionlist.h | 89 ++++++---- xapian-core/backends/glass/glass_postlist.cc | 18 +- xapian-core/backends/glass/glass_postlist.h | 7 +- xapian-core/backends/honey/honey_database.cc | 10 +- xapian-core/backends/honey/honey_database.h | 4 +- xapian-core/backends/honey/honey_positionlist.cc | 194 ++++++++++++++------- xapian-core/backends/honey/honey_positionlist.h | 87 +++++---- xapian-core/backends/honey/honey_postlist.cc | 65 +++++-- xapian-core/backends/honey/honey_postlist.h | 37 ++-- xapian-core/backends/honey/honey_postlisttable.cc | 5 +- xapian-core/backends/honey/honey_postlisttable.h | 3 +- xapian-core/backends/inmemory/inmemory_database.cc | 6 +- xapian-core/backends/inmemory/inmemory_database.h | 2 +- xapian-core/backends/multi/multi_database.cc | 2 +- xapian-core/backends/multi/multi_database.h | 3 +- xapian-core/backends/remote/remote-database.cc | 4 +- 
xapian-core/backends/remote/remote-database.h | 2 +- xapian-core/common/bitstream.cc | 20 +-- xapian-core/common/bitstream.h | 34 ++-- xapian-core/matcher/localsubmatch.cc | 9 +- xapian-core/matcher/queryoptimiser.h | 2 +- 30 files changed, 556 insertions(+), 309 deletions(-) diff --git a/xapian-core/api/leafpostlist.cc b/xapian-core/api/leafpostlist.cc index d3aef2b19..b4da986c6 100644 --- a/xapian-core/api/leafpostlist.cc +++ b/xapian-core/api/leafpostlist.cc @@ -97,7 +97,7 @@ LeafPostList::gather_position_lists(OrPositionList* orposlist) } LeafPostList * -LeafPostList::open_nearby_postlist(const std::string &) const +LeafPostList::open_nearby_postlist(const std::string &, bool) const { return NULL; } diff --git a/xapian-core/api/leafpostlist.h b/xapian-core/api/leafpostlist.h index 5844a7bbf..836c55b6b 100644 --- a/xapian-core/api/leafpostlist.h +++ b/xapian-core/api/leafpostlist.h @@ -118,11 +118,16 @@ class LeafPostList : public PostList { * method can make a wildcard expansion much more memory * efficient. * + * @param need_pos Does the postlist need to support read_position_list()? + * Note that open_position_list() may still be called even + * if need_pos is false. + * * @return The new postlist object, or NULL if not supported * (in which case the caller should probably open the * postlist via the database instead). */ - virtual LeafPostList * open_nearby_postlist(const std::string & term_) const; + virtual LeafPostList * open_nearby_postlist(const std::string & term_, + bool need_pos) const; /** Set the term name. 
* diff --git a/xapian-core/backends/databaseinternal.h b/xapian-core/backends/databaseinternal.h index e0075fab0..309ae73a0 100644 --- a/xapian-core/backends/databaseinternal.h +++ b/xapian-core/backends/databaseinternal.h @@ -204,7 +204,8 @@ class Database::Internal : public Xapian::Internal::intrusive_base { virtual PostList* open_post_list(const std::string& term) const = 0; - virtual LeafPostList* open_leaf_post_list(const std::string& term) const = 0; + virtual LeafPostList* open_leaf_post_list(const std::string& term, + bool need_pos) const = 0; /** Open a value stream. * diff --git a/xapian-core/backends/empty_database.cc b/xapian-core/backends/empty_database.cc index 5148bacef..c81e13899 100644 --- a/xapian-core/backends/empty_database.cc +++ b/xapian-core/backends/empty_database.cc @@ -50,7 +50,7 @@ EmptyDatabase::open_post_list(const string&) const } LeafPostList* -EmptyDatabase::open_leaf_post_list(const string&) const +EmptyDatabase::open_leaf_post_list(const string&, bool) const { return NULL; } diff --git a/xapian-core/backends/empty_database.h b/xapian-core/backends/empty_database.h index aea706fee..4c641b30f 100644 --- a/xapian-core/backends/empty_database.h +++ b/xapian-core/backends/empty_database.h @@ -34,7 +34,8 @@ class EmptyDatabase : public Xapian::Database::Internal { PostList* open_post_list(const std::string& term) const; - LeafPostList* open_leaf_post_list(const std::string& term) const; + LeafPostList* open_leaf_post_list(const std::string& term, + bool need_pos) const; TermList* open_term_list(Xapian::docid did) const; diff --git a/xapian-core/backends/glass/glass_database.cc b/xapian-core/backends/glass/glass_database.cc index 79920cfd4..a4b0a81ad 100644 --- a/xapian-core/backends/glass/glass_database.cc +++ b/xapian-core/backends/glass/glass_database.cc @@ -822,16 +822,18 @@ PostList * GlassDatabase::open_post_list(const string& term) const { LOGCALL(DB, PostList *, "GlassDatabase::open_post_list", term); - 
RETURN(GlassDatabase::open_leaf_post_list(term)); + RETURN(GlassDatabase::open_leaf_post_list(term, false)); } LeafPostList* -GlassDatabase::open_leaf_post_list(const string& term) const +GlassDatabase::open_leaf_post_list(const string& term, bool need_pos) const { - LOGCALL(DB, LeafPostList *, "GlassDatabase::open_leaf_post_list", term); + LOGCALL(DB, LeafPostList *, "GlassDatabase::open_leaf_post_list", term | need_pos); + (void)need_pos; intrusive_ptr ptrtothis(this); if (term.empty()) { + Assert(!need_pos); Xapian::doccount doccount = get_doccount(); if (version_file.get_last_docid() == doccount) { RETURN(new ContiguousAllDocsPostList(doccount)); @@ -882,18 +884,10 @@ GlassDatabase::open_document(Xapian::docid did, bool lazy) const } PositionList * -GlassDatabase::open_position_list(Xapian::docid did, const string & term) const +GlassDatabase::open_position_list(Xapian::docid did, const string& term) const { Assert(did != 0); - - unique_ptr poslist(new GlassPositionList); - if (!poslist->read_data(&position_table, did, term)) { - // As of 1.1.0, we don't check if the did and term exist - we just - // return an empty positionlist. If the user really needs to know, - // they can check for themselves. 
- } - - return poslist.release(); + return new GlassPositionList(&position_table, did, term); } TermList * @@ -1490,16 +1484,19 @@ PostList * GlassWritableDatabase::open_post_list(const string& term) const { LOGCALL(DB, PostList *, "GlassWritableDatabase::open_post_list", term); - RETURN(GlassWritableDatabase::open_leaf_post_list(term)); + RETURN(GlassWritableDatabase::open_leaf_post_list(term, false)); } LeafPostList * -GlassWritableDatabase::open_leaf_post_list(const string& term) const +GlassWritableDatabase::open_leaf_post_list(const string& term, + bool need_pos) const { - LOGCALL(DB, LeafPostList *, "GlassWritableDatabase::open_leaf_post_list", term); + LOGCALL(DB, LeafPostList *, "GlassWritableDatabase::open_leaf_post_list", term | need_pos); + (void)need_pos; intrusive_ptr ptrtothis(this); if (term.empty()) { + Assert(!need_pos); Xapian::doccount doccount = get_doccount(); if (version_file.get_last_docid() == doccount) { RETURN(new ContiguousAllDocsPostList(doccount)); @@ -1542,22 +1539,14 @@ GlassWritableDatabase::open_term_list_direct(Xapian::docid did) const } PositionList * -GlassWritableDatabase::open_position_list(Xapian::docid did, const string & term) const +GlassWritableDatabase::open_position_list(Xapian::docid did, const string& term) const { Assert(did != 0); - - unique_ptr poslist(new GlassPositionList); - string data; if (inverter.get_positionlist(did, term, data)) { - poslist->read_data(data); - } else if (!poslist->read_data(&position_table, did, term)) { - // As of 1.1.0, we don't check if the did and term exist - we just - // return an empty positionlist. If the user really needs to know, - // they can check for themselves. 
+ return new GlassPositionList(data); } - - return poslist.release(); + return GlassDatabase::open_position_list(did, term); } TermList * diff --git a/xapian-core/backends/glass/glass_database.h b/xapian-core/backends/glass/glass_database.h index 2f4336204..06a5b4316 100644 --- a/xapian-core/backends/glass/glass_database.h +++ b/xapian-core/backends/glass/glass_database.h @@ -258,7 +258,8 @@ class GlassDatabase : public Xapian::Database::Internal { bool has_positions() const; PostList * open_post_list(const string & tname) const; - LeafPostList* open_leaf_post_list(const string& term) const; + LeafPostList* open_leaf_post_list(const string& term, + bool need_pos) const; ValueList * open_value_list(Xapian::valueno slot) const; Xapian::Document::Internal * open_document(Xapian::docid did, bool lazy) const; @@ -420,7 +421,8 @@ class GlassWritableDatabase : public GlassDatabase { bool has_positions() const; PostList * open_post_list(const string & tname) const; - LeafPostList* open_leaf_post_list(const string& term) const; + LeafPostList* open_leaf_post_list(const string& term, + bool need_pos) const; ValueList * open_value_list(Xapian::valueno slot) const; PositionList * open_position_list(Xapian::docid did, const string & term) const; TermList * open_term_list(Xapian::docid did) const; diff --git a/xapian-core/backends/glass/glass_dbcheck.cc b/xapian-core/backends/glass/glass_dbcheck.cc index 3c9f31fc3..a3c891cb2 100644 --- a/xapian-core/backends/glass/glass_dbcheck.cc +++ b/xapian-core/backends/glass/glass_dbcheck.cc @@ -2,7 +2,7 @@ * @brief Check consistency of a glass table. 
*/ /* Copyright 1999,2000,2001 BrightStation PLC - * Copyright 2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016 Olly Betts + * Copyright 2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017 Olly Betts * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as @@ -905,7 +905,7 @@ check_glass_table(const char * tablename, const string &db_dir, int fd, // Special case for single entry position list. } else { // Skip the header we just read. - BitReader rd(data, pos - data.data()); + BitReader rd(pos, end); Xapian::termpos pos_first = rd.decode(pos_last); Xapian::termpos pos_size = rd.decode(pos_last - pos_first) + 2; rd.decode_interpolative(0, pos_size - 1, pos_first, pos_last); diff --git a/xapian-core/backends/glass/glass_positionlist.cc b/xapian-core/backends/glass/glass_positionlist.cc index 3a53680d5..201b0b51c 100644 --- a/xapian-core/backends/glass/glass_positionlist.cc +++ b/xapian-core/backends/glass/glass_positionlist.cc @@ -1,6 +1,6 @@ /* glass_positionlist.cc: A position list in a glass database. * - * Copyright (C) 2004,2005,2006,2008,2009,2010,2013 Olly Betts + * Copyright (C) 2004,2005,2006,2008,2009,2010,2013,2017 Olly Betts * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as @@ -73,7 +73,7 @@ GlassPositionListTable::positionlist_count(Xapian::docid did, } // Skip the header we just read. 
- BitReader rd(data, pos - data.data()); + BitReader rd(pos, end); Xapian::termpos pos_first = rd.decode(pos_last); Xapian::termpos pos_size = rd.decode(pos_last - pos_first) + 2; RETURN(pos_size); @@ -81,78 +81,25 @@ GlassPositionListTable::positionlist_count(Xapian::docid did, /////////////////////////////////////////////////////////////////////////// -bool -GlassPositionList::read_data(const string & data) -{ - LOGCALL(DB, bool, "GlassPositionList::read_data", data); - - have_started = false; - - if (data.empty()) { - // There's no positional information for this term. - size = 0; - last = 0; - current_pos = 1; - RETURN(false); - } - - const char * pos = data.data(); - const char * end = pos + data.size(); - Xapian::termpos pos_last; - if (!unpack_uint(&pos, end, &pos_last)) { - throw Xapian::DatabaseCorruptError("Position list data corrupt"); - } - if (pos == end) { - // Special case for single entry position list. - size = 1; - current_pos = last = pos_last; - RETURN(true); - } - // Skip the header we just read. 
- rd.init(data, pos - data.data()); - Xapian::termpos pos_first = rd.decode(pos_last); - Xapian::termpos pos_size = rd.decode(pos_last - pos_first) + 2; - rd.decode_interpolative(0, pos_size - 1, pos_first, pos_last); - size = pos_size; - last = pos_last; - current_pos = pos_first; - RETURN(true); -} - -bool -GlassPositionList::read_data(const GlassTable * table, Xapian::docid did, - const string & tname) -{ - LOGCALL(DB, bool, "GlassPositionList::read_data", table | did | tname); - if (!cursor.get()) { - cursor.reset(table->cursor_get()); - } - if (cursor.get() && - cursor->find_exact(GlassPositionListTable::make_key(did, tname))) { - RETURN(read_data(cursor->current_tag)); - } - RETURN(read_data(string())); -} - Xapian::termcount -GlassPositionList::get_approx_size() const +GlassBasePositionList::get_approx_size() const { - LOGCALL(DB, Xapian::termcount, "GlassPositionList::get_approx_size", NO_ARGS); + LOGCALL(DB, Xapian::termcount, "GlassBasePositionList::get_approx_size", NO_ARGS); RETURN(size); } Xapian::termpos -GlassPositionList::get_position() const +GlassBasePositionList::get_position() const { - LOGCALL(DB, Xapian::termpos, "GlassPositionList::get_position", NO_ARGS); + LOGCALL(DB, Xapian::termpos, "GlassBasePositionList::get_position", NO_ARGS); Assert(have_started); RETURN(current_pos); } bool -GlassPositionList::next() +GlassBasePositionList::next() { - LOGCALL(DB, bool, "GlassPositionList::next", NO_ARGS); + LOGCALL(DB, bool, "GlassBasePositionList::next", NO_ARGS); if (rare(!have_started)) { have_started = true; return current_pos <= last; @@ -165,9 +112,9 @@ GlassPositionList::next() } bool -GlassPositionList::skip_to(Xapian::termpos termpos) +GlassBasePositionList::skip_to(Xapian::termpos termpos) { - LOGCALL(DB, bool, "GlassPositionList::skip_to", termpos); + LOGCALL(DB, bool, "GlassBasePositionList::skip_to", termpos); have_started = true; if (termpos >= last) { if (termpos == last) { @@ -184,3 +131,122 @@ 
GlassPositionList::skip_to(Xapian::termpos termpos) } return true; } + +GlassPositionList::GlassPositionList(const string& data) +{ + LOGCALL_CTOR(DB, "GlassPositionList", data); + + have_started = false; + + if (data.empty()) { + // There's no positional information for this term. + size = 0; + last = 0; + current_pos = 1; + return; + } + + const char* pos = data.data(); + const char* end = pos + data.size(); + Xapian::termpos pos_last; + if (!unpack_uint(&pos, end, &pos_last)) { + throw Xapian::DatabaseCorruptError("Position list data corrupt"); + } + + if (pos == end) { + // Special case for single entry position list. + size = 1; + current_pos = last = pos_last; + return; + } + + // Copy the rest of the data and lazily decode from that copy. + pos_data.assign(pos, end); + + rd.init(pos_data.data(), pos_data.size()); + Xapian::termpos pos_first = rd.decode(pos_last); + Xapian::termpos pos_size = rd.decode(pos_last - pos_first) + 2; + rd.decode_interpolative(0, pos_size - 1, pos_first, pos_last); + size = pos_size; + last = pos_last; + current_pos = pos_first; +} + +GlassPositionList::GlassPositionList(const GlassTable* table, + Xapian::docid did, + const string& term) +{ + LOGCALL_CTOR(DB, "GlassPositionList", table | did | term); + + have_started = false; + + if (!table->get_exact_entry(GlassPositionListTable::make_key(did, term), + pos_data)) { + // There's no positional information for this term. + size = 0; + last = 0; + current_pos = 1; + return; + } + + const char* pos = pos_data.data(); + const char* end = pos + pos_data.size(); + Xapian::termpos pos_last; + if (!unpack_uint(&pos, end, &pos_last)) { + throw Xapian::DatabaseCorruptError("Position list data corrupt"); + } + + if (pos == end) { + // Special case for single entry position list. 
+ size = 1; + current_pos = last = pos_last; + return; + } + + rd.init(pos, end); + Xapian::termpos pos_first = rd.decode(pos_last); + Xapian::termpos pos_size = rd.decode(pos_last - pos_first) + 2; + rd.decode_interpolative(0, pos_size - 1, pos_first, pos_last); + size = pos_size; + last = pos_last; + current_pos = pos_first; +} + +void +GlassRePositionList::read_data(Xapian::docid did, + const string& term) +{ + LOGCALL_VOID(DB, "GlassRePositionList::read_data", did | term); + + have_started = false; + + if (!cursor.find_exact(GlassPositionListTable::make_key(did, term))) { + // There's no positional information for this term. + size = 0; + last = 0; + current_pos = 1; + return; + } + + const char* pos = cursor.current_tag.data(); + const char* end = pos + cursor.current_tag.size(); + Xapian::termpos pos_last; + if (!unpack_uint(&pos, end, &pos_last)) { + throw Xapian::DatabaseCorruptError("Position list data corrupt"); + } + + if (pos == end) { + // Special case for single entry position list. + size = 1; + current_pos = last = pos_last; + return; + } + + rd.init(pos, end); + Xapian::termpos pos_first = rd.decode(pos_last); + Xapian::termpos pos_size = rd.decode(pos_last - pos_first) + 2; + rd.decode_interpolative(0, pos_size - 1, pos_first, pos_last); + size = pos_size; + last = pos_last; + current_pos = pos_first; +} diff --git a/xapian-core/backends/glass/glass_positionlist.h b/xapian-core/backends/glass/glass_positionlist.h index f2bd3e646..08d67e977 100644 --- a/xapian-core/backends/glass/glass_positionlist.h +++ b/xapian-core/backends/glass/glass_positionlist.h @@ -1,7 +1,7 @@ /** @file glass_positionlist.h * @brief A position list in a glass database. 
*/ -/* Copyright (C) 2005,2006,2008,2009,2010,2011,2013,2016 Olly Betts +/* Copyright (C) 2005,2006,2008,2009,2010,2011,2013,2016,2017 Olly Betts * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as @@ -25,11 +25,11 @@ #include #include "bitstream.h" +#include "glass_cursor.h" #include "glass_lazytable.h" #include "pack.h" #include "backends/positionlist.h" -#include #include using namespace std; @@ -80,8 +80,15 @@ class GlassPositionListTable : public GlassLazyTable { const string & term) const; }; -/** A position list in a glass database. */ -class GlassPositionList : public PositionList { +/** Base-class for a position list in a glass database. */ +class GlassBasePositionList : public PositionList { + /// Copying is not allowed. + GlassBasePositionList(const GlassBasePositionList&) = delete; + + /// Assignment is not allowed. + GlassBasePositionList& operator=(const GlassBasePositionList&) = delete; + + protected: /// Interpolative decoder. BitReader rd; @@ -94,40 +101,12 @@ class GlassPositionList : public PositionList { /// Number of entries. Xapian::termcount size; - /// Cursor for locating multiple entries efficiently. - unique_ptr cursor; - /// Have we started iterating yet? bool have_started; - /// Copying is not allowed. - GlassPositionList(const GlassPositionList &); - - /// Assignment is not allowed. - void operator=(const GlassPositionList &); - public: /// Default constructor. - GlassPositionList() { } - - /// Construct and initialise with data. - GlassPositionList(const GlassTable * table, Xapian::docid did, - const string & tname) { - (void)read_data(table, did, tname); - } - - /** Fill list with data, and move the position to the start. - * - * @return true if position data was read. - */ - bool read_data(const string & data); - - /** Fill list with data, and move the position to the start. - * - * @return true if position data was read. 
- */ - bool read_data(const GlassTable * table, Xapian::docid did, - const string & tname); + GlassBasePositionList() {} /// Returns size of position list. Xapian::termcount get_approx_size() const; @@ -146,4 +125,48 @@ class GlassPositionList : public PositionList { bool skip_to(Xapian::termpos termpos); }; +/** A position list in a glass database. */ +class GlassPositionList : public GlassBasePositionList { + /// The encoded positional data being read by rd. + std::string pos_data; + + /// Copying is not allowed. + GlassPositionList(const GlassPositionList&) = delete; + + /// Assignment is not allowed. + GlassPositionList& operator=(const GlassPositionList&) = delete; + + public: + /// Construct and initialise with data. + explicit + GlassPositionList(const string& data); + + /// Construct and initialise with data. + GlassPositionList(const GlassTable* table, + Xapian::docid did, + const string& term); +}; + +/** A reusable position list in a glass database. */ +class GlassRePositionList : public GlassBasePositionList { + /// Cursor for locating multiple entries efficiently. + GlassCursor cursor; + + /// Copying is not allowed. + GlassRePositionList(const GlassRePositionList&) = delete; + + /// Assignment is not allowed. + GlassRePositionList& operator=(const GlassRePositionList&) = delete; + + public: + /// Constructor. + explicit + GlassRePositionList(const GlassTable* table) + : cursor(table) {} + + /** Fill list with data, and move the position to the start. 
*/ + void read_data(Xapian::docid did, + const string& term); +}; + #endif /* XAPIAN_HGUARD_GLASS_POSITIONLIST_H */ diff --git a/xapian-core/backends/glass/glass_postlist.cc b/xapian-core/backends/glass/glass_postlist.cc index eb64d2cbc..e6083e9f4 100644 --- a/xapian-core/backends/glass/glass_postlist.cc +++ b/xapian-core/backends/glass/glass_postlist.cc @@ -1,7 +1,7 @@ /* glass_postlist.cc: Postlists in a glass database * * Copyright 1999,2000,2001 BrightStation PLC - * Copyright 2002,2003,2004,2005,2007,2008,2009,2011,2013,2014,2015 Olly Betts + * Copyright 2002,2003,2004,2005,2007,2008,2009,2011,2013,2014,2015,2017 Olly Betts * Copyright 2007,2008,2009 Lemur Consulting Ltd * * This program is free software; you can redistribute it and/or @@ -722,12 +722,15 @@ GlassPostList::init() GlassPostList::~GlassPostList() { LOGCALL_DTOR(DB, "GlassPostList"); + delete positionlist; } LeafPostList * -GlassPostList::open_nearby_postlist(const std::string & term_) const +GlassPostList::open_nearby_postlist(const std::string & term_, + bool need_pos) const { - LOGCALL(DB, LeafPostList *, "GlassPostList::open_nearby_postlist", term_); + LOGCALL(DB, LeafPostList *, "GlassPostList::open_nearby_postlist", term_ | need_pos); + (void)need_pos; if (term_.empty()) RETURN(NULL); if (!this_db.get() || this_db->postlist_table.is_modified()) @@ -803,8 +806,13 @@ GlassPostList::read_position_list() { LOGCALL(DB, PositionList *, "GlassPostList::read_position_list", NO_ARGS); Assert(this_db.get()); - positionlist.read_data(&this_db->position_table, did, term); - RETURN(&positionlist); + if (rare(positionlist == NULL)) { + // Lazily create positionlist to avoid the size cost for the common + // case where we don't want positional data. 
+ positionlist = new GlassRePositionList(&this_db->position_table); + } + positionlist->read_data(did, term); + RETURN(positionlist); } PositionList * diff --git a/xapian-core/backends/glass/glass_postlist.h b/xapian-core/backends/glass/glass_postlist.h index 9e1c18419..23abba59c 100644 --- a/xapian-core/backends/glass/glass_postlist.h +++ b/xapian-core/backends/glass/glass_postlist.h @@ -3,7 +3,7 @@ */ /* Copyright 1999,2000,2001 BrightStation PLC * Copyright 2002 Ananova Ltd - * Copyright 2002,2003,2004,2005,2007,2008,2009,2011,2013,2014,2015 Olly Betts + * Copyright 2002,2003,2004,2005,2007,2008,2009,2011,2013,2014,2015,2017 Olly Betts * Copyright 2007,2009 Lemur Consulting Ltd * * This program is free software; you can redistribute it and/or @@ -144,7 +144,7 @@ class GlassPostList : public LeafPostList { Xapian::Internal::intrusive_ptr this_db; /// The position list object for this posting list. - GlassPositionList positionlist; + GlassRePositionList* positionlist = NULL; /// Whether we've started reading the list yet. bool have_started; @@ -245,7 +245,8 @@ class GlassPostList : public LeafPostList { /// Destructor. ~GlassPostList(); - LeafPostList * open_nearby_postlist(const std::string & term_) const; + LeafPostList * open_nearby_postlist(const std::string & term_, + bool need_pos) const; /** Used for looking up doclens. 
* diff --git a/xapian-core/backends/honey/honey_database.cc b/xapian-core/backends/honey/honey_database.cc index 406e29515..2dae249e0 100644 --- a/xapian-core/backends/honey/honey_database.cc +++ b/xapian-core/backends/honey/honey_database.cc @@ -210,16 +210,18 @@ HoneyDatabase::has_positions() const PostList* HoneyDatabase::open_post_list(const string& term) const { - return HoneyDatabase::open_leaf_post_list(term); + return HoneyDatabase::open_leaf_post_list(term, false); } LeafPostList* -HoneyDatabase::open_leaf_post_list(const string& term) const +HoneyDatabase::open_leaf_post_list(const string& term, bool need_pos) const { - if (term.empty()) + if (term.empty()) { + Assert(!need_pos); return new HoneyAllDocsPostList(this, get_doccount()); + } - return postlist_table.open_post_list(this, term); + return postlist_table.open_post_list(this, term, need_pos); } ValueList* diff --git a/xapian-core/backends/honey/honey_database.h b/xapian-core/backends/honey/honey_database.h index 0a3edd1fc..f1c505bdf 100644 --- a/xapian-core/backends/honey/honey_database.h +++ b/xapian-core/backends/honey/honey_database.h @@ -45,6 +45,7 @@ class HoneyTermList; /// Database using honey backend. class HoneyDatabase : public Xapian::Database::Internal { friend class HoneyAllTermsList; + friend class HoneyPosPostList; friend class HoneyPostList; friend class HoneySpellingWordsList; friend class HoneySynonymTermList; @@ -164,7 +165,8 @@ class HoneyDatabase : public Xapian::Database::Internal { PostList* open_post_list(const std::string& term) const; - LeafPostList* open_leaf_post_list(const std::string& term) const; + LeafPostList* open_leaf_post_list(const std::string& term, + bool need_pos) const; /** Open a value stream. 
* diff --git a/xapian-core/backends/honey/honey_positionlist.cc b/xapian-core/backends/honey/honey_positionlist.cc index f5258d965..c204d2a82 100644 --- a/xapian-core/backends/honey/honey_positionlist.cc +++ b/xapian-core/backends/honey/honey_positionlist.cc @@ -1,6 +1,6 @@ /* honey_positionlist.cc: A position list in a honey database. * - * Copyright (C) 2004,2005,2006,2008,2009,2010,2013 Olly Betts + * Copyright (C) 2004,2005,2006,2008,2009,2010,2013,2017 Olly Betts * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as @@ -74,7 +74,7 @@ HoneyPositionTable::positionlist_count(Xapian::docid did, } // Skip the header we just read. - BitReader rd(data, pos - data.data()); + BitReader rd(pos, end); Xapian::termpos pos_first = rd.decode(pos_last); Xapian::termpos pos_size = rd.decode(pos_last - pos_first) + 2; RETURN(pos_size); @@ -82,79 +82,25 @@ HoneyPositionTable::positionlist_count(Xapian::docid did, /////////////////////////////////////////////////////////////////////////// -bool -HoneyPositionList::read_data(const string & data) -{ - LOGCALL(DB, bool, "HoneyPositionList::read_data", data); - - have_started = false; - - if (data.empty()) { - // There's no positional information for this term. - size = 0; - last = 0; - current_pos = 1; - RETURN(false); - } - - const char * pos = data.data(); - const char * end = pos + data.size(); - Xapian::termpos pos_last; - if (!unpack_uint(&pos, end, &pos_last)) { - throw Xapian::DatabaseCorruptError("Position list data corrupt"); - } - if (pos == end) { - // Special case for single entry position list. - size = 1; - current_pos = last = pos_last; - RETURN(true); - } - // Skip the header we just read. 
- rd.init(data, pos - data.data()); - Xapian::termpos pos_first = rd.decode(pos_last); - Xapian::termpos pos_size = rd.decode(pos_last - pos_first) + 2; - rd.decode_interpolative(0, pos_size - 1, pos_first, pos_last); - size = pos_size; - last = pos_last; - current_pos = pos_first; - RETURN(true); -} - -bool -HoneyPositionList::read_data(const HoneyTable& table, Xapian::docid did, - const string& term) -{ - LOGCALL(DB, bool, "HoneyPositionList::read_data", table | did | term); - if (!cursor.get()) { - cursor.reset(table.cursor_get()); - } - if (cursor.get() && - cursor->find_exact(HoneyPositionTable::make_key(did, term))) { - cursor->read_tag(); - RETURN(read_data(cursor->current_tag)); - } - RETURN(read_data(string())); -} - Xapian::termcount -HoneyPositionList::get_approx_size() const +HoneyBasePositionList::get_approx_size() const { - LOGCALL(DB, Xapian::termcount, "HoneyPositionList::get_approx_size", NO_ARGS); + LOGCALL(DB, Xapian::termcount, "HoneyBasePositionList::get_approx_size", NO_ARGS); RETURN(size); } Xapian::termpos -HoneyPositionList::get_position() const +HoneyBasePositionList::get_position() const { - LOGCALL(DB, Xapian::termpos, "HoneyPositionList::get_position", NO_ARGS); + LOGCALL(DB, Xapian::termpos, "HoneyBasePositionList::get_position", NO_ARGS); Assert(have_started); RETURN(current_pos); } bool -HoneyPositionList::next() +HoneyBasePositionList::next() { - LOGCALL(DB, bool, "HoneyPositionList::next", NO_ARGS); + LOGCALL(DB, bool, "HoneyBasePositionList::next", NO_ARGS); if (rare(!have_started)) { have_started = true; return current_pos <= last; @@ -167,9 +113,9 @@ HoneyPositionList::next() } bool -HoneyPositionList::skip_to(Xapian::termpos termpos) +HoneyBasePositionList::skip_to(Xapian::termpos termpos) { - LOGCALL(DB, bool, "HoneyPositionList::skip_to", termpos); + LOGCALL(DB, bool, "HoneyBasePositionList::skip_to", termpos); have_started = true; if (termpos >= last) { if (termpos == last) { @@ -186,3 +132,123 @@ 
HoneyPositionList::skip_to(Xapian::termpos termpos) } return true; } + +HoneyPositionList::HoneyPositionList(const string& data) +{ + LOGCALL_CTOR(DB, "HoneyPositionList", data); + + have_started = false; + + if (data.empty()) { + // There's no positional information for this term. + size = 0; + last = 0; + current_pos = 1; + return; + } + + const char* pos = data.data(); + const char* end = pos + data.size(); + Xapian::termpos pos_last; + if (!unpack_uint(&pos, end, &pos_last)) { + throw Xapian::DatabaseCorruptError("Position list data corrupt"); + } + + if (pos == end) { + // Special case for single entry position list. + size = 1; + current_pos = last = pos_last; + return; + } + + // Copy the rest of the data and lazily decode from that copy. + pos_data.assign(pos, end); + + rd.init(pos_data.data(), pos_data.size()); + Xapian::termpos pos_first = rd.decode(pos_last); + Xapian::termpos pos_size = rd.decode(pos_last - pos_first) + 2; + rd.decode_interpolative(0, pos_size - 1, pos_first, pos_last); + size = pos_size; + last = pos_last; + current_pos = pos_first; +} + +HoneyPositionList::HoneyPositionList(const HoneyTable& table, + Xapian::docid did, + const string& term) +{ + LOGCALL_CTOR(DB, "HoneyPositionList", table | did | term); + + have_started = false; + + if (!table.get_exact_entry(HoneyPositionTable::make_key(did, term), + pos_data)) { + // There's no positional information for this term. + size = 0; + last = 0; + current_pos = 1; + return; + } + + const char* pos = pos_data.data(); + const char* end = pos + pos_data.size(); + Xapian::termpos pos_last; + if (!unpack_uint(&pos, end, &pos_last)) { + throw Xapian::DatabaseCorruptError("Position list data corrupt"); + } + + if (pos == end) { + // Special case for single entry position list. 
+ size = 1; + current_pos = last = pos_last; + return; + } + + rd.init(pos, end); + Xapian::termpos pos_first = rd.decode(pos_last); + Xapian::termpos pos_size = rd.decode(pos_last - pos_first) + 2; + rd.decode_interpolative(0, pos_size - 1, pos_first, pos_last); + size = pos_size; + last = pos_last; + current_pos = pos_first; +} + +void +HoneyRePositionList::read_data(Xapian::docid did, + const string& term) +{ + LOGCALL_VOID(DB, "HoneyRePositionList::read_data", did | term); + + have_started = false; + + if (!cursor.find_exact(HoneyPositionTable::make_key(did, term))) { + // There's no positional information for this term. + size = 0; + last = 0; + current_pos = 1; + return; + } + + cursor.read_tag(); + const char* pos = cursor.current_tag.data(); + const char* end = pos + cursor.current_tag.size(); + Xapian::termpos pos_last; + if (!unpack_uint(&pos, end, &pos_last)) { + throw Xapian::DatabaseCorruptError("Position list data corrupt"); + } + + if (pos == end) { + // Special case for single entry position list. + size = 1; + current_pos = last = pos_last; + return; + } + + rd.init(pos, end); + Xapian::termpos pos_first = rd.decode(pos_last); + Xapian::termpos pos_size = rd.decode(pos_last - pos_first) + 2; + rd.decode_interpolative(0, pos_size - 1, pos_first, pos_last); + size = pos_size; + last = pos_last; + current_pos = pos_first; +} diff --git a/xapian-core/backends/honey/honey_positionlist.h b/xapian-core/backends/honey/honey_positionlist.h index f040f1357..33d0b0b4d 100644 --- a/xapian-core/backends/honey/honey_positionlist.h +++ b/xapian-core/backends/honey/honey_positionlist.h @@ -1,7 +1,7 @@ /** @file honey_positionlist.h * @brief A position list in a honey database. 
*/ -/* Copyright (C) 2005,2006,2008,2009,2010,2011,2013,2016 Olly Betts +/* Copyright (C) 2005,2006,2008,2009,2010,2011,2013,2016,2017 Olly Betts * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as @@ -30,7 +30,6 @@ #include "honey_lazytable.h" #include "pack.h" -#include <memory> #include <string> using namespace std; @@ -81,8 +80,15 @@ class HoneyPositionTable : public HoneyLazyTable { const string & term) const; }; -/** A position list in a honey database. */ -class HoneyPositionList : public PositionList { +/** Base-class for a position list in a honey database. */ +class HoneyBasePositionList : public PositionList { + /// Copying is not allowed. + HoneyBasePositionList(const HoneyBasePositionList&) = delete; + + /// Assignment is not allowed. + HoneyBasePositionList& operator=(const HoneyBasePositionList&) = delete; + + protected: /// Interpolative decoder. BitReader rd; @@ -95,40 +101,12 @@ class HoneyPositionList : public PositionList { /// Number of entries. Xapian::termcount size; - /// Cursor for locating multiple entries efficiently. - unique_ptr<HoneyCursor> cursor; - /// Have we started iterating yet? bool have_started; - /// Copying is not allowed. - HoneyPositionList(const HoneyPositionList &); - - /// Assignment is not allowed. - void operator=(const HoneyPositionList &); - public: /// Default constructor. - HoneyPositionList() { } - - /// Construct and initialise with data. - HoneyPositionList(const HoneyTable& table, Xapian::docid did, - const string& term) { - (void)read_data(table, did, term); - } - - /** Fill list with data, and move the position to the start. - * - * @return true if position data was read. - */ - bool read_data(const string & data); - - /** Fill list with data, and move the position to the start. - * - * @return true if position data was read.
- */ - bool read_data(const HoneyTable& table, Xapian::docid did, - const string & term); + HoneyBasePositionList() {} /// Returns size of position list. Xapian::termcount get_approx_size() const; @@ -147,4 +125,47 @@ class HoneyPositionList : public PositionList { bool skip_to(Xapian::termpos termpos); }; +/** A position list in a honey database. */ +class HoneyPositionList : public HoneyBasePositionList { + /// The encoded positional data being read by rd. + std::string pos_data; + + /// Copying is not allowed. + HoneyPositionList(const HoneyPositionList&) = delete; + + /// Assignment is not allowed. + HoneyPositionList& operator=(const HoneyPositionList&) = delete; + + public: + /// Construct and initialise with data. + explicit + HoneyPositionList(const string& data); + + /// Construct and initialise with data. + HoneyPositionList(const HoneyTable& table, + Xapian::docid did, + const string& term); +}; + +/** A reusable position list in a honey database. */ +class HoneyRePositionList : public HoneyBasePositionList { + /// Cursor for locating multiple entries efficiently. + HoneyCursor cursor; + + /// Copying is not allowed. + HoneyRePositionList(const HoneyRePositionList&) = delete; + + /// Assignment is not allowed. + HoneyRePositionList& operator=(const HoneyRePositionList&) = delete; + + public: + /// Constructor. + explicit + HoneyRePositionList(const HoneyTable& table) + : cursor(&table) {} + + /** Fill list with data, and move the position to the start. */ + void read_data(Xapian::docid did, const string& term); +}; + #endif /* XAPIAN_HGUARD_HONEY_POSITIONLIST_H */ diff --git a/xapian-core/backends/honey/honey_postlist.cc b/xapian-core/backends/honey/honey_postlist.cc index 0a1c864db..96ab3c26f 100644 --- a/xapian-core/backends/honey/honey_postlist.cc +++ b/xapian-core/backends/honey/honey_postlist.cc @@ -1,3 +1,22 @@ +/** @file honey_postlist.cc + * @brief PostList in a honey database. 
+ */ +/* Copyright (C) 2017 Olly Betts + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ #include <config.h> @@ -55,7 +74,6 @@ HoneyPostList::HoneyPostList(const HoneyDatabase* db_, HoneyPostList::~HoneyPostList() { delete cursor; - delete position_list; } Xapian::doccount @@ -65,7 +83,7 @@ HoneyPostList::get_termfreq() const } LeafPostList* -HoneyPostList::open_nearby_postlist(const string& term_) const +HoneyPostList::open_nearby_postlist(const string& term_, bool need_pos) const { Assert(!term_.empty()); if (!cursor) return NULL; @@ -79,9 +97,13 @@ HoneyPostList::open_nearby_postlist(const string& term_) const // We also need to distinguish this case from "open_nearby_postlist() // not supported" though. // return NULL; + // + // No need to consider need_pos for an empty posting list.
return new HoneyPostList(db, term_, NULL); } + if (need_pos) + return new HoneyPosPostList(db, term_, new_cursor.release()); return new HoneyPostList(db, term_, new_cursor.release()); } @@ -104,20 +126,6 @@ HoneyPostList::at_end() const } PositionList* -HoneyPostList::read_position_list() -{ - if (rare(position_list == NULL)) - position_list = new HoneyPositionList(); - if (!position_list->read_data(db->position_table, get_docid(), term)) { - // FIXME: Consider returning NULL here - callers need fixing up, but - // this may be a rare case and the costs of checking for NULL may - // outweigh any gains. Need to profile. - // return NULL; - } - return position_list; -} - -PositionList* HoneyPostList::open_position_list() const { return new HoneyPositionList(db->position_table, get_docid(), term); @@ -248,6 +256,31 @@ HoneyPostList::get_description() const return desc; } +HoneyPosPostList::HoneyPosPostList(const HoneyDatabase* db_, + const std::string& term_, + HoneyCursor* cursor_) + : HoneyPostList(db_, term_, cursor_), + position_list(db_->position_table) {} + +PositionList* +HoneyPosPostList::read_position_list() +{ + position_list.read_data(HoneyPostList::get_docid(), term); + // FIXME: Consider returning NULL if there's no positional data - callers + // need fixing up, but this may be a rare case and the costs of checking + // for NULL may outweigh any gains. Need to profile. 
+ return &position_list; +} + +string +HoneyPosPostList::get_description() const +{ + string desc = "HoneyPosPostList("; + desc += term; + desc += ')'; + return desc; +} + namespace Honey { void diff --git a/xapian-core/backends/honey/honey_postlist.h b/xapian-core/backends/honey/honey_postlist.h index 0cbe91f76..74f172c5e 100644 --- a/xapian-core/backends/honey/honey_postlist.h +++ b/xapian-core/backends/honey/honey_postlist.h @@ -23,13 +23,13 @@ #define XAPIAN_INCLUDED_HONEY_POSTLIST_H #include "api/leafpostlist.h" +#include "honey_positionlist.h" #include "pack.h" #include <string> class HoneyCursor; class HoneyDatabase; -class HoneyPositionList; namespace Honey { @@ -126,13 +126,6 @@ class HoneyPostList : public LeafPostList { /// The highest document id in this posting list. Xapian::docid last_did; - /** PositionList object to reuse for OP_NEAR and OP_PHRASE. - * - * This saves the overhead of creating objects for every document - * considered. - */ - HoneyPositionList* position_list = NULL; - /// HoneyDatabase to get position table object from. const HoneyDatabase* db; @@ -149,7 +142,8 @@ class HoneyPostList : public LeafPostList { Xapian::doccount get_termfreq() const; - LeafPostList* open_nearby_postlist(const std::string& term_) const; + LeafPostList* open_nearby_postlist(const std::string& term_, + bool need_pos) const; Xapian::docid get_docid() const; @@ -157,8 +151,6 @@ class HoneyPostList : public LeafPostList { bool at_end() const; - PositionList* read_position_list(); - PositionList* open_position_list() const; PostList* next(double w_min); @@ -170,4 +162,27 @@ class HoneyPostList : public LeafPostList { std::string get_description() const; }; +/** PostList in a honey database with positions. + * + * Use a special subclass to avoid the size cost for the common case where we + * don't want positional data. + */ +class HoneyPosPostList : public HoneyPostList { + /** PositionList object to reuse for OP_NEAR and OP_PHRASE.
+ * + * This saves the overhead of creating objects for every document + * considered. + */ + HoneyRePositionList position_list; + + public: + HoneyPosPostList(const HoneyDatabase* db_, + const std::string& term_, + HoneyCursor* cursor_); + + PositionList* read_position_list(); + + std::string get_description() const; +}; + #endif // XAPIAN_INCLUDED_HONEY_POSTLIST_H diff --git a/xapian-core/backends/honey/honey_postlisttable.cc b/xapian-core/backends/honey/honey_postlisttable.cc index ca30b4689..1ef831fd6 100644 --- a/xapian-core/backends/honey/honey_postlisttable.cc +++ b/xapian-core/backends/honey/honey_postlisttable.cc @@ -33,7 +33,8 @@ using namespace std; HoneyPostList* HoneyPostListTable::open_post_list(const HoneyDatabase* db, - const std::string& term) const + const std::string& term, + bool need_pos) const { Assert(!term.empty()); // Try to position cursor first so we avoid creating HoneyPostList objects @@ -46,6 +47,8 @@ HoneyPostListTable::open_post_list(const HoneyDatabase* db, return new HoneyPostList(db, term, NULL); } + if (need_pos) + return new HoneyPosPostList(db, term, cursor.release()); return new HoneyPostList(db, term, cursor.release()); } diff --git a/xapian-core/backends/honey/honey_postlisttable.h b/xapian-core/backends/honey/honey_postlisttable.h index 7c4d29d06..6371122cc 100644 --- a/xapian-core/backends/honey/honey_postlisttable.h +++ b/xapian-core/backends/honey/honey_postlisttable.h @@ -55,7 +55,8 @@ class HoneyPostListTable : public HoneyTable { } HoneyPostList* open_post_list(const HoneyDatabase* db, - const std::string& term) const; + const std::string& term, + bool need_pos) const; void get_freqs(const std::string& term, Xapian::doccount* termfreq_ptr, diff --git a/xapian-core/backends/inmemory/inmemory_database.cc b/xapian-core/backends/inmemory/inmemory_database.cc index 826a969b6..c1c6f02ec 100644 --- a/xapian-core/backends/inmemory/inmemory_database.cc +++ b/xapian-core/backends/inmemory/inmemory_database.cc @@ -412,14 +412,16 
@@ InMemoryDatabase::close() PostList* InMemoryDatabase::open_post_list(const string& term) const { - return InMemoryDatabase::open_leaf_post_list(term); + return InMemoryDatabase::open_leaf_post_list(term, false); } LeafPostList* -InMemoryDatabase::open_leaf_post_list(const string& term) const +InMemoryDatabase::open_leaf_post_list(const string& term, bool need_pos) const { + (void)need_pos; if (closed) InMemoryDatabase::throw_database_closed(); if (term.empty()) { + Assert(!need_pos); intrusive_ptr<const InMemoryDatabase> ptrtothis(this); return new InMemoryAllDocsPostList(ptrtothis); } diff --git a/xapian-core/backends/inmemory/inmemory_database.h b/xapian-core/backends/inmemory/inmemory_database.h index 3acf3e23a..39c3bbf87 100644 --- a/xapian-core/backends/inmemory/inmemory_database.h +++ b/xapian-core/backends/inmemory/inmemory_database.h @@ -335,7 +335,7 @@ class InMemoryDatabase : public Xapian::Database::Internal { bool has_positions() const; PostList * open_post_list(const string & tname) const; - LeafPostList* open_leaf_post_list(const string& term) const; + LeafPostList* open_leaf_post_list(const string& term, bool need_pos) const; TermList * open_term_list(Xapian::docid did) const; TermList * open_term_list_direct(Xapian::docid did) const; Xapian::Document::Internal * open_document(Xapian::docid did, bool lazy) const; diff --git a/xapian-core/backends/multi/multi_database.cc b/xapian-core/backends/multi/multi_database.cc index 29d509834..3f8bb89ce 100644 --- a/xapian-core/backends/multi/multi_database.cc +++ b/xapian-core/backends/multi/multi_database.cc @@ -79,7 +79,7 @@ MultiDatabase::open_post_list(const string& term) const } LeafPostList* -MultiDatabase::open_leaf_post_list(const string&) const +MultiDatabase::open_leaf_post_list(const string&, bool) const { // This should never get called.
Assert(false); diff --git a/xapian-core/backends/multi/multi_database.h b/xapian-core/backends/multi/multi_database.h index a82c06de0..b5008e4d7 100644 --- a/xapian-core/backends/multi/multi_database.h +++ b/xapian-core/backends/multi/multi_database.h @@ -62,7 +62,8 @@ class MultiDatabase : public Xapian::Database::Internal { PostList* open_post_list(const std::string& term) const; - LeafPostList* open_leaf_post_list(const std::string& term) const; + LeafPostList* open_leaf_post_list(const std::string& term, + bool need_pos) const; TermList* open_term_list(Xapian::docid did) const; diff --git a/xapian-core/backends/remote/remote-database.cc b/xapian-core/backends/remote/remote-database.cc index 257309e66..a9c5ee3ab 100644 --- a/xapian-core/backends/remote/remote-database.cc +++ b/xapian-core/backends/remote/remote-database.cc @@ -240,11 +240,11 @@ RemoteDatabase::open_allterms(const string & prefix) const { PostList * RemoteDatabase::open_post_list(const string& term) const { - return RemoteDatabase::open_leaf_post_list(term); + return RemoteDatabase::open_leaf_post_list(term, false); } LeafPostList * -RemoteDatabase::open_leaf_post_list(const string& term) const +RemoteDatabase::open_leaf_post_list(const string& term, bool) const { return new NetworkPostList(intrusive_ptr<const RemoteDatabase>(this), term); } diff --git a/xapian-core/backends/remote/remote-database.h b/xapian-core/backends/remote/remote-database.h index d78cad14b..84f9a32e6 100644 --- a/xapian-core/backends/remote/remote-database.h +++ b/xapian-core/backends/remote/remote-database.h @@ -217,7 +217,7 @@ class RemoteDatabase : public Xapian::Database::Internal { PostList* open_post_list(const std::string& term) const; - LeafPostList* open_leaf_post_list(const std::string& term) const; + LeafPostList* open_leaf_post_list(const std::string& term, bool) const; Xapian::doccount read_post_list(const std::string& term, NetworkPostList & pl) const; diff --git a/xapian-core/common/bitstream.cc b/xapian-core/common/bitstream.cc
index e33e81609..747c30b86 100644 --- a/xapian-core/common/bitstream.cc +++ b/xapian-core/common/bitstream.cc @@ -1,7 +1,7 @@ /** @file bitstream.cc * @brief Classes to encode/decode a bitstream. */ -/* Copyright (C) 2004,2005,2006,2008,2013,2014,2016 Olly Betts +/* Copyright (C) 2004,2005,2006,2008,2013,2014,2016,2017 Olly Betts * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as @@ -160,17 +160,17 @@ BitReader::decode(Xapian::termpos outof, bool force) size_t bits = highest_order_bit(outof - 1); const size_t spare = (1 << bits) - outof; const size_t mid_start = (outof - spare) / 2; - Xapian::termpos p; + Xapian::termpos pos; if (spare) { - p = read_bits(bits - 1); - if (p < mid_start) { - if (read_bits(1)) p += mid_start + spare; + pos = read_bits(bits - 1); + if (pos < mid_start) { + if (read_bits(1)) pos += mid_start + spare; } } else { - p = read_bits(bits); + pos = read_bits(bits); } - Assert(p < outof); - return p; + Assert(pos < outof); + return pos; } unsigned int @@ -187,8 +187,8 @@ BitReader::read_bits(int count) return result | (read_bits(count - 16) << 16); } while (n_bits < count) { - Assert(idx < buf.size()); - acc |= static_cast<unsigned char>(buf[idx++]) << n_bits; + Assert(p < end); + acc |= static_cast<unsigned char>(*p++) << n_bits; n_bits += 8; } result = acc & ((1u << count) - 1); diff --git a/xapian-core/common/bitstream.h b/xapian-core/common/bitstream.h index 370babbfd..32d7d4059 100644 --- a/xapian-core/common/bitstream.h +++ b/xapian-core/common/bitstream.h @@ -1,7 +1,7 @@ /** @file bitstream.h * @brief Classes to encode/decode a bitstream. */ -/* Copyright (C) 2004,2005,2006,2008,2012,2013,2014 Olly Betts +/* Copyright (C) 2004,2005,2006,2008,2012,2013,2014,2017 Olly Betts * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as @@ -64,9 +64,12 @@ class BitWriter { /// Read a stream created by BitWriter.
class BitReader { - std::string buf; - size_t idx; + const char* p; + + const char* end; + int n_bits; + unsigned int acc; unsigned int read_bits(int count); @@ -119,24 +122,25 @@ class BitReader { // Construct. BitReader() { } - // Construct with the contents of buf_. - explicit BitReader(const std::string &buf_) - : buf(buf_), idx(0), n_bits(0), acc(0) { } + // Construct and set data. + BitReader(const char* p_, const char* end_) + : p(p_), end(end_), n_bits(0), acc(0) { } - // Construct with the contents of buf_, skipping some bytes. - BitReader(const std::string &buf_, size_t skip) - : buf(buf_, skip), idx(0), n_bits(0), acc(0) { } - - // Initialise from buf_, optionally skipping some bytes. - void init(const std::string &buf_, size_t skip = 0) { - buf.assign(buf_, skip, std::string::npos); - idx = 0; + // Initialise with fresh data. + void init(const char* p_, const char* end_) { + p = p_; + end = end_; n_bits = 0; acc = 0; di_stack.clear(); di_current.uninit(); } + // Initialise with fresh data. + void init(const char* p_, size_t len) { + init(p_, p_ + len); + } + // Decode value, known to be less than outof. Xapian::termpos decode(Xapian::termpos outof, bool force = false); @@ -145,7 +149,7 @@ class BitReader { // there's less than a byte left and that all remaining bits are // zero. bool check_all_gone() const { - return (idx == buf.size() && n_bits <= 7 && acc == 0); + return (p == end && n_bits <= 7 && acc == 0); } /// Perform interpolative decoding between elements between j and k. 
diff --git a/xapian-core/matcher/localsubmatch.cc b/xapian-core/matcher/localsubmatch.cc index efdd0434b..2ae11a39a 100644 --- a/xapian-core/matcher/localsubmatch.cc +++ b/xapian-core/matcher/localsubmatch.cc @@ -238,7 +238,8 @@ LocalSubMatch::open_post_list(const string& term, LeafPostList * pl = NULL; if (term.empty()) { - pl = db->open_leaf_post_list(term); + Assert(!need_positions); + pl = db->open_leaf_post_list(term, false); } else { if (!need_positions) { if ((!weighted && !in_synonym) || @@ -250,7 +251,7 @@ LocalSubMatch::open_post_list(const string& term, // the term indexes all documents, we can replace it with // the MatchAll postlist, which is especially efficient if // there are no gaps in the docids. - pl = db->open_leaf_post_list(string()); + pl = db->open_leaf_post_list(string(), false); // Set the term name so the postlist looks up the correct // term frequencies - this is necessary if the weighting @@ -265,9 +266,9 @@ LocalSubMatch::open_post_list(const string& term, if (!pl) { const LeafPostList * hint = qopt->get_hint_postlist(); if (hint) - pl = hint->open_nearby_postlist(term); + pl = hint->open_nearby_postlist(term, need_positions); if (!pl) { - pl = db->open_leaf_post_list(term); + pl = db->open_leaf_post_list(term, need_positions); } qopt->set_hint_postlist(pl); } diff --git a/xapian-core/matcher/queryoptimiser.h b/xapian-core/matcher/queryoptimiser.h index 371415a47..0f0795c60 100644 --- a/xapian-core/matcher/queryoptimiser.h +++ b/xapian-core/matcher/queryoptimiser.h @@ -96,7 +96,7 @@ class QueryOptimiser { PostList * open_lazy_post_list(const std::string& term, Xapian::termcount wqf, double factor) { - return localsubmatch.open_post_list(term, wqf, factor, false, + return localsubmatch.open_post_list(term, wqf, factor, need_positions, in_synonym, this, true); } -- 2.11.4.GIT