From ae6df40822b668e63699e01c166de6739284cda3 Mon Sep 17 00:00:00 2001 From: Olly Betts Date: Wed, 21 Feb 2018 17:31:37 +1300 Subject: [PATCH] [honey] Reduce find_entry() and find_entry_lt() use find_entry(string()) is the same as find_entry_ge(string()). The previous pattern of using find_entry_lt() to put the cursor in a state where next() advances to a particular key is a bit inefficient as we have to read items before those we actually want; it's also a wasted cursor seek if the first action is skip_to() instead of next(). So instead put the cursor after_end() initially, and check for that in next() and use find_entry_ge() for the particular key at that point. --- xapian-core/backends/honey/honey_compact.cc | 16 ++++++------ xapian-core/backends/honey/honey_cursor.h | 2 ++ xapian-core/backends/honey/honey_metadata.cc | 30 +++++++++++++++++----- .../backends/honey/honey_spellingwordslist.cc | 7 ++++- .../backends/honey/honey_spellingwordslist.h | 7 ++--- xapian-core/backends/honey/honey_synonym.cc | 15 ++++++++++- xapian-core/backends/honey/honey_synonym.h | 11 ++------ 7 files changed, 59 insertions(+), 29 deletions(-) diff --git a/xapian-core/backends/honey/honey_compact.cc b/xapian-core/backends/honey/honey_compact.cc index eca7715fe..221f81ca9 100644 --- a/xapian-core/backends/honey/honey_compact.cc +++ b/xapian-core/backends/honey/honey_compact.cc @@ -325,7 +325,7 @@ class PostlistCursor : private GlassCursor { PostlistCursor(const GlassTable *in, Xapian::docid offset_) : GlassCursor(in), offset(offset_), firstdid(0) { - find_entry(string()); + find_entry_ge(string()); next(); } @@ -552,7 +552,7 @@ class PostlistCursor : private HoneyCursor { PostlistCursor(const HoneyTable *in, Xapian::docid offset_) : HoneyCursor(in), offset(offset_), firstdid(0) { - find_entry(string()); + find_entry_ge(string()); next(); } @@ -1000,7 +1000,7 @@ template struct MergeCursor; template<> struct MergeCursor : public GlassCursor { explicit MergeCursor(const GlassTable *in) : GlassCursor(in) { - find_entry(string()); + find_entry_ge(string()); next(); } }; @@ -1009,7 +1009,7 @@ struct MergeCursor : public GlassCursor { template<> struct MergeCursor : public HoneyCursor { explicit MergeCursor(const HoneyTable *in) : HoneyCursor(in) { - find_entry(string()); + find_entry_ge(string()); next(); } }; @@ -1362,7 +1362,7 @@ class PositionCursor : private GlassCursor { PositionCursor(const GlassTable *in, Xapian::docid offset_) : GlassCursor(in), offset(offset_), firstdid(0) { - find_entry(string()); + find_entry_ge(string()); next(); } @@ -1401,7 +1401,7 @@ class PositionCursor : private HoneyCursor { PositionCursor(const HoneyTable *in, Xapian::docid offset_) : HoneyCursor(in), offset(offset_), firstdid(0) { - find_entry(string()); + find_entry_ge(string()); next(); } @@ -1522,7 +1522,7 @@ merge_docid_keyed(T *out, const vector & inputs, if (in->empty()) continue; HoneyCursor cur(in); - cur.find_entry(string()); + cur.find_entry_ge(string()); string key; while (cur.next()) { @@ -1566,7 +1566,7 @@ merge_docid_keyed(T *out, const vector & inputs, if (in->empty()) continue; GlassCursor cur(in); - cur.find_entry(string()); + cur.find_entry_ge(string()); string key; while (cur.next()) { diff --git a/xapian-core/backends/honey/honey_cursor.h b/xapian-core/backends/honey/honey_cursor.h index 3cbe10646..37575a27a 100644 --- a/xapian-core/backends/honey/honey_cursor.h +++ b/xapian-core/backends/honey/honey_cursor.h @@ -77,6 +77,8 @@ class HoneyCursor { fh.set_pos(o.fh.get_pos()); } + void to_end() { is_at_end = true; } + bool after_end() const { return is_at_end; } bool next(); diff --git a/xapian-core/backends/honey/honey_metadata.cc b/xapian-core/backends/honey/honey_metadata.cc index 66d616083..6c2fd0b17 100644 --- a/xapian-core/backends/honey/honey_metadata.cc +++ b/xapian-core/backends/honey/honey_metadata.cc @@ -43,8 +43,8 @@ HoneyMetadataTermList::HoneyMetadataTermList( { LOGCALL_CTOR(DB, "HoneyMetadataTermList", database_ | cursor_ | prefix_); Assert(cursor); - // Seek to the first key before the first metadata key. - cursor->find_entry_lt(prefix); + // Set the cursor to its end to signal we haven't started yet. + cursor->to_end(); } HoneyMetadataTermList::~HoneyMetadataTermList() @@ -92,7 +92,13 @@ HoneyMetadataTermList::next() LOGCALL(DB, TermList *, "HoneyMetadataTermList::next", NO_ARGS); Assert(!at_end()); - cursor->next(); + if (cursor->after_end()) { + // This is the first action on a new HoneyMetadataTermList. + if (cursor->find_entry_ge(prefix)) + RETURN(NULL); + } else { + cursor->next(); + } if (cursor->after_end() || !startswith(cursor->current_key, prefix)) { // We've reached the end of the prefixed terms. delete cursor; @@ -108,11 +114,21 @@ HoneyMetadataTermList::skip_to(const string &key) LOGCALL(DB, TermList *, "HoneyMetadataTermList::skip_to", key); Assert(!at_end()); - if (!cursor->find_entry_ge(string("\0", 2) + key)) { - // The exact term we asked for isn't there, so check if the next - // term after it also has the right prefix. + // k is the table key (key is the user metadata key). + string k(2, '\0'); + k += key; + if (cursor->after_end() && prefix > k) { + // This is the first action on a new HoneySynonymTermList and we were + // asked to skip to a key before the prefix - this ought to leave us + // on the first key with the specified prefix. + k = prefix; + } + + if (!cursor->find_entry_ge(k)) { + // The exact key we asked for isn't there, so check if the next + // key after it also has the right prefix. if (cursor->after_end() || !startswith(cursor->current_key, prefix)) { - // We've reached the end of the prefixed terms. + // We've reached the end of the prefixed keys. delete cursor; cursor = NULL; } diff --git a/xapian-core/backends/honey/honey_spellingwordslist.cc b/xapian-core/backends/honey/honey_spellingwordslist.cc index c273e5145..39a5a4646 100644 --- a/xapian-core/backends/honey/honey_spellingwordslist.cc +++ b/xapian-core/backends/honey/honey_spellingwordslist.cc @@ -91,7 +91,12 @@ HoneySpellingWordsList::next() LOGCALL(DB, TermList *, "HoneySpellingWordsList::next", NO_ARGS); Assert(!at_end()); - cursor->next(); + if (cursor->after_end()) { + // This is the first action on a new HoneySpellingWordsList. + (void)cursor->find_entry_ge("W"); + } else { + cursor->next(); + } if (cursor->after_end() || !startswith(cursor->current_key, 'W')) { // We've reached the end of the prefixed terms. delete cursor; diff --git a/xapian-core/backends/honey/honey_spellingwordslist.h b/xapian-core/backends/honey/honey_spellingwordslist.h index 9274a812d..934ce7ccc 100644 --- a/xapian-core/backends/honey/honey_spellingwordslist.h +++ b/xapian-core/backends/honey/honey_spellingwordslist.h @@ -57,9 +57,10 @@ class HoneySpellingWordsList : public AllTermsList { public: HoneySpellingWordsList(const HoneyDatabase* database_, HoneyCursor* cursor_) : database(database_), cursor(cursor_), termfreq(0) { - // Seek to the entry before the first key with a "W" prefix, so the - // first next() will advance us to the first such entry. - cursor->find_entry(std::string("W", 1)); + // Set the cursor to its end to signal we haven't started yet. Then + // if the first action is next() it can use find_entry_ge("W") to + // locate the first word. + cursor->to_end(); } /// Destructor. diff --git a/xapian-core/backends/honey/honey_synonym.cc b/xapian-core/backends/honey/honey_synonym.cc index c033a582a..aaf128dd9 100644 --- a/xapian-core/backends/honey/honey_synonym.cc +++ b/xapian-core/backends/honey/honey_synonym.cc @@ -211,7 +211,13 @@ HoneySynonymTermList::next() LOGCALL(DB, TermList *, "HoneySynonymTermList::next", NO_ARGS); Assert(!at_end()); - cursor->next(); + if (cursor->after_end()) { + // This is the first action on a new HoneySynonymTermList. + if (cursor->find_entry_ge(prefix)) + RETURN(NULL); + } else { + cursor->next(); + } if (cursor->after_end() || !startswith(cursor->current_key, prefix)) { // We've reached the end of the prefixed terms. delete cursor; @@ -227,6 +233,13 @@ HoneySynonymTermList::skip_to(const string &term) LOGCALL(DB, TermList *, "HoneySynonymTermList::skip_to", term); Assert(!at_end()); + if (cursor->after_end() && prefix > term) { + // This is the first action on a new HoneySynonymTermList and we were + // asked to skip to a term before the prefix - this ought to leave us + // on the first term with the specified prefix. + RETURN(skip_to(prefix)); + } + if (!cursor->find_entry_ge(term)) { // The exact term we asked for isn't there, so check if the next // term after it also has the right prefix. diff --git a/xapian-core/backends/honey/honey_synonym.h b/xapian-core/backends/honey/honey_synonym.h index 789723522..6abd93b63 100644 --- a/xapian-core/backends/honey/honey_synonym.h +++ b/xapian-core/backends/honey/honey_synonym.h @@ -143,15 +143,8 @@ class HoneySynonymTermList : public AllTermsList { const std::string& prefix_) : database(database_), cursor(cursor_), prefix(prefix_) { - // Position the cursor on the highest key before the first key we want, - // so that the first call to next() will put us on the first key we - // want. - if (prefix.empty()) { - cursor->find_entry(std::string()); - } else { - // Seek to the first key before one with the desired prefix. - cursor->find_entry_lt(prefix); - } + // Set the cursor to its end to signal we haven't started yet. + cursor->to_end(); } /// Destructor. -- 2.11.4.GIT