xapian-bindings/python3/extra.i

   1 %{
   2 /* python/extra.i: Xapian scripting python interface additional python code.
   3  *
   4  * Copyright (C) 2003,2004,2005 James Aylett
   5  * Copyright (C) 2005,2006,2007,2008,2009,2010,2011,2013 Olly Betts
   6  * Copyright (C) 2007 Lemur Consulting Ltd
   7  * Copyright (C) 2010 Richard Boulton
   8  *
   9  * This program is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU General Public License as
  11  * published by the Free Software Foundation; either version 2 of the
  12  * License, or (at your option) any later version.
  13  *
  14  * This program is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17  * GNU General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU General Public License
  20  * along with this program; if not, write to the Free Software
  21  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
  22  * USA
  23  */
  24 %}
  25
  26 %pythoncode %{
  27
  28 # Set the documentation format - this is used by tools like "epydoc" to decide
  29 # how to format the documentation strings.
  30 __docformat__ = "restructuredtext en"
  31
  32 ##################################
  33 # Support for iteration of MSets #
  34 ##################################
  35
  36 class MSetItem(object):
  37     """An item returned from iteration of the MSet.
  38
  39     The item supports access to the following attributes and properties:
  40
  41      - `docid`: The Xapian document ID corresponding to this MSet item.
  42      - `weight`: The weight corresponding to this MSet item.
  43      - `rank`: The rank of this MSet item.  The rank is the position in the
  44        total set of matching documents of this item.  The highest document is
  45        given a rank of 0.  If the MSet did not start at the highest matching
  46        document, because a non-zero 'start' parameter was supplied to
  47        get_mset(), the first document in the MSet will have a rank greater than
  48        0 (in fact, it will be equal to the value of 'start' supplied to
  49        get_mset()).
  50      - `percent`: The percentage score assigned to this MSet item.
  51      - `document`: The document for this MSet item.  This can be used to access
  52        the document data, or any other information stored in the document (such
  53        as term lists).  It is lazily evaluated.
  54      - `collapse_key`: The value of the key which was used for collapsing.
  55      - `collapse_count`: An estimate of the number of documents that have been
  56        collapsed into this one.
  57
  58     The collapse count estimate will always be less than or equal to the actual
  59     number of other documents satisfying the match criteria with the same
  60     collapse key as this document.  If may be 0 even though there are other
  61     documents with the same collapse key which satisfying the match criteria.
  62     However if this method returns non-zero, there definitely are other such
  63     documents.  So this method may be used to inform the user that there are
  64     "at least N other matches in this group", or to control whether to offer a
  65     "show other documents in this group" feature (but note that it may not
  66     offer it in every case where it would show other documents).
  67
  68     """
  69
  70     __slots__ = ('_mset', '_firstitem', 'docid', 'weight', 'rank',
  71                  'percent', 'collapse_key', 'collapse_count', '_document', )
  72
  73     def __init__(self, iter, mset):
  74         self._mset = mset
  75         self._firstitem = self._mset.get_firstitem()
  76         self.docid = iter.get_docid()
  77         self.weight = iter.get_weight()
  78         self.rank = iter.get_rank()
  79         self.percent = iter.get_percent()
  80         self.collapse_key = iter.get_collapse_key()
  81         self.collapse_count = iter.get_collapse_count()
  82         self._document = None
  83
  84     def _get_document(self):
  85         if self._document is None:
  86             self._document = self._mset._get_hit_internal(self.rank - self._firstitem).get_document()
  87         return self._document
  88
  89     document = property(_get_document, doc="The document object corresponding to this MSet item.")
  90
  91 class MSetIter(object):
  92     """An iterator over the items in an MSet.
  93
  94     The iterator will return MSetItem objects, which will be evaluated lazily
  95     where appropriate.
  96
  97     """
  98     __slots__ = ('_iter', '_end', '_mset')
  99     def __init__(self, mset):
 100         self._iter = mset._begin()
 101         self._end = mset._end()
 102         self._mset = mset
 103
 104     def __iter__(self):
 105         return self
 106
 107     def __next__(self):
 108         if self._iter == self._end:
 109             raise StopIteration
 110         else:
 111             r = MSetItem(self._iter, self._mset)
 112             next(self._iter)
 113             return r
 114
 115 # Modify the MSet to allow access to the python iterators, and have other
 116 # convenience methods.
 117
def _mset_gen_iter(self):
    """Return an iterator over the MSet.

    A new MSetIter is created on every call.  The iterator will return
    MSetItem objects, which will be evaluated lazily where appropriate.

    """
    return MSetIter(self)
# Installing this as __iter__ is what makes "for item in mset" work.
MSet.__iter__ = _mset_gen_iter
 127
# Make len(mset) work by delegating to MSet.size().
MSet.__len__ = lambda self: MSet.size(self)
 129
 130 def _mset_getitem(self, index):
 131     """Get an item from the MSet.
 132
 133     The supplied index is relative to the start of the MSet, not the absolute
 134     rank of the item.
 135
 136     Returns an MSetItem.
 137
 138     """
 139     if index < 0:
 140         index += len(self)
 141     if index < 0 or index >= len(self):
 142         raise IndexError("Mset index out of range")
 143     return MSetItem(self._get_hit_internal(index), self)
 144 MSet.__getitem__ = _mset_getitem
 145 MSet.get_hit = _mset_getitem
 146
 147
 148 ##################################
 149 # Support for iteration of ESets #
 150 ##################################
 151
 152 class ESetItem(object):
 153     """An item returned from iteration of the ESet.
 154
 155     The item supports access to the following attributes:
 156
 157      - `term`: The term corresponding to this ESet item.
 158      - `weight`: The weight corresponding to this ESet item.
 159
 160     """
 161     __slots__ = ('term', 'weight')
 162
 163     def __init__(self, iter):
 164         self.term = iter.get_term()
 165         self.weight = iter.get_weight()
 166
 167 class ESetIter(object):
 168     """An iterator over the items in an ESet.
 169
 170     The iterator will return ESetItem objects.
 171
 172     """
 173     __slots__ = ('_iter', '_end')
 174     def __init__(self, eset):
 175         self._iter = eset._begin()
 176         self._end = eset._end()
 177
 178     def __iter__(self):
 179         return self
 180
 181     def __next__(self):
 182         if self._iter == self._end:
 183             raise StopIteration
 184         else:
 185             r = ESetItem(self._iter)
 186             next(self._iter)
 187             return r
 188
 189 # Modify the ESet to allow access to the python iterators, and have other
 190 # convenience methods.
 191
def _eset_gen_iter(self):
    """Return an iterator over the ESet.

    A new ESetIter is created on every call.  The iterator will return
    ESetItem objects.

    """
    return ESetIter(self)
# Installing this as __iter__ is what makes "for item in eset" work.
ESet.__iter__ = _eset_gen_iter
 200
# Make len(eset) work by delegating to ESet.size().
ESet.__len__ = lambda self: ESet.size(self)
 202
 203
 204 #######################################
 205 # Support for iteration of term lists #
 206 #######################################
 207
 208 class TermListItem(object):
 209     """An item returned from iteration of a term list.
 210
 211     The item supports access to the following attributes and properties:
 212
 213      - `term`: The term corresponding to this TermListItem.
 214      - `wdf`: The within document frequency of this term.
 215      - `termfreq`: The number of documents in the collection which are indexed
 216        by the term
 217      - `positer`: An iterator over the positions which the term appears at in
 218        the document.  This is only available until the iterator which returned
 219        this item next moves.
 220
 221     """
 222     __slots__ = ('_iter', 'term', '_wdf', '_termfreq')
 223
 224     def __init__(self, iter, term):
 225         self._iter = iter
 226         self.term = term
 227         self._wdf = None
 228         self._termfreq = None
 229
 230         if iter._has_wdf == TermIter.EAGER:
 231             self._wdf = iter._iter.get_wdf()
 232         if iter._has_termfreq == TermIter.EAGER:
 233             self._termfreq = iter._iter.get_termfreq()
 234
 235         # Support for sequence API
 236         sequence = ['term', 'wdf', 'termfreq', 'positer']
 237         if iter._has_wdf == TermIter.INVALID:
 238             sequence[1] = 0
 239         if iter._has_termfreq == TermIter.INVALID:
 240             sequence[2] = 0
 241         if iter._has_positions == TermIter.INVALID:
 242             sequence[3] = PositionIter()
 243
 244     def _get_wdf(self):
 245         """Get the within-document-frequency of the current term.
 246
 247         This will raise a InvalidOperationError exception if the iterator this
 248         item came from doesn't support within-document-frequencies.
 249
 250         """
 251         if self._wdf is None:
 252             if self._iter._has_wdf == TermIter.INVALID:
 253                 raise InvalidOperationError("Iterator does not support wdfs")
 254             if self.term is not self._iter._lastterm:
 255                 raise InvalidOperationError("Iterator has moved, and does not support random access")
 256             self._wdf = self._iter._iter.get_wdf()
 257         return self._wdf
 258     wdf = property(_get_wdf, doc=
 259     """The within-document-frequency of the current term (if meaningful).
 260
 261     This will raise a InvalidOperationError exception if the iterator
 262     this item came from doesn't support within-document-frequencies.
 263
 264     """)
 265
 266     def _get_termfreq(self):
 267         """Get the term frequency.
 268
 269         This is the number of documents in the collection which are indexed by
 270         the term.
 271
 272         This will raise a InvalidOperationError exception if the iterator this
 273         item came from doesn't support term frequencies.
 274
 275         """
 276         if self._termfreq is None:
 277             if self._iter._has_termfreq == TermIter.INVALID:
 278                 raise InvalidOperationError("Iterator does not support term frequencies")
 279             if self.term is not self._iter._lastterm:
 280                 raise InvalidOperationError("Iterator has moved, and does not support random access")
 281             self._termfreq = self._iter._iter.get_termfreq()
 282         return self._termfreq
 283     termfreq = property(_get_termfreq, doc=
 284     """The term frequency of the current term (if meaningful).
 285
 286     This is the number of documents in the collection which are indexed by the
 287     term.
 288
 289     This will raise a InvalidOperationError exception if the iterator
 290     this item came from doesn't support term frequencies.
 291
 292     """)
 293
 294     def _get_positer(self):
 295         """Get a position list iterator.
 296
 297         The iterator will return integers representing the positions that the
 298         term occurs at.
 299
 300         This will raise a InvalidOperationError exception if the iterator this
 301         item came from doesn't support position lists, or if the iterator has
 302         moved on since the item was returned from it.
 303
 304         """
 305         if self._iter._has_positions == TermIter.INVALID:
 306             raise InvalidOperationError("Iterator does not support position lists")
 307         # Access to position lists is always lazy, so we don't need to check
 308         # _has_positions.
 309         if self.term is not self._iter._lastterm:
 310             raise InvalidOperationError("Iterator has moved, and does not support random access")
 311         return PositionIter(self._iter._iter._positionlist_begin(),
 312                             self._iter._iter._positionlist_end())
 313     positer = property(_get_positer, doc=
 314     """A position iterator for the current term (if meaningful).
 315
 316     The iterator will return integers representing the positions that the term
 317     occurs at.
 318
 319     This will raise a InvalidOperationError exception if the iterator this item
 320     came from doesn't support position lists, or if the iterator has moved on
 321     since the item was returned from it.
 322
 323     """)
 324
 325
 326 class TermIter(object):
 327     """An iterator over a term list.
 328
 329     The iterator will return TermListItem objects, which will be evaluated
 330     lazily where appropriate.
 331
 332     """
 333     __slots__ = ('_iter', '_end', '_has_termfreq', '_has_wdf',
 334                  '_has_positions', '_return_strings', '_lastterm', '_moved')
 335
 336     INVALID = 0
 337     LAZY = 1
 338     EAGER = 2
 339
 340     def __init__(self, start, end, has_termfreq=INVALID,
 341                  has_wdf=INVALID, has_positions=INVALID,
 342                  return_strings=False):
 343         self._iter = start
 344         self._end = end
 345         self._has_termfreq = has_termfreq
 346         self._has_wdf = has_wdf
 347         self._has_positions = has_positions
 348         assert(has_positions != TermIter.EAGER) # Can't do eager access to position lists
 349         self._return_strings = return_strings
 350         self._lastterm = None # Used to test if the iterator has moved
 351
 352         # _moved is True if we've moved onto the next item.  This is needed so
 353         # that the iterator doesn't have to move on until just before next() is
 354         # called: since the iterator starts by pointing at a valid item, we
 355         # can't just call next(self._iter) unconditionally at the start of our
 356         # __next__() method.
 357         self._moved = True
 358
 359     def __iter__(self):
 360         return self
 361
 362     def __next__(self):
 363         if not self._moved:
 364             next(self._iter)
 365             self._moved = True
 366
 367         if self._iter == self._end:
 368             self._lastterm = None
 369             raise StopIteration
 370         else:
 371             self._lastterm = self._iter.get_term()
 372             self._moved = False
 373             if self._return_strings:
 374                 return self._lastterm
 375             return TermListItem(self, self._lastterm)
 376
 377     def skip_to(self, term):
 378         """Skip the iterator forward.
 379
 380         The iterator is advanced to the first term at or after the current
 381         position which is greater than or equal to the supplied term.
 382
 383         If there are no such items, this will raise StopIteration.
 384
 385         This returns the item which the iterator is moved to.  The subsequent
 386         item will be returned the next time that next() is called (unless
 387         skip_to() is called again first).
 388
 389         """
 390         if self._iter != self._end:
 391             self._iter.skip_to(term)
 392
 393         if self._iter == self._end:
 394             self._lastterm = None
 395             self._moved = True
 396             raise StopIteration
 397
 398         # Update self._lastterm if the iterator has moved.
 399         # TermListItems compare a saved value of lastterm with self._lastterm
 400         # with the object identity comparator, so it is important to ensure
 401         # that it does not get modified if the new term compares equal.
 402         newterm = self._iter.get_term()
 403         if newterm != self._lastterm:
 404             self._lastterm = newterm
 405
 406         self._moved = False
 407         if self._return_strings:
 408             return self._lastterm
 409         return TermListItem(self, self._lastterm)
 410
 411 # Modify Enquire to add a "matching_terms()" method.
 412 def _enquire_gen_iter(self, which):
 413     """Get an iterator over the terms which match a given match set item.
 414
 415     The match set item to consider is specified by the `which` parameter, which
 416     may be a document ID, or an MSetItem object.
 417
 418     The iterator will return string objects.
 419
 420     """
 421     if isinstance(which, MSetItem):
 422         which = which.docid
 423     return TermIter(self._get_matching_terms_begin(which),
 424                     self._get_matching_terms_end(which),
 425                     return_strings=True)
 426 Enquire.matching_terms = _enquire_gen_iter
 427
 428 # Modify Query to add an "__iter__()" method.
 429 def _query_gen_iter(self):
 430     """Get an iterator over the terms in a query.
 431
 432     The iterator will return string objects.
 433
 434     """
 435     return TermIter(self._get_terms_begin(),
 436                     self._get_terms_end(),
 437                     return_strings=True)
 438 Query.__iter__ = _query_gen_iter
 439
 440 # Modify Database to add an "__iter__()" method and an "allterms()" method.
 441 def _database_gen_allterms_iter(self, prefix=None):
 442     """Get an iterator over all the terms in the database.
 443
 444     The iterator will return TermListItem objects, but these will not support
 445     access to wdf, or position information.
 446
 447     Access to term frequency information is only available until the iterator
 448     has moved on.
 449
 450     If prefix is supplied, only terms which start with that prefix will be
 451     returned.
 452
 453     """
 454     if prefix is None:
 455         return TermIter(self._allterms_begin(), self._allterms_end(),
 456                         has_termfreq=TermIter.LAZY)
 457     else:
 458         return TermIter(self._allterms_begin(prefix), self._allterms_end(prefix),
 459                         has_termfreq=TermIter.LAZY)
 460 Database.__iter__ = _database_gen_allterms_iter
 461 Database.allterms = _database_gen_allterms_iter
 462
 463 # Modify Database to add a "termlist()" method.
 464 def _database_gen_termlist_iter(self, docid):
 465     """Get an iterator over all the terms which index a given document ID.
 466
 467     The iterator will return TermListItem objects.
 468
 469     Access to term frequency and position information is only available until
 470     the iterator has moved on.
 471
 472     """
 473     # Note: has_termfreq is set to LAZY because most databases don't store term
 474     # frequencies in the termlist (because this would require updating many termlist
 475     # entries for every document update), so access to the term frequency requires a
 476     # separate lookup.
 477     return TermIter(self._termlist_begin(docid), self._termlist_end(docid),
 478                     has_termfreq=TermIter.LAZY,
 479                     has_wdf=TermIter.EAGER,
 480                     has_positions=TermIter.LAZY)
 481 Database.termlist = _database_gen_termlist_iter
 482
 483 # Modify Database to add a "spellings()" method.
 484 def _database_gen_spellings_iter(self):
 485     """Get an iterator which returns all the spelling correction targets
 486
 487     The iterator will return TermListItem objects.  Only the term frequency is
 488     available; wdf and positions are not meaningful.
 489
 490     """
 491     return TermIter(self._spellings_begin(), self._spellings_end(),
 492                     has_termfreq=TermIter.EAGER,
 493                     has_wdf=TermIter.INVALID,
 494                     has_positions=TermIter.INVALID)
 495 Database.spellings = _database_gen_spellings_iter
 496
 497 # Modify Database to add a "synonyms()" method.
 498 def _database_gen_synonyms_iter(self, term):
 499     """Get an iterator which returns all the synonyms for a given term.
 500
 501     The term to return synonyms for is specified by the `term` parameter.
 502
 503     The iterator will return string objects.
 504
 505     """
 506     return TermIter(self._synonyms_begin(term),
 507                     self._synonyms_end(term),
 508                     return_strings=True)
 509 Database.synonyms = _database_gen_synonyms_iter
 510
 511 # Modify Database to add a "synonym_keys()" method.
 512 def _database_gen_synonym_keys_iter(self, prefix=""):
 513     """Get an iterator which returns all the terms which have synonyms.
 514
 515     The iterator will return string objects.
 516
 517     If `prefix` is non-empty, only terms with this prefix are returned.
 518
 519     """
 520     return TermIter(self._synonym_keys_begin(prefix),
 521                     self._synonym_keys_end(prefix),
 522                     return_strings=True)
 523 Database.synonym_keys = _database_gen_synonym_keys_iter
 524
 525 # Modify Database to add a "metadata_keys()" method, instead of direct access
 526 # to metadata_keys_begin and metadata_keys_end.
 527 def _database_gen_metadata_keys_iter(self, prefix=""):
 528     """Get an iterator which returns all the metadata keys.
 529
 530     The iterator will return string objects.
 531
 532     If `prefix` is non-empty, only metadata keys with this prefix are returned.
 533
 534     """
 535     return TermIter(self._metadata_keys_begin(prefix),
 536                     self._metadata_keys_end(prefix),
 537                     return_strings=True)
 538 Database.metadata_keys = _database_gen_metadata_keys_iter
 539
 540 # Modify Document to add an "__iter__()" method and a "termlist()" method.
 541 def _document_gen_termlist_iter(self):
 542     """Get an iterator over all the terms in a document.
 543
 544     The iterator will return TermListItem objects.
 545
 546     Access to term frequency and position information is only available until
 547     the iterator has moved on.
 548
 549     Note that term frequency information is only meaningful for a document
 550     retrieved from a database.  If term frequency information is requested for
 551     a document which was freshly created, an InvalidOperationError will be
 552     raised.
 553
 554     """
 555     # Note: document termlist iterators may be implemented entirely in-memory
 556     # (in which case access to all items could be allowed eagerly), but may
 557     # also be implemented by returning a database termlist (for documents which
 558     # are stored in a database, rather than freshly created).  We choose the
 559     # most conservative settings, to avoid doing eager access when lazy access
 560     # would be more appropriate.
 561     return TermIter(self._termlist_begin(), self._termlist_end(),
 562                     has_termfreq=TermIter.LAZY,
 563                     has_wdf=TermIter.EAGER,
 564                     has_positions=TermIter.LAZY)
 565 Document.__iter__ = _document_gen_termlist_iter
 566 Document.termlist = _document_gen_termlist_iter
 567
 568 # Modify QueryParser to add a "stoplist()" method.
 569 def _queryparser_gen_stoplist_iter(self):
 570     """Get an iterator over all the stopped terms from the previous query.
 571
 572     This returns an iterator over all the terms which were omitted from the
 573     previously parsed query due to being considered to be stopwords.  Each
 574     instance of a word omitted from the query is represented in the returned
 575     list, in the order in which the
 576
 577     The iterator will return string objects.
 578
 579     """
 580     return TermIter(self._stoplist_begin(), self._stoplist_end(),
 581                     return_strings=True)
 582 QueryParser.stoplist = _queryparser_gen_stoplist_iter
 583
 584 # Modify QueryParser to add an "unstemlist()" method.
 585 def _queryparser_gen_unstemlist_iter(self, tname):
 586     """Get an iterator over all the unstemmed forms of a stemmed term.
 587
 588     This returns an iterator which returns all the unstemmed words which were
 589     stemmed to the stemmed form specified by `tname` when parsing the previous
 590     query.  Each instance of a word which stems to `tname` is returned by the
 591     iterator in the order in which the words appeared in the query - an
 592     individual unstemmed word may thus occur multiple times.
 593
 594     The iterator will return string objects.
 595
 596     """
 597     return TermIter(self._unstem_begin(tname), self._unstem_end(tname),
 598                     return_strings=True)
 599 QueryParser.unstemlist = _queryparser_gen_unstemlist_iter
 600
 601 # Modify ValueCountMatchSpy to add a "values()" method.
 602 def wrapper():
 603     begin = ValueCountMatchSpy.values_begin
 604     del ValueCountMatchSpy.values_begin
 605     end = ValueCountMatchSpy.values_end
 606     del ValueCountMatchSpy.values_end
 607     def values(self):
 608         """Get an iterator over all the values in the slot.
 609
 610         Values will be returned in ascending alphabetical order.
 611
 612         The iterator will return TermListItem objects: the value can be
 613         accessed as the `term` property, and the frequency can be accessed as
 614         the `termfreq` property.
 615
 616         """
 617         return TermIter(begin(self), end(self), has_termfreq=TermIter.EAGER)
 618     return values
 619 ValueCountMatchSpy.values = wrapper()
 620 del wrapper
 621
 622 # Modify ValueCountMatchSpy to add a "top_values()" method.
 623 def wrapper():
 624     begin = ValueCountMatchSpy.top_values_begin
 625     del ValueCountMatchSpy.top_values_begin
 626     end = ValueCountMatchSpy.top_values_end
 627     del ValueCountMatchSpy.top_values_end
 628     def top_values(self, maxvalues):
 629         """Get an iterator over the most frequent values for the slot.
 630
 631         Values will be returned in descending order of frequency.  Values with
 632         the same frequency will be returned in ascending alphabetical order.
 633
 634         The iterator will return TermListItem objects: the value can be
 635         accessed as the `term` property, and the frequency can be accessed as
 636         the `termfreq` property.
 637
 638         """
 639         return TermIter(begin(self, maxvalues), end(self, maxvalues),
 640                         has_termfreq=TermIter.EAGER)
 641     return top_values
 642 ValueCountMatchSpy.top_values = wrapper()
 643 del wrapper
 644
 645 # When we make a query, keep a note of postingsources involved, so they won't
 646 # be deleted. This hack can probably be removed once xapian bug #186 is fixed.
 647 __query_init_orig = Query.__init__
 648 def _query_init(self, *args):
 649     """Make a new query object.
 650
 651     Many possible arguments are possible - see the documentation for details.
 652
 653     """
 654     ps = []
 655     if len(args) == 1 and isinstance(args[0], PostingSource):
 656         ps.append(args[0])
 657     else:
 658         for arg in args:
 659             if isinstance(arg, Query):
 660                 ps.extend(getattr(arg, '_ps', []))
 661             elif hasattr(arg, '__iter__'):
 662                 for listarg in arg:
 663                     if isinstance(listarg, Query):
 664                         ps.extend(getattr(listarg, '_ps', []))
 665     __query_init_orig(self, *args)
 666     self._ps = ps
 667 Query.__init__ = _query_init
 668 del _query_init
 669
 670 # When setting a query on enquire, keep a note of postingsources involved, so
 671 # they won't be deleted. This hack can probably be removed once xapian bug #186
 672 # is fixed.
__enquire_set_query_orig = Enquire.set_query
def _enquire_set_query(self, query, qlen=0):
    # Take over the query's list of posting sources (empty if it has none
    # recorded) so that they stay referenced by this Enquire object, then
    # delegate to the wrapped method.
    self._ps = getattr(query, '_ps', [])
    return __enquire_set_query_orig(self, query, qlen)
# Present the wrapper with the documentation of the method it replaces.
_enquire_set_query.__doc__ = __enquire_set_query_orig.__doc__
Enquire.set_query = _enquire_set_query
# Remove the helper name from the module namespace; the class keeps it.
del _enquire_set_query
 680
 681 # When getting  a query from enquire, keep a note of postingsources involved,
 682 # so they won't be deleted. This hack can probably be removed once xapian bug
 683 # #186 is fixed.
__enquire_get_query_orig = Enquire.get_query
def _enquire_get_query(self):
    query = __enquire_get_query_orig(self)
    # Hand the posting sources recorded on this Enquire object (if any) on
    # to the returned query, so that the query keeps them referenced too.
    query._ps = getattr(self, '_ps', [])
    return query
# Present the wrapper with the documentation of the method it replaces.
_enquire_get_query.__doc__ = __enquire_get_query_orig.__doc__
Enquire.get_query = _enquire_get_query
# Remove the helper name from the module namespace; the class keeps it.
del _enquire_get_query
 692
 693 # When we set a RangeProcessor into the QueryParser, keep a python
 694 # reference so it won't be deleted. This hack can probably be removed once
 695 # xapian bug #186 is fixed.
 696 __queryparser_add_rangeprocessor_orig = QueryParser.add_rangeprocessor
 697 def _queryparser_add_rangeprocessor(self, rproc):
 698     if not hasattr(self, '_rps'):
 699         self._rps = []
 700     self._rps.append(rproc)
 701     return __queryparser_add_rangeprocessor_orig(self, rproc)
 702 _queryparser_add_rangeprocessor.__doc__ = __queryparser_add_rangeprocessor_orig.__doc__
 703 QueryParser.add_rangeprocessor = _queryparser_add_rangeprocessor
 704 del _queryparser_add_rangeprocessor
 705
 706 # When we set a FieldProcessor into the QueryParser, keep a python
 707 # reference so it won't be deleted. This hack can probably be removed once
 708 # xapian bug #186 is fixed.
 709 __queryparser_add_prefix_orig = QueryParser.add_prefix
 710 def _queryparser_add_prefix(self, s, proc):
 711     if not isinstance(proc, (str, bytes)):
 712         if not hasattr(self, '_fps'):
 713             self._fps = []
 714         self._fps.append(proc)
 715     return __queryparser_add_prefix_orig(self, s, proc)
 716 _queryparser_add_prefix.__doc__ = __queryparser_add_prefix_orig.__doc__
 717 QueryParser.add_prefix = _queryparser_add_prefix
 718 del _queryparser_add_prefix
 719 __queryparser_add_boolean_prefix_orig = QueryParser.add_boolean_prefix
 720 def _queryparser_add_boolean_prefix(self, s, proc, exclusive = True):
 721     if not isinstance(proc, (str, bytes)):
 722         if not hasattr(self, '_fps'):
 723             self._fps = []
 724         self._fps.append(proc)
 725     return __queryparser_add_boolean_prefix_orig(self, s, proc, exclusive)
 726 _queryparser_add_boolean_prefix.__doc__ = __queryparser_add_boolean_prefix_orig.__doc__
 727 QueryParser.add_boolean_prefix = _queryparser_add_boolean_prefix
 728 del _queryparser_add_boolean_prefix
 729
 730 # When we set a Stopper into the QueryParser, keep a python reference so it
 731 # won't be deleted. This hack can probably be removed once xapian bug #186 is
 732 # fixed.
__queryparser_set_stopper_orig = QueryParser.set_stopper
def _queryparser_set_stopper(self, stopper):
    # Store the stopper on the Python object first so that it stays
    # referenced (replacing any previously stored stopper), then delegate to
    # the wrapped method.
    self._stopper = stopper
    return __queryparser_set_stopper_orig(self, stopper)
# Present the wrapper with the documentation of the method it replaces.
_queryparser_set_stopper.__doc__ = __queryparser_set_stopper_orig.__doc__
QueryParser.set_stopper = _queryparser_set_stopper
# Remove the helper name from the module namespace; the class keeps it.
del _queryparser_set_stopper
 740
 741 # When we set a Stopper into the TermGenerator, keep a python reference so it
 742 # won't be deleted. This hack can probably be removed once xapian bug #186 is
 743 # fixed.
__termgenerator_set_stopper_orig = TermGenerator.set_stopper
def _termgenerator_set_stopper(self, stopper):
    # Store the stopper on the Python object first so that it stays
    # referenced (replacing any previously stored stopper), then delegate to
    # the wrapped method.
    self._stopper = stopper
    return __termgenerator_set_stopper_orig(self, stopper)
# Present the wrapper with the documentation of the method it replaces.
_termgenerator_set_stopper.__doc__ = __termgenerator_set_stopper_orig.__doc__
TermGenerator.set_stopper = _termgenerator_set_stopper
# Remove the helper name from the module namespace; the class keeps it.
del _termgenerator_set_stopper
 751
 752 # When we set a Sorter on enquire, keep a python reference so it won't be
 753 # deleted.  This hack can probably be removed once xapian bug #186 is fixed.
 754 __enquire_set_sort_by_key_orig = Enquire.set_sort_by_key
 755 def _enquire_set_sort_by_key(self, sorter, reverse):
 756     self._sorter = sorter
 757     return __enquire_set_sort_by_key_orig(self, sorter, reverse)
 758 _enquire_set_sort_by_key.__doc__ = __enquire_set_sort_by_key_orig.__doc__
 759 Enquire.set_sort_by_key = _enquire_set_sort_by_key
 760 del _enquire_set_sort_by_key
 761
 762 __enquire_set_sort_by_key_then_relevance_orig = Enquire.set_sort_by_key_then_relevance
 763 def _enquire_set_sort_by_key_then_relevance(self, sorter, reverse):
 764     self._sorter = sorter
 765     return __enquire_set_sort_by_key_then_relevance_orig(self, sorter, reverse)
 766 _enquire_set_sort_by_key_then_relevance.__doc__ = __enquire_set_sort_by_key_then_relevance_orig.__doc__
 767 Enquire.set_sort_by_key_then_relevance = _enquire_set_sort_by_key_then_relevance
 768 del _enquire_set_sort_by_key_then_relevance
 769
 770 __enquire_set_sort_by_relevance_then_key_orig = Enquire.set_sort_by_relevance_then_key
 771 def _enquire_set_sort_by_relevance_then_key(self, sorter, reverse):
 772     self._sorter = sorter
 773     return __enquire_set_sort_by_relevance_then_key_orig(self, sorter, reverse)
 774 _enquire_set_sort_by_relevance_then_key.__doc__ = __enquire_set_sort_by_relevance_then_key_orig.__doc__
 775 Enquire.set_sort_by_relevance_then_key = _enquire_set_sort_by_relevance_then_key
 776 del _enquire_set_sort_by_relevance_then_key
 777
 778
 779 ##########################################
 780 # Support for iteration of posting lists #
 781 ##########################################
 782
 783 class PostingItem(object):
 784     """An item returned from iteration of a posting list.
 785
 786     The item supports access to the following attributes and properties:
 787
 788      - `docid`: The document ID corresponding to this PostingItem.
 789      - `doclength`: The length of the document corresponding to this
 790        PostingItem.
 791      - `wdf`: The within document frequency of the term which the posting list
 792        is for in the document corresponding to this PostingItem.
 793      - `positer`: An iterator over the positions which the term corresponing to
 794        this posting list occurs at in the document corresponding to this
 795        PostingItem.  This is only available until the iterator which returned
 796        this item next moves.
 797
 798     """
 799     __slots__ = ('_iter', 'docid', 'doclength', 'wdf',)
 800
 801     def __init__(self, iter):
 802         self._iter = iter
 803         self.docid = iter._iter.get_docid()
 804         self.doclength = iter._iter.get_doclength()
 805         self.wdf = iter._iter.get_wdf()
 806
 807         # Support for sequence API
 808         sequence = ['docid', 'doclength', 'wdf', 'positer']
 809         if not iter._has_positions:
 810             sequence[3] = PositionIter()
 811
 812     def _get_positer(self):
 813         """Get a position list iterator.
 814
 815         The iterator will return integers representing the positions that the
 816         term occurs at in the document corresponding to this PostingItem.
 817
 818         This will raise a InvalidOperationError exception if the iterator this
 819         item came from doesn't support position lists, or if the iterator has
 820         moved on since the item was returned from it.
 821
 822         """
 823         if not self._iter._has_positions:
 824             raise InvalidOperationError("Iterator does not support position lists")
 825         if self._iter._iter == self._iter._end or \
 826            self.docid != self._iter._iter.get_docid():
 827             raise InvalidOperationError("Iterator has moved, and does not support random access")
 828         return PositionIter(self._iter._iter._positionlist_begin(),
 829                             self._iter._iter._positionlist_end())
 830     positer = property(_get_positer, doc=
 831     """A position iterator for the current posting (if meaningful).
 832
 833     The iterator will return integers representing the positions that the term
 834     occurs at.
 835
 836     This will raise a InvalidOperationError exception if the iterator this item
 837     came from doesn't support position lists, or if the iterator has moved on
 838     since the item was returned from it.
 839
 840     """)
 841
 842
 843 class PostingIter(object):
 844     """An iterator over a posting list.
 845
 846     The iterator will return PostingItem objects, which will be evaluated
 847     lazily where appropriate.
 848
 849     """
 850     __slots__ = ('_iter', '_end', '_has_positions', '_moved')
 851
 852     def __init__(self, start, end, has_positions=False):
 853         self._iter = start
 854         self._end = end
 855         self._has_positions = has_positions
 856
 857         # _moved is True if we've moved onto the next item.  This is needed so
 858         # that the iterator doesn't have to move on until just before next() is
 859         # called: since the iterator starts by pointing at a valid item, we
 860         # can't just call next(self._iter) unconditionally at the start of our
 861         # __next__() method.
 862         self._moved = True
 863
 864     def __iter__(self):
 865         return self
 866
 867     def __next__(self):
 868         if not self._moved:
 869             next(self._iter)
 870             self._moved = True
 871
 872         if self._iter == self._end:
 873             raise StopIteration
 874         else:
 875             self._moved = False
 876             return PostingItem(self)
 877
 878     def skip_to(self, docid):
 879         """Skip the iterator forward.
 880
 881         The iterator is advanced to the first document with a document ID
 882         which is greater than or equal to the supplied document ID.
 883
 884         If there are no such items, this will raise StopIteration.
 885
 886         This returns the item which the iterator is moved to.  The subsequent
 887         item will be returned the next time that next() is called (unless
 888         skip_to() is called again first).
 889
 890         """
 891         if self._iter != self._end:
 892             self._iter.skip_to(docid)
 893         if self._iter == self._end:
 894             self._moved = True
 895             raise StopIteration
 896         self._moved = False
 897         return PostingItem(self)
 898
 899 def _database_gen_postlist_iter(self, tname):
 900     """Get an iterator over the postings which are indexed by a given term.
 901
 902     If `tname` is empty, an iterator over all the documents will be returned
 903     (this will contain one entry for each document, will always return a wdf of
 904     1, and will not allow access to a position iterator).
 905
 906     """
 907     if len(tname) != 0:
 908         return PostingIter(self._postlist_begin(tname), self._postlist_end(tname),
 909                            has_positions=True)
 910     else:
 911         return PostingIter(self._postlist_begin(tname), self._postlist_end(tname))
 912 Database.postlist = _database_gen_postlist_iter
 913
 914
 915 ###########################################
 916 # Support for iteration of position lists #
 917 ###########################################
 918
 919 class PositionIter(object):
 920     """An iterator over a position list.
 921
 922     The iterator will return integers, in ascending order.
 923
 924     """
 925     def __init__(self, start = 0, end = 0):
 926         self.iter = start
 927         self.end = end
 928
 929     def __iter__(self):
 930         return self
 931
 932     def __next__(self):
 933         if self.iter==self.end:
 934             raise StopIteration
 935         else:
 936             r = self.iter.get_termpos()
 937             next(self.iter)
 938             return r
 939
 940 # Modify Database to add a "positionlist()" method.
 941 def _database_gen_positionlist_iter(self, docid, tname):
 942     """Get an iterator over all the positions in a given document of a term.
 943
 944     The iterator will return integers, in ascending order.
 945
 946     """
 947     return PositionIter(self._positionlist_begin(docid, tname), self._positionlist_end(docid, tname))
 948 Database.positionlist = _database_gen_positionlist_iter
 949
 950 ########################################
 951 # Support for iteration of value lists #
 952 ########################################
 953
 954 class ValueItem(object):
 955     """An item returned from iteration of the values in a document.
 956
 957     The item supports access to the following attributes:
 958
 959      - `num`: The number of the value.
 960      - `value`: The contents of the value.
 961
 962     """
 963
 964     __slots__ = ('num', 'value', )
 965
 966     def __init__(self, num, value):
 967         self.num = num
 968         self.value = value
 969
 970 class ValueIter(object):
 971     """An iterator over all the values stored in a document.
 972
 973     The iterator will return ValueItem objects, in ascending order of value number.
 974
 975     """
 976     def __init__(self, start, end):
 977         self.iter = start
 978         self.end = end
 979
 980     def __iter__(self):
 981         return self
 982
 983     def __next__(self):
 984         if self.iter==self.end:
 985             raise StopIteration
 986         else:
 987             r = ValueItem(self.iter.get_valueno(), self.iter.get_value())
 988             next(self.iter)
 989             return r
 990
 991 # Modify Document to add a "values()" method.
 992 def _document_gen_values_iter(self):
 993     """Get an iterator over all the values stored in a document.
 994
 995     The iterator will return ValueItem objects, in ascending order of value number.
 996
 997     """
 998     return ValueIter(self._values_begin(), self._values_end())
 999 Document.values = _document_gen_values_iter
1000
1001
1002 ##########################################
1003 # Support for iteration of value streams #
1004 ##########################################
1005
class ValueStreamItem(object):
    """An item returned from iteration of a value stream.

    The item has the following attributes:

     - `docid`: The docid for the item.
     - `value`: The contents of the value.

    """

    __slots__ = ('docid', 'value', )

    def __init__(self, docid, value):
        self.docid, self.value = docid, value
1021
class ValueStreamIter(object):
    """An iterator over a value stream.

    The iterator will return ValueStreamItem objects, each giving a document
    id and the value for that document, in ascending order of document id.

    """
    def __init__(self, start, end):
        self.iter, self.end = start, end
        # The underlying iterator already points at a valid item when we are
        # created, so the first __next__() call must not advance it.  `moved`
        # is True whenever the underlying iterator does not need stepping
        # before the next item is read.
        self.moved = True

    def __iter__(self):
        return self

    def __next__(self):
        # Step past the previously returned item, if that hasn't happened yet.
        if not self.moved:
            self.iter.__next__()
            self.moved = True

        if self.iter==self.end:
            raise StopIteration
        self.moved = False
        return ValueStreamItem(self.iter.get_docid(), self.iter.get_value())

    def skip_to(self, docid):
        """Skip the iterator forward.

        The iterator is advanced to the first document with a document ID
        which is greater than or equal to the supplied document ID.

        If there are no such items, this will raise StopIteration.

        This returns the item which the iterator is moved to.  The subsequent
        item will be returned the next time that next() is called (unless
        skip_to() is called again first).

        """
        # Only ask the underlying iterator to skip if it isn't already at the
        # end.
        if self.iter != self.end:
            self.iter.skip_to(docid)
        if not (self.iter == self.end):
            # Positioned on an item which is about to be returned, so the
            # next __next__() call has to advance first.
            self.moved = False
            return ValueStreamItem(self.iter.get_docid(), self.iter.get_value())
        self.moved = True
        raise StopIteration
1067
# Give Database a "valuestream()" method, and take the valuestream_begin() and
# valuestream_end() methods off the class.
def wrapper():
    # Capture each original method in the closure before deleting it from
    # Database, so that valuestream() below can still call it.
    begin_fn = Database.valuestream_begin
    del Database.valuestream_begin
    end_fn = Database.valuestream_end
    del Database.valuestream_end
    def valuestream(self, slot):
        """Get an iterator over all the values stored in a slot in the database.

        The iterator will return ValueStreamItem objects, in ascending order of
        document id.

        """
        start = begin_fn(self, slot)
        finish = end_fn(self, slot)
        return ValueStreamIter(start, finish)
    return valuestream
Database.valuestream = wrapper()
del wrapper
1086
1087 ##########################################
1088 # Support for iteration of LatLongCoords #
1089 ##########################################
1090
class LatLongCoordsIter(object):
    """An iterator over all the coordinates in a LatLongCoords object.

    The iterator returns LatLongCoord objects.

    Two LatLongCoordsIter objects compare equal when their underlying
    iterators report (via `equals()`) that they are at the same position.

    """
    def __init__(self, start, end):
        self.iter = start
        self.end = end

    def __iter__(self):
        return self

    # This class has no equals() method of its own; only the wrapped iterator
    # does, so the comparisons have to be delegated to it.  For objects of any
    # other type NotImplemented is returned so that Python falls back to its
    # default handling rather than raising.
    def __eq__(self, other):
        if not isinstance(other, LatLongCoordsIter):
            return NotImplemented
        return self.iter.equals(other.iter)

    def __ne__(self, other):
        if not isinstance(other, LatLongCoordsIter):
            return NotImplemented
        return not self.iter.equals(other.iter)

    def __next__(self):
        if self.iter.equals(self.end):
            raise StopIteration
        # Read the current coordinate before stepping the underlying iterator.
        coord = self.iter.get_coord()
        self.iter.__next__()
        return coord
1117
# Make LatLongCoords iterable by giving it an __iter__() method.
def _latlongcoords_iter(self):
    """Get an iterator over all the coordinates in a LatLongCoords.

    The iterator will return xapian.LatLongCoord objects.

    """
    start = self.begin()
    finish = self.end()
    return LatLongCoordsIter(start, finish)
LatLongCoords.__iter__ = _latlongcoords_iter
# Neither the helper function nor the LatLongCoordsIterator name is left in
# the module namespace.
del _latlongcoords_iter
del LatLongCoordsIterator
1129
# Wrap Enquire.add_matchspy() so that the Python Enquire object keeps a
# reference to every decider it is given, which stops them being deleted
# before the Enquire object is.  This workaround can probably be removed once
# xapian bug #186 is fixed.
_enquire_add_matchspy_orig = Enquire.add_matchspy
def _enquire_match_spy_add(self, decider):
    # The list of kept deciders is created lazily, on the first call.
    try:
        kept = self._deciders
    except AttributeError:
        kept = self._deciders = []
    kept.append(decider)
    _enquire_add_matchspy_orig(self, decider)
_enquire_match_spy_add.__doc__ = _enquire_add_matchspy_orig.__doc__
Enquire.add_matchspy = _enquire_match_spy_add
1141
# Wrap Enquire.clear_matchspies() so that any `_deciders` list stored on the
# Python Enquire object is discarded once the wrapped method has run.
_enquire_clear_matchspies_orig = Enquire.clear_matchspies
def _enquire_match_spies_clear(self):
    _enquire_clear_matchspies_orig(self)
    # Nothing to discard if no `_deciders` list has been stored.
    if not hasattr(self, '_deciders'):
        return
    del self._deciders
_enquire_match_spies_clear.__doc__ = _enquire_clear_matchspies_orig.__doc__
Enquire.clear_matchspies = _enquire_match_spies_clear
1149
1150
# Wrap Stem.__init__() so that a StemImplementation object passed as the first
# argument has __disown__() called on it, so that Python won't delete it from
# under us.
_stem_init_orig = Stem.__init__
def _stem_init(self, *args):
    _stem_init_orig(self, *args)
    if not args:
        return
    implementation = args[0]
    if isinstance(implementation, StemImplementation):
        implementation.__disown__()
_stem_init.__doc__ = _stem_init_orig.__doc__
Stem.__init__ = _stem_init
1160
1161
# Add wrappers for Query::MatchAll and Query::MatchNothing.  Each is a class
# attribute of Query holding a Query object created once, here: MatchAll is
# built from an empty string and MatchNothing is a default-constructed Query.
Query.MatchAll = Query("")
Query.MatchNothing = Query()
1165
1166
# Set the list of names which should be public.
# Note that this needs to happen at the end of xapian.py.
#
# Every name currently defined in the module is public, except for names
# starting with an underscore and names ending in "_swigregister" or
# "Iterator".  A generator expression is used rather than a for loop so that
# no loop variable is left behind as a module-level name; dir() is still
# evaluated in the module's scope because it is the outermost iterable.
__all__ = tuple(
    name for name in dir()
    if not (name.startswith('_')
            or name.endswith(('_swigregister', 'Iterator')))
)
1175 %}
1176
1177 /* vim:syntax=python:set expandtab: */