xapian-bindings/python/extra.i

   1 %{
   2 /* python/extra.i: Xapian scripting python interface additional python code.
   3  *
   4  * Copyright (C) 2003,2004,2005 James Aylett
   5  * Copyright (C) 2005,2006,2007,2008,2009,2010,2011 Olly Betts
   6  * Copyright (C) 2007 Lemur Consulting Ltd
   7  * Copyright (C) 2010 Richard Boulton
   8  *
   9  * This program is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU General Public License as
  11  * published by the Free Software Foundation; either version 2 of the
  12  * License, or (at your option) any later version.
  13  *
  14  * This program is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17  * GNU General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU General Public License
  20  * along with this program; if not, write to the Free Software
  21  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
  22  * USA
  23  */
  24 %}
  25
  26 %pythoncode %{
  27
  28 # Set the documentation format - this is used by tools like "epydoc" to decide
  29 # how to format the documentation strings.
  30 __docformat__ = "restructuredtext en"
  31
  32 ##################################
  33 # Support for iteration of MSets #
  34 ##################################
  35
  36 class MSetItem(object):
  37     """An item returned from iteration of the MSet.
  38
  39     The item supports access to the following attributes and properties:
  40
  41      - `docid`: The Xapian document ID corresponding to this MSet item.
  42      - `weight`: The weight corresponding to this MSet item.
  43      - `rank`: The rank of this MSet item.  The rank is the position in the
  44        total set of matching documents of this item.  The highest document is
  45        given a rank of 0.  If the MSet did not start at the highest matching
  46        document, because a non-zero 'start' parameter was supplied to
  47        get_mset(), the first document in the MSet will have a rank greater than
  48        0 (in fact, it will be equal to the value of 'start' supplied to
  49        get_mset()).
  50      - `percent`: The percentage score assigned to this MSet item.
  51      - `document`: The document for this MSet item.  This can be used to access
  52        the document data, or any other information stored in the document (such
  53        as term lists).  It is lazily evaluated.
  54      - `collapse_key`: The value of the key which was used for collapsing.
  55      - `collapse_count`: An estimate of the number of documents that have been
  56        collapsed into this one.
  57
  58     The collapse count estimate will always be less than or equal to the actual
  59     number of other documents satisfying the match criteria with the same
  60     collapse key as this document.  If may be 0 even though there are other
  61     documents with the same collapse key which satisfying the match criteria.
  62     However if this method returns non-zero, there definitely are other such
  63     documents.  So this method may be used to inform the user that there are
  64     "at least N other matches in this group", or to control whether to offer a
  65     "show other documents in this group" feature (but note that it may not
  66     offer it in every case where it would show other documents).
  67
  68     """
  69
  70     __slots__ = ('_mset', '_firstitem', 'docid', 'weight', 'rank',
  71                  'percent', 'collapse_key', 'collapse_count', '_document', )
  72
  73     def __init__(self, iter, mset):
  74         self._mset = mset
  75         self._firstitem = self._mset.get_firstitem()
  76         self.docid = iter.get_docid()
  77         self.weight = iter.get_weight()
  78         self.rank = iter.get_rank()
  79         self.percent = iter.get_percent()
  80         self.collapse_key = iter.get_collapse_key()
  81         self.collapse_count = iter.get_collapse_count()
  82         self._document = None
  83
  84     def _get_document(self):
  85         if self._document is None:
  86             self._document = self._mset._get_hit_internal(self.rank - self._firstitem).get_document()
  87         return self._document
  88
  89     document = property(_get_document, doc="The document object corresponding to this MSet item.")
  90
  91 class MSetIter(object):
  92     """An iterator over the items in an MSet.
  93
  94     The iterator will return MSetItem objects, which will be evaluated lazily
  95     where appropriate.
  96
  97     """
  98     __slots__ = ('_iter', '_end', '_mset')
  99     def __init__(self, mset):
 100         self._iter = mset._begin()
 101         self._end = mset._end()
 102         self._mset = mset
 103
 104     def __iter__(self):
 105         return self
 106
 107     def next(self):
 108         if self._iter == self._end:
 109             raise StopIteration
 110         else:
 111             r = MSetItem(self._iter, self._mset)
 112             self._iter.next()
 113             return r
 114
 115 # Modify the MSet to allow access to the python iterators, and have other
 116 # convenience methods.
 117
 118 def _mset_gen_iter(self):
 119     """Return an iterator over the MSet.
 120
 121     The iterator will return MSetItem objects, which will be evaluated lazily
 122     where appropriate.
 123
 124     """
 125     return MSetIter(self)
 126 MSet.__iter__ = _mset_gen_iter
 127
# Let len(mset) work by delegating to MSet.size().
MSet.__len__ = lambda self: MSet.size(self)
 129
 130 def _mset_getitem(self, index):
 131     """Get an item from the MSet.
 132
 133     The supplied index is relative to the start of the MSet, not the absolute
 134     rank of the item.
 135
 136     Returns an MSetItem.
 137
 138     """
 139     if index < 0:
 140         index += len(self)
 141     if index < 0 or index >= len(self):
 142         raise IndexError("Mset index out of range")
 143     return MSetItem(self._get_hit_internal(index), self)
 144 MSet.__getitem__ = _mset_getitem
 145 MSet.get_hit = _mset_getitem
 146
 147
 148 ##################################
 149 # Support for iteration of ESets #
 150 ##################################
 151
 152 class ESetItem(object):
 153     """An item returned from iteration of the ESet.
 154
 155     The item supports access to the following attributes:
 156
 157      - `term`: The term corresponding to this ESet item.
 158      - `weight`: The weight corresponding to this ESet item.
 159
 160     """
 161     __slots__ = ('term', 'weight')
 162
 163     def __init__(self, iter):
 164         self.term = iter.get_term()
 165         self.weight = iter.get_weight()
 166
 167 class ESetIter(object):
 168     """An iterator over the items in an ESet.
 169
 170     The iterator will return ESetItem objects.
 171
 172     """
 173     __slots__ = ('_iter', '_end')
 174     def __init__(self, eset):
 175         self._iter = eset._begin()
 176         self._end = eset._end()
 177
 178     def __iter__(self):
 179         return self
 180
 181     def next(self):
 182         if self._iter == self._end:
 183             raise StopIteration
 184         else:
 185             r = ESetItem(self._iter)
 186             self._iter.next()
 187             return r
 188
 189 # Modify the ESet to allow access to the python iterators, and have other
 190 # convenience methods.
 191
 192 def _eset_gen_iter(self):
 193     """Return an iterator over the ESet.
 194
 195     The iterator will return ESetItem objects.
 196
 197     """
 198     return ESetIter(self)
 199 ESet.__iter__ = _eset_gen_iter
 200
# Let len(eset) work by delegating to ESet.size().
ESet.__len__ = lambda self: ESet.size(self)
 202
 203
 204 #######################################
 205 # Support for iteration of term lists #
 206 #######################################
 207
 208 class TermListItem(object):
 209     """An item returned from iteration of a term list.
 210
 211     The item supports access to the following attributes and properties:
 212
 213      - `term`: The term corresponding to this TermListItem.
 214      - `wdf`: The within document frequency of this term.
 215      - `termfreq`: The number of documents in the collection which are indexed
 216        by the term
 217      - `positer`: An iterator over the positions which the term appears at in
 218        the document.  This is only available until the iterator which returned
 219        this item next moves.
 220
 221     """
 222     __slots__ = ('_iter', 'term', '_wdf', '_termfreq')
 223
 224     def __init__(self, iter, term):
 225         self._iter = iter
 226         self.term = term
 227         self._wdf = None
 228         self._termfreq = None
 229
 230         if iter._has_wdf == TermIter.EAGER:
 231             self._wdf = iter._iter.get_wdf()
 232         if iter._has_termfreq == TermIter.EAGER:
 233             self._termfreq = iter._iter.get_termfreq()
 234
 235         # Support for sequence API
 236         sequence = ['term', 'wdf', 'termfreq', 'positer']
 237         if iter._has_wdf == TermIter.INVALID:
 238             sequence[1] = 0
 239         if iter._has_termfreq == TermIter.INVALID:
 240             sequence[2] = 0
 241         if iter._has_positions == TermIter.INVALID:
 242             sequence[3] = PositionIter()
 243
 244     def _get_wdf(self):
 245         """Get the within-document-frequency of the current term.
 246
 247         This will raise a InvalidOperationError exception if the iterator this
 248         item came from doesn't support within-document-frequencies.
 249
 250         """
 251         if self._wdf is None:
 252             if self._iter._has_wdf == TermIter.INVALID:
 253                 raise InvalidOperationError("Iterator does not support wdfs")
 254             if self.term is not self._iter._lastterm:
 255                 raise InvalidOperationError("Iterator has moved, and does not support random access")
 256             self._wdf = self._iter._iter.get_wdf()
 257         return self._wdf
 258     wdf = property(_get_wdf, doc=
 259     """The within-document-frequency of the current term (if meaningful).
 260
 261     This will raise a InvalidOperationError exception if the iterator
 262     this item came from doesn't support within-document-frequencies.
 263
 264     """)
 265
 266     def _get_termfreq(self):
 267         """Get the term frequency.
 268
 269         This is the number of documents in the collection which are indexed by
 270         the term.
 271
 272         This will raise a InvalidOperationError exception if the iterator this
 273         item came from doesn't support term frequencies.
 274
 275         """
 276         if self._termfreq is None:
 277             if self._iter._has_termfreq == TermIter.INVALID:
 278                 raise InvalidOperationError("Iterator does not support term frequencies")
 279             if self.term is not self._iter._lastterm:
 280                 raise InvalidOperationError("Iterator has moved, and does not support random access")
 281             self._termfreq = self._iter._iter.get_termfreq()
 282         return self._termfreq
 283     termfreq = property(_get_termfreq, doc=
 284     """The term frequency of the current term (if meaningful).
 285
 286     This is the number of documents in the collection which are indexed by the
 287     term.
 288
 289     This will raise a InvalidOperationError exception if the iterator
 290     this item came from doesn't support term frequencies.
 291
 292     """)
 293
 294     def _get_positer(self):
 295         """Get a position list iterator.
 296
 297         The iterator will return integers representing the positions that the
 298         term occurs at.
 299
 300         This will raise a InvalidOperationError exception if the iterator this
 301         item came from doesn't support position lists, or if the iterator has
 302         moved on since the item was returned from it.
 303
 304         """
 305         if self._iter._has_positions == TermIter.INVALID:
 306             raise InvalidOperationError("Iterator does not support position lists")
 307         # Access to position lists is always lazy, so we don't need to check
 308         # _has_positions.
 309         if self.term is not self._iter._lastterm:
 310             raise InvalidOperationError("Iterator has moved, and does not support random access")
 311         return PositionIter(self._iter._iter._positionlist_begin(),
 312                             self._iter._iter._positionlist_end())
 313     positer = property(_get_positer, doc=
 314     """A position iterator for the current term (if meaningful).
 315
 316     The iterator will return integers representing the positions that the term
 317     occurs at.
 318
 319     This will raise a InvalidOperationError exception if the iterator this item
 320     came from doesn't support position lists, or if the iterator has moved on
 321     since the item was returned from it.
 322
 323     """)
 324
 325
 326 class TermIter(object):
 327     """An iterator over a term list.
 328
 329     The iterator will return TermListItem objects, which will be evaluated
 330     lazily where appropriate.
 331
 332     """
 333     __slots__ = ('_iter', '_end', '_has_termfreq', '_has_wdf',
 334                  '_has_positions', '_return_strings', '_lastterm', '_moved')
 335
 336     INVALID = 0
 337     LAZY = 1
 338     EAGER = 2
 339
 340     def __init__(self, start, end, has_termfreq=INVALID,
 341                  has_wdf=INVALID, has_positions=INVALID,
 342                  return_strings=False):
 343         self._iter = start
 344         self._end = end
 345         self._has_termfreq = has_termfreq
 346         self._has_wdf = has_wdf
 347         self._has_positions = has_positions
 348         assert(has_positions != TermIter.EAGER) # Can't do eager access to position lists
 349         self._return_strings = return_strings
 350         self._lastterm = None # Used to test if the iterator has moved
 351
 352         # _moved is True if we've moved onto the next item.  This is needed so
 353         # that the iterator doesn't have to move on until just before next() is
 354         # called: since the iterator starts by pointing at a valid item, we
 355         # can't just call self._iter.next() unconditionally at the start of our
 356         # next() method.
 357         self._moved = True
 358
 359     def __iter__(self):
 360         return self
 361
 362     def next(self):
 363         if not self._moved:
 364             self._iter.next()
 365             self._moved = True
 366
 367         if self._iter == self._end:
 368             self._lastterm = None
 369             raise StopIteration
 370         else:
 371             self._lastterm = self._iter.get_term()
 372             self._moved = False
 373             if self._return_strings:
 374                 return self._lastterm
 375             return TermListItem(self, self._lastterm)
 376
 377     def skip_to(self, term):
 378         """Skip the iterator forward.
 379
 380         The iterator is advanced to the first term at or after the current
 381         position which is greater than or equal to the supplied term.
 382
 383         If there are no such items, this will raise StopIteration.
 384
 385         This returns the item which the iterator is moved to.  The subsequent
 386         item will be returned the next time that next() is called (unless
 387         skip_to() is called again first).
 388
 389         """
 390         if self._iter != self._end:
 391             self._iter.skip_to(term)
 392
 393         if self._iter == self._end:
 394             self._lastterm = None
 395             self._moved = True
 396             raise StopIteration
 397
 398         # Update self._lastterm if the iterator has moved.
 399         # TermListItems compare a saved value of lastterm with self._lastterm
 400         # with the object identity comparator, so it is important to ensure
 401         # that it does not get modified if the new term compares equal.
 402         newterm = self._iter.get_term()
 403         if newterm != self._lastterm:
 404             self._lastterm = newterm
 405
 406         self._moved = False
 407         if self._return_strings:
 408             return self._lastterm
 409         return TermListItem(self, self._lastterm)
 410
 411 # Modify Enquire to add a "matching_terms()" method.
 412 def _enquire_gen_iter(self, which):
 413     """Get an iterator over the terms which match a given match set item.
 414
 415     The match set item to consider is specified by the `which` parameter, which
 416     may be a document ID, or an MSetItem object.
 417
 418     The iterator will return string objects.
 419
 420     """
 421     if isinstance(which, MSetItem):
 422         which = which.docid
 423     return TermIter(self._get_matching_terms_begin(which),
 424                     self._get_matching_terms_end(which),
 425                     return_strings=True)
 426 Enquire.matching_terms = _enquire_gen_iter
 427
 428 # Modify Query to add an "__iter__()" method.
 429 def _query_gen_iter(self):
 430     """Get an iterator over the terms in a query.
 431
 432     The iterator will return string objects.
 433
 434     """
 435     return TermIter(self._get_terms_begin(),
 436                     self._get_terms_end(),
 437                     return_strings=True)
 438 Query.__iter__ = _query_gen_iter
 439
 440 # Modify Database to add an "__iter__()" method and an "allterms()" method.
 441 def _database_gen_allterms_iter(self, prefix=None):
 442     """Get an iterator over all the terms in the database.
 443
 444     The iterator will return TermListItem objects, but these will not support
 445     access to wdf, or position information.
 446
 447     Access to term frequency information is only available until the iterator
 448     has moved on.
 449
 450     If prefix is supplied, only terms which start with that prefix will be
 451     returned.
 452
 453     """
 454     if prefix is None:
 455         return TermIter(self._allterms_begin(), self._allterms_end(),
 456                         has_termfreq=TermIter.LAZY)
 457     else:
 458         return TermIter(self._allterms_begin(prefix), self._allterms_end(prefix),
 459                         has_termfreq=TermIter.LAZY)
 460 Database.__iter__ = _database_gen_allterms_iter
 461 Database.allterms = _database_gen_allterms_iter
 462
 463 # Modify Database to add a "termlist()" method.
 464 def _database_gen_termlist_iter(self, docid):
 465     """Get an iterator over all the terms which index a given document ID.
 466
 467     The iterator will return TermListItem objects.
 468
 469     Access to term frequency and position information is only available until
 470     the iterator has moved on.
 471
 472     """
 473     # Note: has_termfreq is set to LAZY because most databases don't store term
 474     # frequencies in the termlist (because this would require updating many termlist
 475     # entries for every document update), so access to the term frequency requires a
 476     # separate lookup.
 477     return TermIter(self._termlist_begin(docid), self._termlist_end(docid),
 478                     has_termfreq=TermIter.LAZY,
 479                     has_wdf=TermIter.EAGER,
 480                     has_positions=TermIter.LAZY)
 481 Database.termlist = _database_gen_termlist_iter
 482
 483 # Modify Database to add a "spellings()" method.
 484 def _database_gen_spellings_iter(self):
 485     """Get an iterator which returns all the spelling correction targets
 486
 487     The iterator will return TermListItem objects.  Only the term frequency is
 488     available; wdf and positions are not meaningful.
 489
 490     """
 491     return TermIter(self._spellings_begin(), self._spellings_end(),
 492                     has_termfreq=TermIter.EAGER,
 493                     has_wdf=TermIter.INVALID,
 494                     has_positions=TermIter.INVALID)
 495 Database.spellings = _database_gen_spellings_iter
 496
 497 # Modify Database to add a "synonyms()" method.
 498 def _database_gen_synonyms_iter(self, term):
 499     """Get an iterator which returns all the synonyms for a given term.
 500
 501     The term to return synonyms for is specified by the `term` parameter.
 502
 503     The iterator will return string objects.
 504
 505     """
 506     return TermIter(self._synonyms_begin(term),
 507                     self._synonyms_end(term),
 508                     return_strings=True)
 509 Database.synonyms = _database_gen_synonyms_iter
 510
 511 # Modify Database to add a "synonym_keys()" method.
 512 def _database_gen_synonym_keys_iter(self, prefix=""):
 513     """Get an iterator which returns all the terms which have synonyms.
 514
 515     The iterator will return string objects.
 516
 517     If `prefix` is non-empty, only terms with this prefix are returned.
 518
 519     """
 520     return TermIter(self._synonym_keys_begin(prefix),
 521                     self._synonym_keys_end(prefix),
 522                     return_strings=True)
 523 Database.synonym_keys = _database_gen_synonym_keys_iter
 524
 525 # Modify Database to add a "metadata_keys()" method, instead of direct access
 526 # to metadata_keys_begin and metadata_keys_end.
 527 def _database_gen_metadata_keys_iter(self, prefix=""):
 528     """Get an iterator which returns all the metadata keys.
 529
 530     The iterator will return string objects.
 531
 532     If `prefix` is non-empty, only metadata keys with this prefix are returned.
 533
 534     """
 535     return TermIter(self._metadata_keys_begin(prefix),
 536                     self._metadata_keys_end(prefix),
 537                     return_strings=True)
 538 Database.metadata_keys = _database_gen_metadata_keys_iter
 539
 540 # Modify Document to add an "__iter__()" method and a "termlist()" method.
 541 def _document_gen_termlist_iter(self):
 542     """Get an iterator over all the terms in a document.
 543
 544     The iterator will return TermListItem objects.
 545
 546     Access to term frequency and position information is only available until
 547     the iterator has moved on.
 548
 549     Note that term frequency information is only meaningful for a document
 550     retrieved from a database.  If term frequency information is requested for
 551     a document which was freshly created, an InvalidOperationError will be
 552     raised.
 553
 554     """
 555     # Note: document termlist iterators may be implemented entirely in-memory
 556     # (in which case access to all items could be allowed eagerly), but may
 557     # also be implemented by returning a database termlist (for documents which
 558     # are stored in a database, rather than freshly created).  We choose the
 559     # most conservative settings, to avoid doing eager access when lazy access
 560     # would be more appropriate.
 561     return TermIter(self._termlist_begin(), self._termlist_end(),
 562                     has_termfreq=TermIter.LAZY,
 563                     has_wdf=TermIter.EAGER,
 564                     has_positions=TermIter.LAZY)
 565 Document.__iter__ = _document_gen_termlist_iter
 566 Document.termlist = _document_gen_termlist_iter
 567
 568 # Modify QueryParser to add a "stoplist()" method.
 569 def _queryparser_gen_stoplist_iter(self):
 570     """Get an iterator over all the stopped terms from the previous query.
 571
 572     This returns an iterator over all the terms which were omitted from the
 573     previously parsed query due to being considered to be stopwords.  Each
 574     instance of a word omitted from the query is represented in the returned
 575     list, in the order in which the
 576
 577     The iterator will return string objects.
 578
 579     """
 580     return TermIter(self._stoplist_begin(), self._stoplist_end(),
 581                     return_strings=True)
 582 QueryParser.stoplist = _queryparser_gen_stoplist_iter
 583
 584 # Modify QueryParser to add an "unstemlist()" method.
 585 def _queryparser_gen_unstemlist_iter(self, tname):
 586     """Get an iterator over all the unstemmed forms of a stemmed term.
 587
 588     This returns an iterator which returns all the unstemmed words which were
 589     stemmed to the stemmed form specified by `tname` when parsing the previous
 590     query.  Each instance of a word which stems to `tname` is returned by the
 591     iterator in the order in which the words appeared in the query - an
 592     individual unstemmed word may thus occur multiple times.
 593
 594     The iterator will return string objects.
 595
 596     """
 597     return TermIter(self._unstem_begin(tname), self._unstem_end(tname),
 598                     return_strings=True)
 599 QueryParser.unstemlist = _queryparser_gen_unstemlist_iter
 600
# Modify ValueCountMatchSpy to add a "values()" method.
 602 def wrapper():
 603     begin = ValueCountMatchSpy.values_begin
 604     del ValueCountMatchSpy.values_begin
 605     end = ValueCountMatchSpy.values_end
 606     del ValueCountMatchSpy.values_end
 607     def values(self):
 608         """Get an iterator over all the values in the slot.
 609
 610         Values will be returned in ascending alphabetical order.
 611
 612         The iterator will return TermListItem objects: the value can be
 613         accessed as the `term` property, and the frequency can be accessed as
 614         the `termfreq` property.
 615
 616         """
 617         return TermIter(begin(self), end(self), has_termfreq=TermIter.EAGER)
 618     return values
 619 ValueCountMatchSpy.values = wrapper()
 620 del wrapper
 621
# Modify ValueCountMatchSpy to add a "top_values()" method.
 623 def wrapper():
 624     begin = ValueCountMatchSpy.top_values_begin
 625     del ValueCountMatchSpy.top_values_begin
 626     end = ValueCountMatchSpy.top_values_end
 627     del ValueCountMatchSpy.top_values_end
 628     def top_values(self, maxvalues):
 629         """Get an iterator over the most frequent values for the slot.
 630
 631         Values will be returned in descending order of frequency.  Values with
 632         the same frequency will be returned in ascending alphabetical order.
 633
 634         The iterator will return TermListItem objects: the value can be
 635         accessed as the `term` property, and the frequency can be accessed as
 636         the `termfreq` property.
 637
 638         """
 639         return TermIter(begin(self, maxvalues), end(self, maxvalues),
 640                         has_termfreq=TermIter.EAGER)
 641     return top_values
 642 ValueCountMatchSpy.top_values = wrapper()
 643 del wrapper
 644
 645 # When we make a query, keep a note of postingsources involved, so they won't
 646 # be deleted. This hack can probably be removed once xapian bug #186 is fixed.
 647 __query_init_orig = Query.__init__
 648 def _query_init(self, *args):
 649     """Make a new query object.
 650
 651     Many possible arguments are possible - see the documentation for details.
 652
 653     """
 654     ps = []
 655     if len(args) == 1 and isinstance(args[0], PostingSource):
 656         ps.append(args[0])
 657     else:
 658         for arg in args:
 659             if isinstance(arg, Query):
 660                 ps.extend(getattr(arg, '_ps', []))
 661             elif hasattr(arg, '__iter__'):
 662                 for listarg in arg:
 663                     if isinstance(listarg, Query):
 664                         ps.extend(getattr(listarg, '_ps', []))
 665     __query_init_orig(self, *args)
 666     self._ps = ps
 667 Query.__init__ = _query_init
 668 del _query_init
 669
 670 # When setting a query on enquire, keep a note of postingsources involved, so
 671 # they won't be deleted. This hack can probably be removed once xapian bug #186
 672 # is fixed.
 673 __enquire_set_query_orig = Enquire.set_query
 674 def _enquire_set_query(self, query, qlen=0):
 675     self._ps = getattr(query, '_ps', [])
 676     return __enquire_set_query_orig(self, query, qlen)
 677 _enquire_set_query.__doc__ = __enquire_set_query_orig.__doc__
 678 Enquire.set_query = _enquire_set_query
 679 del _enquire_set_query
 680
# When getting a query from enquire, keep a note of postingsources involved,
 682 # so they won't be deleted. This hack can probably be removed once xapian bug
 683 # #186 is fixed.
 684 __enquire_get_query_orig = Enquire.get_query
 685 def _enquire_get_query(self):
 686     query = __enquire_get_query_orig(self)
 687     query._ps = getattr(self, '_ps', [])
 688     return query
 689 _enquire_get_query.__doc__ = __enquire_get_query_orig.__doc__
 690 Enquire.get_query = _enquire_get_query
 691 del _enquire_get_query
 692
 693 # When we set a ValueRangeProcessor into the QueryParser, keep a python
 694 # reference so it won't be deleted. This hack can probably be removed once
 695 # xapian bug #186 is fixed.
 696 __queryparser_add_valuerangeprocessor_orig = QueryParser.add_valuerangeprocessor
 697 def _queryparser_add_valuerangeprocessor(self, vrproc):
 698     if not hasattr(self, '_vrps'):
 699         self._vrps = []
 700     self._vrps.append(vrproc)
 701     return __queryparser_add_valuerangeprocessor_orig(self, vrproc)
 702 _queryparser_add_valuerangeprocessor.__doc__ = __queryparser_add_valuerangeprocessor_orig.__doc__
 703 QueryParser.add_valuerangeprocessor = _queryparser_add_valuerangeprocessor
 704 del _queryparser_add_valuerangeprocessor
 705
 706 # When we set a RangeProcessor into the QueryParser, keep a python
 707 # reference so it won't be deleted. This hack can probably be removed once
 708 # xapian bug #186 is fixed.
 709 __queryparser_add_rangeprocessor_orig = QueryParser.add_rangeprocessor
 710 def _queryparser_add_rangeprocessor(self, rproc):
 711     if not hasattr(self, '_rps'):
 712         self._rps = []
 713     self._rps.append(rproc)
 714     return __queryparser_add_rangeprocessor_orig(self, rproc)
 715 _queryparser_add_rangeprocessor.__doc__ = __queryparser_add_rangeprocessor_orig.__doc__
 716 QueryParser.add_rangeprocessor = _queryparser_add_rangeprocessor
 717 del _queryparser_add_rangeprocessor
 718
 719 # When we set a FieldProcessor into the QueryParser, keep a python
 720 # reference so it won't be deleted. This hack can probably be removed once
 721 # xapian bug #186 is fixed.
 722 __queryparser_add_prefix_orig = QueryParser.add_prefix
 723 def _queryparser_add_prefix(self, s, proc):
 724     if not isinstance(proc, (str, bytes)):
 725         if not hasattr(self, '_fps'):
 726             self._fps = []
 727         self._fps.append(proc)
 728     return __queryparser_add_prefix_orig(self, s, proc)
 729 _queryparser_add_prefix.__doc__ = __queryparser_add_prefix_orig.__doc__
 730 QueryParser.add_prefix = _queryparser_add_prefix
 731 del _queryparser_add_prefix
 732 __queryparser_add_boolean_prefix_orig = QueryParser.add_boolean_prefix
 733 def _queryparser_add_boolean_prefix(self, s, proc, exclusive = True):
 734     if not isinstance(proc, (str, bytes)):
 735         if not hasattr(self, '_fps'):
 736             self._fps = []
 737         self._fps.append(proc)
 738     return __queryparser_add_boolean_prefix_orig(self, s, proc, exclusive)
 739 _queryparser_add_boolean_prefix.__doc__ = __queryparser_add_boolean_prefix_orig.__doc__
 740 QueryParser.add_boolean_prefix = _queryparser_add_boolean_prefix
 741 del _queryparser_add_boolean_prefix
 742
 743 # When we set a Stopper into the QueryParser, keep a python reference so it
 744 # won't be deleted. This hack can probably be removed once xapian bug #186 is
 745 # fixed.
 746 __queryparser_set_stopper_orig = QueryParser.set_stopper
 747 def _queryparser_set_stopper(self, stopper):
 748     self._stopper = stopper
 749     return __queryparser_set_stopper_orig(self, stopper)
 750 _queryparser_set_stopper.__doc__ = __queryparser_set_stopper_orig.__doc__
 751 QueryParser.set_stopper = _queryparser_set_stopper
 752 del _queryparser_set_stopper
 753
 754 # When we set a Stopper into the TermGenerator, keep a python reference so it
 755 # won't be deleted. This hack can probably be removed once xapian bug #186 is
 756 # fixed.
 757 __termgenerator_set_stopper_orig = TermGenerator.set_stopper
 758 def _termgenerator_set_stopper(self, stopper):
 759     self._stopper = stopper
 760     return __termgenerator_set_stopper_orig(self, stopper)
 761 _termgenerator_set_stopper.__doc__ = __termgenerator_set_stopper_orig.__doc__
 762 TermGenerator.set_stopper = _termgenerator_set_stopper
 763 del _termgenerator_set_stopper
 764
 765 # When we set a Sorter on enquire, keep a python reference so it won't be
 766 # deleted.  This hack can probably be removed once xapian bug #186 is fixed.
 767 __enquire_set_sort_by_key_orig = Enquire.set_sort_by_key
 768 def _enquire_set_sort_by_key(self, sorter, reverse):
 769     self._sorter = sorter
 770     return __enquire_set_sort_by_key_orig(self, sorter, reverse)
 771 _enquire_set_sort_by_key.__doc__ = __enquire_set_sort_by_key_orig.__doc__
 772 Enquire.set_sort_by_key = _enquire_set_sort_by_key
 773 del _enquire_set_sort_by_key
 774
 775 __enquire_set_sort_by_key_then_relevance_orig = Enquire.set_sort_by_key_then_relevance
 776 def _enquire_set_sort_by_key_then_relevance(self, sorter, reverse):
 777     self._sorter = sorter
 778     return __enquire_set_sort_by_key_then_relevance_orig(self, sorter, reverse)
 779 _enquire_set_sort_by_key_then_relevance.__doc__ = __enquire_set_sort_by_key_then_relevance_orig.__doc__
 780 Enquire.set_sort_by_key_then_relevance = _enquire_set_sort_by_key_then_relevance
 781 del _enquire_set_sort_by_key_then_relevance
 782
 783 __enquire_set_sort_by_relevance_then_key_orig = Enquire.set_sort_by_relevance_then_key
 784 def _enquire_set_sort_by_relevance_then_key(self, sorter, reverse):
 785     self._sorter = sorter
 786     return __enquire_set_sort_by_relevance_then_key_orig(self, sorter, reverse)
 787 _enquire_set_sort_by_relevance_then_key.__doc__ = __enquire_set_sort_by_relevance_then_key_orig.__doc__
 788 Enquire.set_sort_by_relevance_then_key = _enquire_set_sort_by_relevance_then_key
 789 del _enquire_set_sort_by_relevance_then_key
 790
 791
 792 ##########################################
 793 # Support for iteration of posting lists #
 794 ##########################################
 795
 796 class PostingItem(object):
 797     """An item returned from iteration of a posting list.
 798
 799     The item supports access to the following attributes and properties:
 800
 801      - `docid`: The document ID corresponding to this PostingItem.
 802      - `doclength`: The length of the document corresponding to this
 803        PostingItem.
 804      - `wdf`: The within document frequency of the term which the posting list
 805        is for in the document corresponding to this PostingItem.
 806      - `positer`: An iterator over the positions which the term corresponing to
 807        this posting list occurs at in the document corresponding to this
 808        PostingItem.  This is only available until the iterator which returned
 809        this item next moves.
 810
 811     """
 812     __slots__ = ('_iter', 'docid', 'doclength', 'wdf',)
 813
 814     def __init__(self, iter):
 815         self._iter = iter
 816         self.docid = iter._iter.get_docid()
 817         self.doclength = iter._iter.get_doclength()
 818         self.wdf = iter._iter.get_wdf()
 819
 820         # Support for sequence API
 821         sequence = ['docid', 'doclength', 'wdf', 'positer']
 822         if not iter._has_positions:
 823             sequence[3] = PositionIter()
 824
 825     def _get_positer(self):
 826         """Get a position list iterator.
 827
 828         The iterator will return integers representing the positions that the
 829         term occurs at in the document corresponding to this PostingItem.
 830
 831         This will raise a InvalidOperationError exception if the iterator this
 832         item came from doesn't support position lists, or if the iterator has
 833         moved on since the item was returned from it.
 834
 835         """
 836         if not self._iter._has_positions:
 837             raise InvalidOperationError("Iterator does not support position lists")
 838         if self._iter._iter == self._iter._end or \
 839            self.docid != self._iter._iter.get_docid():
 840             raise InvalidOperationError("Iterator has moved, and does not support random access")
 841         return PositionIter(self._iter._iter._positionlist_begin(),
 842                             self._iter._iter._positionlist_end())
 843     positer = property(_get_positer, doc=
 844     """A position iterator for the current posting (if meaningful).
 845
 846     The iterator will return integers representing the positions that the term
 847     occurs at.
 848
 849     This will raise a InvalidOperationError exception if the iterator this item
 850     came from doesn't support position lists, or if the iterator has moved on
 851     since the item was returned from it.
 852
 853     """)
 854
 855
 856 class PostingIter(object):
 857     """An iterator over a posting list.
 858
 859     The iterator will return PostingItem objects, which will be evaluated
 860     lazily where appropriate.
 861
 862     """
 863     __slots__ = ('_iter', '_end', '_has_positions', '_moved')
 864
 865     def __init__(self, start, end, has_positions=False):
 866         self._iter = start
 867         self._end = end
 868         self._has_positions = has_positions
 869
 870         # _moved is True if we've moved onto the next item.  This is needed so
 871         # that the iterator doesn't have to move on until just before next() is
 872         # called: since the iterator starts by pointing at a valid item, we
 873         # can't just call self._iter.next() unconditionally at the start of our
 874         # next() method.
 875         self._moved = True
 876
 877     def __iter__(self):
 878         return self
 879
 880     def next(self):
 881         if not self._moved:
 882             self._iter.next()
 883             self._moved = True
 884
 885         if self._iter == self._end:
 886             raise StopIteration
 887         else:
 888             self._moved = False
 889             return PostingItem(self)
 890
 891     def skip_to(self, docid):
 892         """Skip the iterator forward.
 893
 894         The iterator is advanced to the first document with a document ID
 895         which is greater than or equal to the supplied document ID.
 896
 897         If there are no such items, this will raise StopIteration.
 898
 899         This returns the item which the iterator is moved to.  The subsequent
 900         item will be returned the next time that next() is called (unless
 901         skip_to() is called again first).
 902
 903         """
 904         if self._iter != self._end:
 905             self._iter.skip_to(docid)
 906         if self._iter == self._end:
 907             self._moved = True
 908             raise StopIteration
 909         self._moved = False
 910         return PostingItem(self)
 911
 912 def _database_gen_postlist_iter(self, tname):
 913     """Get an iterator over the postings which are indexed by a given term.
 914
 915     If `tname` is empty, an iterator over all the documents will be returned
 916     (this will contain one entry for each document, will always return a wdf of
 917     1, and will not allow access to a position iterator).
 918
 919     """
 920     if len(tname) != 0:
 921         return PostingIter(self._postlist_begin(tname), self._postlist_end(tname),
 922                            has_positions=True)
 923     else:
 924         return PostingIter(self._postlist_begin(tname), self._postlist_end(tname))
 925 Database.postlist = _database_gen_postlist_iter
 926
 927
 928 ###########################################
 929 # Support for iteration of position lists #
 930 ###########################################
 931
 932 class PositionIter(object):
 933     """An iterator over a position list.
 934
 935     The iterator will return integers, in ascending order.
 936
 937     """
 938     def __init__(self, start = 0, end = 0):
 939         self.iter = start
 940         self.end = end
 941
 942     def __iter__(self):
 943         return self
 944
 945     def next(self):
 946         if self.iter==self.end:
 947             raise StopIteration
 948         else:
 949             r = self.iter.get_termpos()
 950             self.iter.next()
 951             return r
 952
 953 # Modify Database to add a "positionlist()" method.
 954 def _database_gen_positionlist_iter(self, docid, tname):
 955     """Get an iterator over all the positions in a given document of a term.
 956
 957     The iterator will return integers, in ascending order.
 958
 959     """
 960     return PositionIter(self._positionlist_begin(docid, tname), self._positionlist_end(docid, tname))
 961 Database.positionlist = _database_gen_positionlist_iter
 962
 963 ########################################
 964 # Support for iteration of value lists #
 965 ########################################
 966
 967 class ValueItem(object):
 968     """An item returned from iteration of the values in a document.
 969
 970     The item supports access to the following attributes:
 971
 972      - `num`: The number of the value.
 973      - `value`: The contents of the value.
 974
 975     """
 976
 977     __slots__ = ('num', 'value', )
 978
 979     def __init__(self, num, value):
 980         self.num = num
 981         self.value = value
 982
 983 class ValueIter(object):
 984     """An iterator over all the values stored in a document.
 985
 986     The iterator will return ValueItem objects, in ascending order of value number.
 987
 988     """
 989     def __init__(self, start, end):
 990         self.iter = start
 991         self.end = end
 992
 993     def __iter__(self):
 994         return self
 995
 996     def next(self):
 997         if self.iter==self.end:
 998             raise StopIteration
 999         else:
1000             r = ValueItem(self.iter.get_valueno(), self.iter.get_value())
1001             self.iter.next()
1002             return r
1003
# Give Document a "values()" method returning a python iterator.
def _document_gen_values_iter(self):
    """Get an iterator over all the values stored in a document.

    The iterator will return ValueItem objects, in ascending order of value number.

    """
    begin = self._values_begin()
    end = self._values_end()
    return ValueIter(begin, end)
Document.values = _document_gen_values_iter
1013
1014
1015 ##########################################
1016 # Support for iteration of value streams #
1017 ##########################################
1018
class ValueStreamItem(object):
    """An item returned from iteration of a value stream.

    The following attributes are available:

     - `docid`: The docid for the item.
     - `value`: The contents of the value.

    """

    __slots__ = ('docid', 'value')

    def __init__(self, docid, value):
        self.docid, self.value = docid, value
1034
class ValueStreamIter(object):
    """An iterator over the values stored in a slot in the database.

    The iterator will return ValueStreamItem objects, in ascending order of
    document id.

    """
    def __init__(self, start, end):
        """Wrap the underlying iterator pair `start` / `end`."""
        self.iter = start
        self.end = end
        # moved is True when the underlying iterator already points at the
        # entry which the next call to next() should return; it starts out
        # True because `start` has not been returned yet.
        self.moved = True

    def __iter__(self):
        return self

    def next(self):
        """Return the next ValueStreamItem, or raise StopIteration at the end."""
        if not self.moved:
            self.iter.next()
            self.moved = True

        if self.iter == self.end:
            raise StopIteration
        self.moved = False
        return ValueStreamItem(self.iter.get_docid(), self.iter.get_value())

    # Python 3 spells the iterator protocol method __next__(); providing both
    # names lets the class be used with for-loops and next() on either version.
    __next__ = next

    def skip_to(self, docid):
        """Skip the iterator forward.

        The iterator is advanced to the first document with a document ID
        which is greater than or equal to the supplied document ID.

        If there are no such items, this will raise StopIteration.

        This returns the item which the iterator is moved to.  The subsequent
        item will be returned the next time that next() is called (unless
        skip_to() is called again first).

        """
        if self.iter != self.end:
            self.iter.skip_to(docid)
        if self.iter == self.end:
            self.moved = True
            raise StopIteration
        self.moved = False
        return ValueStreamItem(self.iter.get_docid(), self.iter.get_value())
1080
# Replace Database.valuestream_begin() and Database.valuestream_end() with a
# single "valuestream()" method which returns a python iterator.
def wrapper():
    # Capture each raw method in the closure before removing it from the class.
    begin_method = Database.valuestream_begin
    del Database.valuestream_begin
    end_method = Database.valuestream_end
    del Database.valuestream_end
    def valuestream(self, slot):
        """Get an iterator over all the values stored in a slot in the database.

        The iterator will return ValueStreamItem objects, in ascending order of
        document id.

        """
        begin = begin_method(self, slot)
        end = end_method(self, slot)
        return ValueStreamIter(begin, end)
    return valuestream
Database.valuestream = wrapper()
del wrapper
1099
1100 ##########################################
1101 # Support for iteration of LatLongCoords #
1102 ##########################################
1103
class LatLongCoordsIter(object):
    """An iterator over all the coordinates in a LatLongCoords object.

    The iterator returns LatLongCoord objects.

    """
    def __init__(self, start, end):
        """Wrap the underlying iterator pair `start` / `end`."""
        self.iter = start
        self.end = end

    def __iter__(self):
        return self

    def __eq__(self, other):
        """Compare the current positions of two LatLongCoordsIter objects.

        This class has no equals() method of its own, so the comparison is
        delegated to the equals() method of the wrapped iterators.  Objects of
        any other type are left to Python's default comparison handling.

        """
        if not isinstance(other, LatLongCoordsIter):
            return NotImplemented
        return self.iter.equals(other.iter)

    def __ne__(self, other):
        """Inverse of __eq__()."""
        outcome = self.__eq__(other)
        if outcome is NotImplemented:
            return outcome
        return not outcome

    def next(self):
        """Return the next coordinate, or raise StopIteration at the end."""
        if self.iter.equals(self.end):
            raise StopIteration
        coord = self.iter.get_coord()
        self.iter.next()
        return coord

    # Python 3 spells the iterator protocol method __next__(); providing both
    # names lets the class be used with for-loops and next() on either version.
    __next__ = next
1130
# Make LatLongCoords iterable from python, then remove the helper function
# and the LatLongCoordsIterator name from the module namespace.
def _latlongcoords_iter(self):
    """Get an iterator over all the coordinates in a LatLongCoords.

    The iterator will return xapian.LatLongCoord objects.

    """
    first = self.begin()
    last = self.end()
    return LatLongCoordsIter(first, last)
LatLongCoords.__iter__ = _latlongcoords_iter
del _latlongcoords_iter, LatLongCoordsIterator
1142
# Make Enquire keep a python reference to every decider passed to
# add_matchspy(), so that they won't be deleted before the Enquire object.
# This hack can probably be removed once xapian bug #186 is fixed.
_enquire_add_matchspy_orig = Enquire.add_matchspy
def _enquire_match_spy_add(self, decider):
    # Remember the decider before handing it to the wrapped method.
    if hasattr(self, '_deciders'):
        self._deciders.append(decider)
    else:
        self._deciders = [decider]
    _enquire_add_matchspy_orig(self, decider)
# The replacement advertises the documentation of the method it wraps.
_enquire_match_spy_add.__doc__ = _enquire_add_matchspy_orig.__doc__
Enquire.add_matchspy = _enquire_match_spy_add
1154
# Wrap Enquire.clear_matchspies() so that the `_deciders` attribute, if the
# Enquire object has one, is removed after the wrapped method has run.
_enquire_clear_matchspies_orig = Enquire.clear_matchspies
def _enquire_match_spies_clear(self):
    _enquire_clear_matchspies_orig(self)
    if not hasattr(self, '_deciders'):
        return
    del self._deciders
# The replacement advertises the documentation of the method it wraps.
_enquire_match_spies_clear.__doc__ = _enquire_clear_matchspies_orig.__doc__
Enquire.clear_matchspies = _enquire_match_spies_clear
1162
1163
# Wrap Stem.__init__() so that a StemImplementation passed as the first
# argument has __disown__() called on it; otherwise Python could delete it
# from under us.
_stem_init_orig = Stem.__init__
def _stem_init(self, *args):
    _stem_init_orig(self, *args)
    if not args:
        return
    implementation = args[0]
    if isinstance(implementation, StemImplementation):
        implementation.__disown__()
# The replacement advertises the documentation of the method it wraps.
_stem_init.__doc__ = _stem_init_orig.__doc__
Stem.__init__ = _stem_init
1173
1174
# Drop the module-level static-method wrappers which shouldn't be part of the
# API.
del Document_unserialise, Query_unserialise, Stem_get_available_languages

# Provide python equivalents of Query::MatchAll and Query::MatchNothing as
# class attributes of Query.
Query.MatchAll = Query("")
Query.MatchNothing = Query()
1183
1184
# Set the list of names which should be public.
# Note that this needs to happen at the end of xapian.py.
#
# Names starting with an underscore and names ending in '_swigregister' or
# 'Iterator' are left out.  A generator expression is used so that no loop
# variable is left behind as a module attribute; dir() is still evaluated in
# the module scope because it is the outermost iterable.
__all__ = tuple(
    name for name in dir()
    if not name.startswith('_')
    and not name.endswith(('_swigregister', 'Iterator'))
)
1193 %}
1194
1195 /* vim:syntax=python:set expandtab: */