Fix testcase unsupportedcheck1 for --disable-backend-remote
[xapian.git] / xapian-bindings / python3 / extra.i
blob62433d908817f4779d39eac75cee1a3cb899892a
1 %{
2 /* python/extra.i: Xapian scripting python interface additional python code.
4 * Copyright (C) 2003,2004,2005 James Aylett
5 * Copyright (C) 2005,2006,2007,2008,2009,2010,2011,2013 Olly Betts
6 * Copyright (C) 2007 Lemur Consulting Ltd
7 * Copyright (C) 2010 Richard Boulton
9 * This program is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU General Public License as
11 * published by the Free Software Foundation; either version 2 of the
12 * License, or (at your option) any later version.
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
22 * USA
26 %pythoncode %{
28 # Set the documentation format - this is used by tools like "epydoc" to decide
29 # how to format the documentation strings.
30 __docformat__ = "restructuredtext en"
32 ##################################
33 # Support for iteration of MSets #
34 ##################################
class MSetItem(object):
    """One hit yielded while iterating over an MSet.

    The following attributes and properties are available:

     - `docid`: The Xapian document ID of this hit.
     - `weight`: The weight of this hit.
     - `rank`: The rank of this hit, i.e. its position in the total set of
       matching documents.  The best match has rank 0.  When the MSet was
       requested with a non-zero 'start' passed to get_mset(), the first
       hit in the MSet has a rank equal to that 'start' value.
     - `percent`: The percentage score of this hit.
     - `document`: The document for this hit, giving access to the document
       data and anything else stored in the document (such as term lists).
       It is fetched lazily, on first access.
     - `collapse_key`: The value of the key which was used for collapsing.
     - `collapse_count`: An estimate of the number of documents that have
       been collapsed into this one.

    The collapse count estimate is always less than or equal to the actual
    number of other documents satisfying the match criteria with the same
    collapse key as this document.  It may be 0 even though there are other
    documents with the same collapse key which satisfy the match criteria.
    However, if it is non-zero there definitely are other such documents.
    So it may be used to tell the user that there are "at least N other
    matches in this group", or to decide whether to offer a "show other
    documents in this group" feature (but note that the feature may then not
    be offered in every case where it would show other documents).

    """

    __slots__ = ('_mset', '_firstitem', 'docid', 'weight', 'rank',
                 'percent', 'collapse_key', 'collapse_count', '_document', )

    def __init__(self, iter, mset):
        # The document is only fetched when first asked for.
        self._document = None
        self._mset = mset
        self._firstitem = mset.get_firstitem()
        # Snapshot the per-hit values now, since the underlying iterator
        # is advanced by the caller after this item has been built.
        for field in ('docid', 'weight', 'rank', 'percent',
                      'collapse_key', 'collapse_count'):
            setattr(self, field, getattr(iter, 'get_' + field)())

    def _get_document(self):
        doc = self._document
        if doc is None:
            # _get_hit_internal() takes an offset relative to the start of
            # the MSet, whereas `rank` is absolute.
            offset = self.rank - self._firstitem
            doc = self._mset._get_hit_internal(offset).get_document()
            self._document = doc
        return doc

    document = property(_get_document, doc="The document object corresponding to this MSet item.")
class MSetIter(object):
    """Iterator over the hits in an MSet.

    Each step yields an MSetItem, whose contents are evaluated lazily where
    appropriate.

    """
    __slots__ = ('_iter', '_end', '_mset')

    def __init__(self, mset):
        self._mset = mset
        self._iter = mset._begin()
        self._end = mset._end()

    def __iter__(self):
        return self

    def __next__(self):
        if self._iter == self._end:
            raise StopIteration
        # Build the item from the current position before stepping on.
        item = MSetItem(self._iter, self._mset)
        next(self._iter)
        return item
115 # Modify the MSet to allow access to the python iterators, and have other
116 # convenience methods.
def _mset_gen_iter(self):
    """Create an iterator over this MSet.

    The iterator yields MSetItem objects, which are evaluated lazily where
    appropriate.

    """
    mset_iter = MSetIter(self)
    return mset_iter
MSet.__iter__ = _mset_gen_iter

# len(mset) reports the number of hits held in the MSet.
MSet.__len__ = lambda self: MSet.size(self)
def _mset_getitem(self, index):
    """Fetch one item from the MSet by position.

    `index` counts from the start of the MSet (it is not the absolute rank
    of the item).  A negative index counts back from the end.

    Returns an MSetItem; raises IndexError if the index is out of range.

    """
    size = len(self)
    if index < 0:
        index += size
    if index < 0 or index >= size:
        raise IndexError("Mset index out of range")
    hit = self._get_hit_internal(index)
    return MSetItem(hit, self)
MSet.__getitem__ = _mset_getitem
MSet.get_hit = _mset_getitem
148 ##################################
149 # Support for iteration of ESets #
150 ##################################
class ESetItem(object):
    """One entry yielded while iterating over an ESet.

    The following attributes are available:

     - `term`: The term of this ESet entry.
     - `weight`: The weight of this ESet entry.

    """
    __slots__ = ('term', 'weight')

    def __init__(self, iter):
        # Copy the values out now; the source iterator moves on afterwards.
        term = iter.get_term()
        weight = iter.get_weight()
        self.term, self.weight = term, weight
class ESetIter(object):
    """Iterator over the entries in an ESet.

    Each step yields an ESetItem.

    """
    __slots__ = ('_iter', '_end')

    def __init__(self, eset):
        self._iter, self._end = eset._begin(), eset._end()

    def __iter__(self):
        return self

    def __next__(self):
        if self._iter == self._end:
            raise StopIteration
        # Capture the current entry before stepping the iterator on.
        item = ESetItem(self._iter)
        next(self._iter)
        return item
189 # Modify the ESet to allow access to the python iterators, and have other
190 # convenience methods.
def _eset_gen_iter(self):
    """Create an iterator over this ESet.

    The iterator yields ESetItem objects.

    """
    eset_iter = ESetIter(self)
    return eset_iter
ESet.__iter__ = _eset_gen_iter

# len(eset) reports the number of entries held in the ESet.
ESet.__len__ = lambda self: ESet.size(self)
204 #######################################
205 # Support for iteration of term lists #
206 #######################################
class TermListItem(object):
    """An item returned from iteration of a term list.

    The item supports access to the following attributes and properties:

     - `term`: The term corresponding to this TermListItem.
     - `wdf`: The within document frequency of this term.
     - `termfreq`: The number of documents in the collection which are indexed
       by the term.
     - `positer`: An iterator over the positions which the term appears at in
       the document.  This is only available until the iterator which returned
       this item next moves.

    """
    __slots__ = ('_iter', 'term', '_wdf', '_termfreq')

    def __init__(self, iter, term):
        """Build an item for `term` from the TermIter `iter`.

        Values which the owning iterator marks as EAGER are read from the
        underlying iterator immediately; anything else is left unset and
        resolved (or rejected) by the corresponding property on access.

        """
        self._iter = iter
        self.term = term
        self._wdf = None
        self._termfreq = None

        if iter._has_wdf == TermIter.EAGER:
            self._wdf = iter._iter.get_wdf()
        if iter._has_termfreq == TermIter.EAGER:
            self._termfreq = iter._iter.get_termfreq()

    def _get_wdf(self):
        """Get the within-document-frequency of the current term.

        This will raise an InvalidOperationError exception if the iterator
        this item came from doesn't support within-document-frequencies, or
        if the value was not fetched eagerly and the iterator has since moved.

        """
        if self._wdf is None:
            if self._iter._has_wdf == TermIter.INVALID:
                raise InvalidOperationError("Iterator does not support wdfs")
            # Identity (not equality) comparison: the owning iterator keeps
            # the very same term object for as long as it has not moved.
            if self.term is not self._iter._lastterm:
                raise InvalidOperationError("Iterator has moved, and does not support random access")
            self._wdf = self._iter._iter.get_wdf()
        return self._wdf
    wdf = property(_get_wdf, doc=
    """The within-document-frequency of the current term (if meaningful).

    This will raise an InvalidOperationError exception if the iterator
    this item came from doesn't support within-document-frequencies.

    """)

    def _get_termfreq(self):
        """Get the term frequency.

        This is the number of documents in the collection which are indexed by
        the term.

        This will raise an InvalidOperationError exception if the iterator
        this item came from doesn't support term frequencies, or if the value
        was not fetched eagerly and the iterator has since moved.

        """
        if self._termfreq is None:
            if self._iter._has_termfreq == TermIter.INVALID:
                raise InvalidOperationError("Iterator does not support term frequencies")
            # See _get_wdf() for why this is an identity comparison.
            if self.term is not self._iter._lastterm:
                raise InvalidOperationError("Iterator has moved, and does not support random access")
            self._termfreq = self._iter._iter.get_termfreq()
        return self._termfreq
    termfreq = property(_get_termfreq, doc=
    """The term frequency of the current term (if meaningful).

    This is the number of documents in the collection which are indexed by the
    term.

    This will raise an InvalidOperationError exception if the iterator
    this item came from doesn't support term frequencies.

    """)

    def _get_positer(self):
        """Get a position list iterator.

        The iterator will return integers representing the positions that the
        term occurs at.

        This will raise an InvalidOperationError exception if the iterator
        this item came from doesn't support position lists, or if the iterator
        has moved on since the item was returned from it.

        """
        if self._iter._has_positions == TermIter.INVALID:
            raise InvalidOperationError("Iterator does not support position lists")
        # Access to position lists is always lazy, so we don't need to check
        # _has_positions.
        if self.term is not self._iter._lastterm:
            raise InvalidOperationError("Iterator has moved, and does not support random access")
        return PositionIter(self._iter._iter._positionlist_begin(),
                            self._iter._iter._positionlist_end())
    positer = property(_get_positer, doc=
    """A position iterator for the current term (if meaningful).

    The iterator will return integers representing the positions that the term
    occurs at.

    This will raise an InvalidOperationError exception if the iterator this
    item came from doesn't support position lists, or if the iterator has
    moved on since the item was returned from it.

    """)
class TermIter(object):
    """Iterator over a term list.

    Yields TermListItem objects (evaluated lazily where appropriate), or the
    bare terms when constructed with `return_strings` set.

    """
    __slots__ = ('_iter', '_end', '_has_termfreq', '_has_wdf',
                 '_has_positions', '_return_strings', '_lastterm', '_moved')

    # Availability modes for termfreq / wdf / positions.
    INVALID = 0
    LAZY = 1
    EAGER = 2

    def __init__(self, start, end, has_termfreq=INVALID,
                 has_wdf=INVALID, has_positions=INVALID,
                 return_strings=False):
        self._iter, self._end = start, end
        self._has_termfreq = has_termfreq
        self._has_wdf = has_wdf
        self._has_positions = has_positions
        # Position lists can never be accessed eagerly.
        assert has_positions != TermIter.EAGER
        self._return_strings = return_strings
        # Term most recently handed out; items use it to detect that the
        # iterator has moved on since they were created.
        self._lastterm = None

        # The wrapped iterator starts out already pointing at a valid entry,
        # so __next__() must not advance it on the first call.  _moved is
        # True whenever the wrapped iterator is already positioned on the
        # entry that the next __next__() call should hand out.
        self._moved = True

    def __iter__(self):
        return self

    def __next__(self):
        # Step past the entry handed out last time, if not already done.
        if not self._moved:
            next(self._iter)
            self._moved = True

        if self._iter == self._end:
            self._lastterm = None
            raise StopIteration

        current = self._iter.get_term()
        self._lastterm = current
        self._moved = False
        if self._return_strings:
            return current
        return TermListItem(self, current)

    def skip_to(self, term):
        """Advance the iterator to `term`.

        The iterator moves to the first term at or after the current
        position which is greater than or equal to `term`.

        Raises StopIteration if there is no such term.

        Returns the entry the iterator ends up on.  The entry after it is
        what the following next() call returns (unless skip_to() is called
        again first).

        """
        if self._iter != self._end:
            self._iter.skip_to(term)

        if self._iter == self._end:
            self._lastterm = None
            self._moved = True
            raise StopIteration

        # TermListItems check a saved term against self._lastterm by object
        # identity, so only replace _lastterm when the term really changed;
        # an equal-valued new object must not displace the old one.
        landed = self._iter.get_term()
        if landed != self._lastterm:
            self._lastterm = landed

        self._moved = False
        result = self._lastterm
        if not self._return_strings:
            result = TermListItem(self, result)
        return result
411 # Modify Enquire to add a "matching_terms()" method.
def _enquire_gen_iter(self, which):
    """Iterate over the terms which match a given match set item.

    `which` selects the match set item, and may be either a document ID or
    an MSetItem object.

    The iterator yields string objects.

    """
    docid = which.docid if isinstance(which, MSetItem) else which
    begin = self._get_matching_terms_begin(docid)
    end = self._get_matching_terms_end(docid)
    return TermIter(begin, end, return_strings=True)
Enquire.matching_terms = _enquire_gen_iter
428 # Modify Query to add an "__iter__()" method.
def _query_gen_iter(self):
    """Iterate over the terms in a query.

    The iterator yields string objects.

    """
    begin = self._get_terms_begin()
    end = self._get_terms_end()
    return TermIter(begin, end, return_strings=True)
Query.__iter__ = _query_gen_iter
440 # Modify Database to add an "__iter__()" method and an "allterms()" method.
def _database_gen_allterms_iter(self, prefix=None):
    """Iterate over all the terms in the database.

    The iterator yields TermListItem objects, which do not support access
    to wdf or position information.

    Term frequency information can only be read until the iterator has
    moved on.

    When `prefix` is supplied, only terms starting with that prefix are
    returned.

    """
    # With no prefix, call the begin/end methods with no argument at all.
    bounds_args = () if prefix is None else (prefix,)
    begin = self._allterms_begin(*bounds_args)
    end = self._allterms_end(*bounds_args)
    return TermIter(begin, end, has_termfreq=TermIter.LAZY)
Database.__iter__ = _database_gen_allterms_iter
Database.allterms = _database_gen_allterms_iter
463 # Modify Database to add a "termlist()" method.
def _database_gen_termlist_iter(self, docid):
    """Iterate over all the terms which index a given document ID.

    The iterator yields TermListItem objects.

    Term frequency and position information can only be read until the
    iterator has moved on.

    """
    begin = self._termlist_begin(docid)
    end = self._termlist_end(docid)
    # has_termfreq is LAZY because most databases don't store term
    # frequencies in the termlist (that would mean updating many termlist
    # entries on every document update), so reading the term frequency
    # needs a separate lookup.
    return TermIter(begin, end,
                    has_termfreq=TermIter.LAZY,
                    has_wdf=TermIter.EAGER,
                    has_positions=TermIter.LAZY)
Database.termlist = _database_gen_termlist_iter
483 # Modify Database to add a "spellings()" method.
def _database_gen_spellings_iter(self):
    """Iterate over all the spelling correction targets.

    The iterator yields TermListItem objects.  Only the term frequency is
    available; wdf and positions are not meaningful.

    """
    begin = self._spellings_begin()
    end = self._spellings_end()
    return TermIter(begin, end,
                    has_termfreq=TermIter.EAGER,
                    has_wdf=TermIter.INVALID,
                    has_positions=TermIter.INVALID)
Database.spellings = _database_gen_spellings_iter
497 # Modify Database to add a "synonyms()" method.
def _database_gen_synonyms_iter(self, term):
    """Iterate over all the synonyms for a given term.

    `term` is the term to return synonyms for.

    The iterator yields string objects.

    """
    begin = self._synonyms_begin(term)
    end = self._synonyms_end(term)
    return TermIter(begin, end, return_strings=True)
Database.synonyms = _database_gen_synonyms_iter
511 # Modify Database to add a "synonym_keys()" method.
def _database_gen_synonym_keys_iter(self, prefix=""):
    """Iterate over all the terms which have synonyms.

    The iterator yields string objects.

    When `prefix` is non-empty, only terms with this prefix are returned.

    """
    begin = self._synonym_keys_begin(prefix)
    end = self._synonym_keys_end(prefix)
    return TermIter(begin, end, return_strings=True)
Database.synonym_keys = _database_gen_synonym_keys_iter
525 # Modify Database to add a "metadata_keys()" method, instead of direct access
526 # to metadata_keys_begin and metadata_keys_end.
def _database_gen_metadata_keys_iter(self, prefix=""):
    """Iterate over all the metadata keys.

    The iterator yields string objects.

    When `prefix` is non-empty, only metadata keys with this prefix are
    returned.

    """
    begin = self._metadata_keys_begin(prefix)
    end = self._metadata_keys_end(prefix)
    return TermIter(begin, end, return_strings=True)
Database.metadata_keys = _database_gen_metadata_keys_iter
540 # Modify Document to add an "__iter__()" method and a "termlist()" method.
def _document_gen_termlist_iter(self):
    """Iterate over all the terms in a document.

    The iterator yields TermListItem objects.

    Term frequency and position information can only be read until the
    iterator has moved on.

    Term frequency information is only meaningful for a document retrieved
    from a database.  Requesting it for a freshly created document raises
    an InvalidOperationError.

    """
    begin = self._termlist_begin()
    end = self._termlist_end()
    # A document termlist may live entirely in memory (where eager access to
    # everything would be fine), or may be backed by a database termlist
    # (for documents stored in a database rather than freshly created).  The
    # most conservative settings are used, so that nothing is fetched
    # eagerly when lazy access would be more appropriate.
    return TermIter(begin, end,
                    has_termfreq=TermIter.LAZY,
                    has_wdf=TermIter.EAGER,
                    has_positions=TermIter.LAZY)
Document.__iter__ = _document_gen_termlist_iter
Document.termlist = _document_gen_termlist_iter
568 # Modify QueryParser to add a "stoplist()" method.
def _queryparser_gen_stoplist_iter(self):
    """Iterate over all the stopped terms from the previous query.

    The iterator covers the terms which were omitted from the previously
    parsed query because they were considered to be stopwords.  Every
    instance of a word omitted from the query is represented.
    NOTE(review): the original description of the ordering was cut off
    mid-sentence ("in the order in which the"); presumably it is the order
    of appearance in the query - confirm.

    The iterator yields string objects.

    """
    begin = self._stoplist_begin()
    end = self._stoplist_end()
    return TermIter(begin, end, return_strings=True)
QueryParser.stoplist = _queryparser_gen_stoplist_iter
584 # Modify QueryParser to add an "unstemlist()" method.
def _queryparser_gen_unstemlist_iter(self, tname):
    """Iterate over all the unstemmed forms of a stemmed term.

    The iterator covers the unstemmed words which were stemmed to the
    stemmed form `tname` when the previous query was parsed.  Every instance
    of a word which stems to `tname` is returned, in the order in which the
    words appeared in the query - so an individual unstemmed word may occur
    multiple times.

    The iterator yields string objects.

    """
    begin = self._unstem_begin(tname)
    end = self._unstem_end(tname)
    return TermIter(begin, end, return_strings=True)
QueryParser.unstemlist = _queryparser_gen_unstemlist_iter
601 # Modify ValueCountMatchSpy to add a "values()" method.
def wrapper():
    # Capture the raw begin/end methods in a closure, then remove them from
    # the class so that only the combined values() method remains visible.
    begin = ValueCountMatchSpy.values_begin
    end = ValueCountMatchSpy.values_end
    delattr(ValueCountMatchSpy, 'values_begin')
    delattr(ValueCountMatchSpy, 'values_end')

    def values(self):
        """Iterate over all the values in the slot.

        Values are returned in ascending alphabetical order.

        The iterator yields TermListItem objects: the value is available as
        the `term` property, and the frequency as the `termfreq` property.

        """
        first = begin(self)
        last = end(self)
        return TermIter(first, last, has_termfreq=TermIter.EAGER)
    return values
ValueCountMatchSpy.values = wrapper()
del wrapper
622 # Modify ValueCountMatchSpy to add a "top_values()" method.
def wrapper():
    # Capture the raw begin/end methods in a closure, then remove them from
    # the class so that only the combined top_values() method remains
    # visible.
    begin = ValueCountMatchSpy.top_values_begin
    end = ValueCountMatchSpy.top_values_end
    delattr(ValueCountMatchSpy, 'top_values_begin')
    delattr(ValueCountMatchSpy, 'top_values_end')

    def top_values(self, maxvalues):
        """Iterate over the most frequent values for the slot.

        Values are returned in descending order of frequency.  Values with
        the same frequency are returned in ascending alphabetical order.

        The iterator yields TermListItem objects: the value is available as
        the `term` property, and the frequency as the `termfreq` property.

        """
        first = begin(self, maxvalues)
        last = end(self, maxvalues)
        return TermIter(first, last, has_termfreq=TermIter.EAGER)
    return top_values
ValueCountMatchSpy.top_values = wrapper()
del wrapper
645 # When we make a query, keep a note of postingsources involved, so they won't
646 # be deleted. This hack can probably be removed once xapian bug #186 is fixed.
__query_init_orig = Query.__init__
def _query_init(self, *args):
    """Make a new query object.

    Many possible arguments are possible - see the documentation for details.

    Any PostingSource objects involved (passed directly, or carried by
    subqueries in their `_ps` attribute) are remembered in `self._ps` so
    that Python keeps them alive for as long as this query exists.

    """
    ps = []
    if len(args) == 1 and isinstance(args[0], PostingSource):
        ps.append(args[0])
    else:
        forwarded = []
        for arg in args:
            if isinstance(arg, Query):
                ps.extend(getattr(arg, '_ps', []))
            elif (hasattr(arg, '__iter__')
                  and not isinstance(arg, (str, bytes))):
                # A one-shot iterator (e.g. a generator of subqueries) would
                # be exhausted by the scan below, leaving nothing for the
                # wrapped constructor to read.  Materialise it once and
                # forward the list instead.
                if iter(arg) is arg:
                    arg = list(arg)
                for listarg in arg:
                    if isinstance(listarg, Query):
                        ps.extend(getattr(listarg, '_ps', []))
            forwarded.append(arg)
        args = tuple(forwarded)
    __query_init_orig(self, *args)
    self._ps = ps
Query.__init__ = _query_init
del _query_init
670 # When setting a query on enquire, keep a note of postingsources involved, so
671 # they won't be deleted. This hack can probably be removed once xapian bug #186
672 # is fixed.
__enquire_set_query_orig = Enquire.set_query
def _enquire_set_query(self, query, qlen=0):
    # Hold on to the query's posting sources so they outlive the call.
    sources = getattr(query, '_ps', [])
    self._ps = sources
    result = __enquire_set_query_orig(self, query, qlen)
    return result
_enquire_set_query.__doc__ = __enquire_set_query_orig.__doc__
Enquire.set_query = _enquire_set_query
del _enquire_set_query
681 # When getting a query from enquire, keep a note of postingsources involved,
682 # so they won't be deleted. This hack can probably be removed once xapian bug
683 # #186 is fixed.
__enquire_get_query_orig = Enquire.get_query
def _enquire_get_query(self):
    # Pass the remembered posting sources on to the returned query, so they
    # stay referenced for as long as that query is in use.
    sources = getattr(self, '_ps', [])
    result = __enquire_get_query_orig(self)
    result._ps = sources
    return result
_enquire_get_query.__doc__ = __enquire_get_query_orig.__doc__
Enquire.get_query = _enquire_get_query
del _enquire_get_query
693 # When we set a RangeProcessor into the QueryParser, keep a python
694 # reference so it won't be deleted. This hack can probably be removed once
695 # xapian bug #186 is fixed.
__queryparser_add_rangeprocessor_orig = QueryParser.add_rangeprocessor
def _queryparser_add_rangeprocessor(self, rproc):
    # Keep every range processor referenced from the parser object.
    try:
        kept = self._rps
    except AttributeError:
        kept = self._rps = []
    kept.append(rproc)
    return __queryparser_add_rangeprocessor_orig(self, rproc)
_queryparser_add_rangeprocessor.__doc__ = __queryparser_add_rangeprocessor_orig.__doc__
QueryParser.add_rangeprocessor = _queryparser_add_rangeprocessor
del _queryparser_add_rangeprocessor
706 # When we set a FieldProcessor into the QueryParser, keep a python
707 # reference so it won't be deleted. This hack can probably be removed once
708 # xapian bug #186 is fixed.
__queryparser_add_prefix_orig = QueryParser.add_prefix
def _queryparser_add_prefix(self, s, proc):
    # A str/bytes second argument is a plain prefix; anything else is
    # treated as a processor object, which must be kept referenced.
    is_plain_prefix = isinstance(proc, (str, bytes))
    if not is_plain_prefix:
        try:
            kept = self._fps
        except AttributeError:
            kept = self._fps = []
        kept.append(proc)
    return __queryparser_add_prefix_orig(self, s, proc)
_queryparser_add_prefix.__doc__ = __queryparser_add_prefix_orig.__doc__
QueryParser.add_prefix = _queryparser_add_prefix
del _queryparser_add_prefix
__queryparser_add_boolean_prefix_orig = QueryParser.add_boolean_prefix
def _queryparser_add_boolean_prefix(self, s, proc, exclusive = True):
    # A str/bytes second argument is a plain prefix; anything else is
    # treated as a processor object, which must be kept referenced.
    is_plain_prefix = isinstance(proc, (str, bytes))
    if not is_plain_prefix:
        try:
            kept = self._fps
        except AttributeError:
            kept = self._fps = []
        kept.append(proc)
    return __queryparser_add_boolean_prefix_orig(self, s, proc, exclusive)
_queryparser_add_boolean_prefix.__doc__ = __queryparser_add_boolean_prefix_orig.__doc__
QueryParser.add_boolean_prefix = _queryparser_add_boolean_prefix
del _queryparser_add_boolean_prefix
730 # When we set a Stopper into the QueryParser, keep a python reference so it
731 # won't be deleted. This hack can probably be removed once xapian bug #186 is
732 # fixed.
__queryparser_set_stopper_orig = QueryParser.set_stopper
def _queryparser_set_stopper(self, stopper):
    # Store the stopper on the parser so a Python reference to it remains.
    setattr(self, '_stopper', stopper)
    result = __queryparser_set_stopper_orig(self, stopper)
    return result
_queryparser_set_stopper.__doc__ = __queryparser_set_stopper_orig.__doc__
QueryParser.set_stopper = _queryparser_set_stopper
del _queryparser_set_stopper
741 # When we set a Stopper into the TermGenerator, keep a python reference so it
742 # won't be deleted. This hack can probably be removed once xapian bug #186 is
743 # fixed.
__termgenerator_set_stopper_orig = TermGenerator.set_stopper
def _termgenerator_set_stopper(self, stopper):
    # Store the stopper on the generator so a Python reference to it remains.
    setattr(self, '_stopper', stopper)
    result = __termgenerator_set_stopper_orig(self, stopper)
    return result
_termgenerator_set_stopper.__doc__ = __termgenerator_set_stopper_orig.__doc__
TermGenerator.set_stopper = _termgenerator_set_stopper
del _termgenerator_set_stopper
752 # When we set a Sorter on enquire, keep a python reference so it won't be
753 # deleted. This hack can probably be removed once xapian bug #186 is fixed.
__enquire_set_sort_by_key_orig = Enquire.set_sort_by_key
def _enquire_set_sort_by_key(self, sorter, reverse):
    # Store the sorter on the Enquire so a Python reference to it remains.
    setattr(self, '_sorter', sorter)
    result = __enquire_set_sort_by_key_orig(self, sorter, reverse)
    return result
_enquire_set_sort_by_key.__doc__ = __enquire_set_sort_by_key_orig.__doc__
Enquire.set_sort_by_key = _enquire_set_sort_by_key
del _enquire_set_sort_by_key
__enquire_set_sort_by_key_then_relevance_orig = Enquire.set_sort_by_key_then_relevance
def _enquire_set_sort_by_key_then_relevance(self, sorter, reverse):
    # Store the sorter on the Enquire so a Python reference to it remains.
    setattr(self, '_sorter', sorter)
    result = __enquire_set_sort_by_key_then_relevance_orig(self, sorter, reverse)
    return result
_enquire_set_sort_by_key_then_relevance.__doc__ = __enquire_set_sort_by_key_then_relevance_orig.__doc__
Enquire.set_sort_by_key_then_relevance = _enquire_set_sort_by_key_then_relevance
del _enquire_set_sort_by_key_then_relevance
__enquire_set_sort_by_relevance_then_key_orig = Enquire.set_sort_by_relevance_then_key
def _enquire_set_sort_by_relevance_then_key(self, sorter, reverse):
    # Store the sorter on the Enquire so a Python reference to it remains.
    setattr(self, '_sorter', sorter)
    result = __enquire_set_sort_by_relevance_then_key_orig(self, sorter, reverse)
    return result
_enquire_set_sort_by_relevance_then_key.__doc__ = __enquire_set_sort_by_relevance_then_key_orig.__doc__
Enquire.set_sort_by_relevance_then_key = _enquire_set_sort_by_relevance_then_key
del _enquire_set_sort_by_relevance_then_key
779 ##########################################
780 # Support for iteration of posting lists #
781 ##########################################
class PostingItem(object):
    """An item returned from iteration of a posting list.

    The item supports access to the following attributes and properties:

     - `docid`: The document ID corresponding to this PostingItem.
     - `doclength`: The length of the document corresponding to this
       PostingItem.
     - `wdf`: The within document frequency of the term which the posting list
       is for in the document corresponding to this PostingItem.
     - `positer`: An iterator over the positions which the term corresponding
       to this posting list occurs at in the document corresponding to this
       PostingItem.  This is only available until the iterator which returned
       this item next moves.

    """
    __slots__ = ('_iter', 'docid', 'doclength', 'wdf',)

    def __init__(self, iter):
        """Build an item from the current position of the PostingIter `iter`.

        docid, doclength and wdf are read from the underlying iterator
        immediately; the position list is only fetched through `positer`.

        """
        self._iter = iter
        self.docid = iter._iter.get_docid()
        self.doclength = iter._iter.get_doclength()
        self.wdf = iter._iter.get_wdf()

    def _get_positer(self):
        """Get a position list iterator.

        The iterator will return integers representing the positions that the
        term occurs at in the document corresponding to this PostingItem.

        This will raise an InvalidOperationError exception if the iterator
        this item came from doesn't support position lists, or if the iterator
        has moved on since the item was returned from it.

        """
        if not self._iter._has_positions:
            raise InvalidOperationError("Iterator does not support position lists")
        # The owning iterator has moved if it reached the end or now points
        # at a different document from the one this item was built for.
        if self._iter._iter == self._iter._end or \
           self.docid != self._iter._iter.get_docid():
            raise InvalidOperationError("Iterator has moved, and does not support random access")
        return PositionIter(self._iter._iter._positionlist_begin(),
                            self._iter._iter._positionlist_end())
    positer = property(_get_positer, doc=
    """A position iterator for the current posting (if meaningful).

    The iterator will return integers representing the positions that the term
    occurs at.

    This will raise an InvalidOperationError exception if the iterator this
    item came from doesn't support position lists, or if the iterator has
    moved on since the item was returned from it.

    """)
class PostingIter(object):
    """Iterator over a posting list.

    Each step yields a PostingItem, which is evaluated lazily where
    appropriate.

    """
    __slots__ = ('_iter', '_end', '_has_positions', '_moved')

    def __init__(self, start, end, has_positions=False):
        self._iter, self._end = start, end
        self._has_positions = has_positions

        # The wrapped iterator starts out already pointing at a valid entry,
        # so __next__() must not advance it on the first call.  _moved is
        # True whenever the wrapped iterator is already positioned on the
        # entry that the next __next__() call should hand out.
        self._moved = True

    def __iter__(self):
        return self

    def __next__(self):
        # Step past the entry handed out last time, if not already done.
        if not self._moved:
            next(self._iter)
            self._moved = True

        if self._iter == self._end:
            raise StopIteration

        self._moved = False
        return PostingItem(self)

    def skip_to(self, docid):
        """Advance the iterator to `docid`.

        The iterator moves to the first document whose document ID is
        greater than or equal to `docid`.

        Raises StopIteration if there is no such document.

        Returns the entry the iterator ends up on.  The entry after it is
        what the following next() call returns (unless skip_to() is called
        again first).

        """
        if self._iter != self._end:
            self._iter.skip_to(docid)
        if self._iter == self._end:
            self._moved = True
            raise StopIteration
        self._moved = False
        landed = PostingItem(self)
        return landed
def _database_gen_postlist_iter(self, tname):
    """Return an iterator over the postings indexed by the term `tname`.

    An empty `tname` gives an iterator over all the documents instead: it
    contains one entry per document, always reports a wdf of 1, and does not
    allow access to a position iterator.

    """
    # Only a real term has position lists; the all-documents list does not.
    with_positions = len(tname) != 0
    return PostingIter(self._postlist_begin(tname), self._postlist_end(tname),
                       has_positions=with_positions)
Database.postlist = _database_gen_postlist_iter
915 ###########################################
916 # Support for iteration of position lists #
917 ###########################################
class PositionIter(object):
    """Iterator over the entries of a position list.

    The positions are returned as integers, in ascending order.

    """
    def __init__(self, start = 0, end = 0):
        self.iter, self.end = start, end

    def __iter__(self):
        return self

    def __next__(self):
        if self.iter == self.end:
            raise StopIteration
        # Read the current position before stepping the wrapped iterator on.
        position = self.iter.get_termpos()
        next(self.iter)
        return position
# Give Database a "positionlist()" method.
def _database_gen_positionlist_iter(self, docid, tname):
    """Return an iterator over the positions of a term in a given document.

    The positions are returned as integers, in ascending order.

    """
    first = self._positionlist_begin(docid, tname)
    last = self._positionlist_end(docid, tname)
    return PositionIter(first, last)
Database.positionlist = _database_gen_positionlist_iter
950 ########################################
951 # Support for iteration of value lists #
952 ########################################
class ValueItem(object):
    """A single entry produced when iterating over the values in a document.

    Each item exposes the following attributes:

     - `num`: The number of the value.
     - `value`: The contents of the value.

    """

    __slots__ = ('num', 'value', )

    def __init__(self, num, value):
        self.num, self.value = num, value
class ValueIter(object):
    """Iterator over every value stored in a document.

    ValueItem objects are returned, in ascending order of value number.

    """
    def __init__(self, start, end):
        self.iter, self.end = start, end

    def __iter__(self):
        return self

    def __next__(self):
        if self.iter == self.end:
            raise StopIteration
        # Capture the current entry before stepping the wrapped iterator on.
        entry = ValueItem(self.iter.get_valueno(), self.iter.get_value())
        next(self.iter)
        return entry
# Give Document a "values()" method.
def _document_gen_values_iter(self):
    """Return an iterator over every value stored in the document.

    ValueItem objects are returned, in ascending order of value number.

    """
    first = self._values_begin()
    last = self._values_end()
    return ValueIter(first, last)
Document.values = _document_gen_values_iter
1002 ##########################################
1003 # Support for iteration of value streams #
1004 ##########################################
class ValueStreamItem(object):
    """A single entry produced when iterating over a value stream.

    Each item exposes the following attributes:

     - `docid`: The docid for the item.
     - `value`: The contents of the value.

    """

    __slots__ = ('docid', 'value', )

    def __init__(self, docid, value):
        self.docid, self.value = docid, value
class ValueStreamIter(object):
    """Iterator over the entries of a value stream.

    ValueStreamItem objects are returned, each carrying the document ID and
    the value at the wrapped iterator's current position.

    """
    def __init__(self, start, end):
        self.iter, self.end = start, end
        # True whenever the wrapped iterator is positioned on an entry which
        # has not been returned yet (it starts out on the first entry), so
        # that advancing can be deferred until the next entry is wanted.
        self.moved = True

    def __iter__(self):
        return self

    def _item_or_stop(self):
        # Wrap the current position in a ValueStreamItem, or signal
        # exhaustion if the wrapped iterator has reached the end.
        if self.iter == self.end:
            self.moved = True
            raise StopIteration
        self.moved = False
        return ValueStreamItem(self.iter.get_docid(), self.iter.get_value())

    def __next__(self):
        if not self.moved:
            # Step past the entry which was returned last time.
            next(self.iter)
            self.moved = True
        return self._item_or_stop()

    def skip_to(self, docid):
        """Move the iterator forward to a given document ID.

        The iterator is advanced to the first document whose document ID is
        greater than or equal to `docid`.

        Raises StopIteration if there is no such item.

        The item the iterator was moved to is returned.  The item after it
        will be returned by the next call to next() (unless skip_to() is
        called again first).

        """
        if self.iter != self.end:
            self.iter.skip_to(docid)
        return self._item_or_stop()
# Give Database a "valuestream()" method in place of the raw
# valuestream_begin() and valuestream_end() methods, which are removed.
def wrapper():
    def detach(name):
        # Fetch a method from Database, then remove it from the class so
        # that it is only reachable through the closure below.
        method = getattr(Database, name)
        delattr(Database, name)
        return method

    vs_begin = detach('valuestream_begin')
    vs_end = detach('valuestream_end')

    def valuestream(self, slot):
        """Return an iterator over the values stored in a slot in the database.

        ValueStreamItem objects are returned, in ascending order of document
        id.

        """
        return ValueStreamIter(vs_begin(self, slot), vs_end(self, slot))
    return valuestream
Database.valuestream = wrapper()
del wrapper
1087 ##########################################
1088 # Support for iteration of LatLongCoords #
1089 ##########################################
class LatLongCoordsIter(object):
    """An iterator over all the coordinates in a LatLongCoords object.

    The iterator returns LatLongCoord objects.

    Two LatLongCoordsIter objects compare equal when their wrapped iterators
    report, via equals(), that they are at the same position.  The end
    markers are not compared.

    """
    def __init__(self, start, end):
        self.iter = start
        self.end = end

    def __iter__(self):
        return self

    def __eq__(self, other):
        # This class has no equals() method of its own (only the wrapped
        # iterator does), so the comparison must be delegated to self.iter.
        # Returning NotImplemented for foreign types lets Python fall back to
        # its default handling instead of raising.
        if not isinstance(other, LatLongCoordsIter):
            return NotImplemented
        return self.iter.equals(other.iter)

    def __ne__(self, other):
        if not isinstance(other, LatLongCoordsIter):
            return NotImplemented
        return not self.iter.equals(other.iter)

    def __next__(self):
        if self.iter.equals(self.end):
            raise StopIteration
        # Read the current coordinate before stepping the wrapped iterator on.
        coord = self.iter.get_coord()
        next(self.iter)
        return coord
# Make LatLongCoords iterable.
def _latlongcoords_iter(self):
    """Return an iterator over all the coordinates in a LatLongCoords.

    The iterator returns xapian.LatLongCoord objects.

    """
    first, last = self.begin(), self.end()
    return LatLongCoordsIter(first, last)
LatLongCoords.__iter__ = _latlongcoords_iter
del _latlongcoords_iter, LatLongCoordsIterator
# Make Enquire hold a python reference to each decider it is given, so that
# the deciders won't be deleted before the Enquire object.  This hack can
# probably be removed once xapian bug #186 is fixed.
_enquire_add_matchspy_orig = Enquire.add_matchspy
def _enquire_match_spy_add(self, decider):
    # Create the holding list lazily, the first time a spy is added.
    try:
        kept = self._deciders
    except AttributeError:
        kept = self._deciders = []
    kept.append(decider)
    _enquire_add_matchspy_orig(self, decider)
_enquire_match_spy_add.__doc__ = _enquire_add_matchspy_orig.__doc__
Enquire.add_matchspy = _enquire_match_spy_add
_enquire_clear_matchspies_orig = Enquire.clear_matchspies
def _enquire_match_spies_clear(self):
    _enquire_clear_matchspies_orig(self)
    # Drop the python references held on behalf of add_matchspy(), if any
    # were ever recorded on this object.
    if not hasattr(self, '_deciders'):
        return
    del self._deciders
_enquire_match_spies_clear.__doc__ = _enquire_clear_matchspies_orig.__doc__
Enquire.clear_matchspies = _enquire_match_spies_clear
# Wrap Stem.__init__() so that __disown__() is called on a StemImplementation
# object passed to it, so that Python won't delete it from under us.
_stem_init_orig = Stem.__init__
def _stem_init(self, *args):
    _stem_init_orig(self, *args)
    if not args:
        return
    impl = args[0]
    if isinstance(impl, StemImplementation):
        impl.__disown__()
_stem_init.__doc__ = _stem_init_orig.__doc__
Stem.__init__ = _stem_init
# Add wrappers for Query::MatchAll and Query::MatchNothing.  These are exposed
# as class attributes of Query: MatchAll is a Query constructed from the empty
# string, and MatchNothing is a default-constructed Query.
Query.MatchAll = Query("")
Query.MatchNothing = Query()
# Set the list of names which should be public.
# Note that this needs to happen at the end of xapian.py.
#
# Names are left out if they are private (leading underscore), are SWIG
# registration helpers ("*_swigregister"), or are raw SWIG iterator classes
# ("*Iterator").
#
# A generator expression is used rather than a module-level for loop so that
# no loop variable is left behind as a stray attribute of the module.  dir()
# is the outermost iterable, so it is still evaluated in the module's scope.
__all__ = tuple(
    name for name in dir()
    if not name.startswith('_')
    and not name.endswith(('_swigregister', 'Iterator'))
)
1177 /* vim:syntax=python:set expandtab: */