Update omegatest for grouping change
[xapian.git] / xapian-bindings / python / pythontest.py
blob2c127767f24eb842e5f4826e3b20c007ed2ac030
1 # Tests of Python-specific parts of the xapian bindings.
3 # Copyright (C) 2007 Lemur Consulting Ltd
4 # Copyright (C) 2008,2009,2010,2011,2013,2014,2015,2016 Olly Betts
5 # Copyright (C) 2010,2011 Richard Boulton
7 # This program is free software; you can redistribute it and/or
8 # modify it under the terms of the GNU General Public License as
9 # published by the Free Software Foundation; either version 2 of the
10 # License, or (at your option) any later version.
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with this program; if not, write to the Free Software
19 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
20 # USA
22 import os
23 import random
24 import shutil
25 import sys
26 import tempfile
27 import xapian
29 try:
30 import threading
31 have_threads = True
32 except ImportError:
33 have_threads = False
35 from testsuite import *
def setup_database():
    """Build the shared fixture: an in-memory database holding 5 documents.

    Document 1 is built on its own.  Documents 2 to 5 are successive states
    of a single Document object which is extended and added again each time,
    so the terms added for one of them are still present in the later ones.

    Returns the populated xapian.WritableDatabase.
    """
    database = xapian.WritableDatabase('', xapian.DB_BACKEND_INMEMORY)

    # Document 1.
    first = xapian.Document()
    first.set_data("is it cold?")
    first.add_term("is")
    first.add_posting("it", 1)
    first.add_posting("cold", 2)
    database.add_document(first)

    # Document 2 - this object keeps growing and is re-added below.
    growing = xapian.Document()
    growing.set_data("was it warm?")
    for position, word in enumerate(("was", "it", "warm"), 1):
        growing.add_posting(word, position)
    database.add_document(growing)

    # Document 3.
    growing.set_data("was it warm? two")
    growing.add_term("two", 2)
    growing.add_value(0, xapian.sortable_serialise(2))
    database.add_document(growing)

    # Document 4.
    growing.set_data("was it warm? three")
    growing.add_term("three", 3)
    growing.add_value(0, xapian.sortable_serialise(1.5))
    database.add_document(growing)

    # Document 5.
    growing.set_data("was it warm? four it")
    growing.add_term("four", 4)
    growing.add_term("it", 6)
    growing.add_posting("it", 7)
    growing.add_value(5, 'five')
    growing.add_value(9, 'nine')
    growing.add_value(0, xapian.sortable_serialise(2))
    database.add_document(growing)

    expect(database.get_doccount(), 5)
    return database
def test_exception_base():
    """Check that xapian exceptions have Exception as a base class.

    A xapian error is raised and must be caught by ``except Exception``; if
    it is not caught there it propagates out of this function instead.
    """
    try:
        raise xapian.InvalidOperationError("Test exception")
    except Exception:
        # Being caught here is the whole check, so there is nothing to do.
        # (The exception object itself is not needed, so it is not bound.)
        pass
def test_mset_iter():
    """Test iterators over MSets.

    The hits obtained by iterating an MSet are compared with ``MSet.items``,
    with ``get_hit()`` and with ``[]`` access, first for the whole MSet and
    then for every (start, maxitems) window with both values in 0..5.
    """
    def expect_same_hit(hit, item):
        """Expect every checked field of hit to equal that of item."""
        expect(hit.docid, item.docid)
        expect(hit.rank, item.rank)
        expect(hit.percent, item.percent)
        expect(hit.document.get_data(), item.document.get_data())
        expect(hit.collapse_key, item.collapse_key)
        expect(hit.collapse_count, item.collapse_count)

    database = setup_database()
    either = xapian.Query(xapian.Query.OP_OR, "was", "it")

    enquire = xapian.Enquire(database)
    enquire.set_query(either)
    mset = enquire.get_mset(0, 10)
    items = list(mset)
    expect(len(items), 5)
    expect(len(mset), len(items), "Expected number of items to be length of mset")

    context("testing returned item from mset")
    third = items[2]
    expect(third.docid, 4)
    expect(third.rank, 2)
    expect(third.percent, 86)
    expect(third.collapse_key, '')
    expect(third.collapse_count, 0)
    expect(third.document.get_data(), 'was it warm? three')

    # Test coverage for mset.items
    mset_items = mset.items
    expect(len(mset), len(mset_items), "Expected number of items to be length of mset")

    context("testing mset_items[2]")
    third_row = mset_items[2]
    expect(third_row[xapian.MSET_DID], 4)
    expect(third_row[xapian.MSET_WT] > 0.0, True)
    expect(third_row[xapian.MSET_RANK], 2)
    expect(third_row[xapian.MSET_PERCENT], 86)
    # MSET_DOCUMENT is documented but not implemented!  FIXME: resolve this -
    # if it has never worked, we may just want to remove the documentation for
    # it.
    #expect(mset_items[2][xapian.MSET_DOCUMENT].get_data(), 'was it warm? three')

    # Check iterators for sub-msets against the whole mset.
    for start in range(6):
        for maxitems in range(6):
            window = (start, maxitems)
            context("checking iterators for sub-mset from %d, maxitems %d" % window)
            submset = enquire.get_mset(start, maxitems)
            for num, item in enumerate(submset):
                where = (num, start, maxitems)
                context("testing hit %d for sub-mset from %d, maxitems %d" % where)
                expect(item.rank, num + start)

                context("comparing iterator item %d for sub-mset from %d, maxitems %d against hit" % where)
                expect_same_hit(submset.get_hit(num), item)

                context("comparing iterator item %d for sub-mset from %d, maxitems %d against hit from whole mset" % where)
                expect_same_hit(mset.get_hit(num + start), item)

                # The sub-mset is deliberately indexed afresh for each field.
                context("comparing iterator item %d for sub-mset from %d, maxitems %d against direct access with []" % where)
                expect(submset[num].docid, item.docid)
                expect(submset[num].rank, item.rank)
                expect(submset[num].percent, item.percent)
                expect(submset[num].document.get_data(), item.document.get_data())
                expect(submset[num].collapse_key, item.collapse_key)
                expect(submset[num].collapse_count, item.collapse_count)

            context("Checking out of range access to mset, for sub-mset from %d, maxitems %d" % window)
            # Test out-of-range access to mset:
            for bad_index in (-10, 10, -1 - len(submset), len(submset)):
                expect_exception(IndexError, 'Mset index out of range',
                                 submset.__getitem__, bad_index)

            # Check that the item contents remain valid when the iterator has
            # moved on.
            saved_items = list(submset)
            for num, item in enumerate(saved_items):
                context("comparing iterator item %d for sub-mset mset from %d, maxitems %d against saved item" % (num, start, maxitems))
                expect(submset[num].docid, item.docid)
                expect(submset[num].rank, item.rank)
                expect(submset[num].percent, item.percent)
                expect(submset[num].document.get_data(), item.document.get_data())
                expect(submset[num].collapse_key, item.collapse_key)
                expect(submset[num].collapse_count, item.collapse_count)

            # Check that the right number of items exist in the mset.
            context("checking length of sub-mset from %d, maxitems %d" % window)
            window_items = list(submset)
            expected_len = min(maxitems, 5 - start)
            expect(len(window_items), expected_len)
            expect(len(submset), expected_len)
def test_eset_iter():
    """Test iterators over ESets.

    An ESet is fetched for the same RSet once without and once with a query
    set on the Enquire object, and the resulting terms and weights are
    compared.
    """
    database = setup_database()
    either = xapian.Query(xapian.Query.OP_OR, "was", "it")
    relevant = xapian.RSet()
    relevant.add_document(3)

    context("getting eset items without a query")
    enquire = xapian.Enquire(database)
    eset = enquire.get_eset(10, relevant)
    items = list(eset)
    expect(len(items), 3)
    expect(len(items), len(eset))

    context("getting eset items with a query")
    enquire = xapian.Enquire(database)
    enquire.set_query(either)
    eset = enquire.get_eset(10, relevant)
    items2 = list(eset)
    expect(len(items2), 2)
    expect(len(items2), len(eset))

    # With the query set, items 0 and 1 are expected to correspond to
    # items 0 and 2 of the list obtained without it.
    context("comparing eset items with a query to those without")
    expect(items2[0].term, items[0].term)
    expect(items2[1].term, items[2].term)

    context("comparing eset weights with a query to those without")
    expect(items2[0].weight, items[0].weight)
    expect(items2[1].weight, items[2].weight)
def test_matchingterms_iter():
    """Test Enquire.matching_terms iterator.

    matching_terms() is called with a docid, with an item obtained by
    iterating the MSet, and with the result of MSet.get_hit().
    """
    database = setup_database()
    wanted = xapian.Query(xapian.Query.OP_OR, ("was", "it", "warm", "two"))

    # Prior to 1.2.4 Enquire.matching_terms() leaked references to its members.

    enquire = xapian.Enquire(database)
    enquire.set_query(wanted)
    mset = enquire.get_mset(0, 10)

    for item in mset:
        # The same term names are expected whether the hit is identified by
        # its docid or passed as the item itself.
        by_docid = list(enquire.matching_terms(item.docid))
        by_item = list(enquire.matching_terms(item))
        expect(by_docid, by_item)

    top_terms = list(enquire.matching_terms(mset.get_hit(0)))
    expect(top_terms, ['it', 'two', 'warm', 'was'])
def test_queryterms_iter():
    """Test Query term iterator.

    Iterating a Query is expected to yield its terms in the order checked
    below.
    """
    # The database is set up but not otherwise used by this test.
    db = setup_database()
    wanted = xapian.Query(xapian.Query.OP_OR, ("was", "it", "warm", "two"))

    # Collect the term names by iterating the query itself.
    names = list(wanted)
    expect(names, ['it', 'two', 'warm', 'was'])
def test_queryparser_stoplist_iter():
    """Test QueryParser stoplist iterator.

    Three configurations are checked in turn: a stemmer with no stopper, a
    stopper with no stemmer, and a stopper together with a stemmer.
    """
    text = 'to be or not to be is the questions'
    stemmer = xapian.Stem('en')

    # Check behaviour without having set a stoplist.
    parser = xapian.QueryParser()
    parser.set_stemmer(stemmer)
    parser.set_stemming_strategy(parser.STEM_SOME)
    expect(list(parser.stoplist()), [])
    parsed = parser.parse_query(text)
    expect(list(parser.stoplist()), [])
    expect(str(parsed),
           'Query((Zto@1 OR Zbe@2 OR Zor@3 OR Znot@4 OR Zto@5 OR Zbe@6 OR '
           'Zis@7 OR Zthe@8 OR Zquestion@9))')

    # Check behaviour with a stoplist, but no stemmer
    parser = xapian.QueryParser()
    stopper = xapian.SimpleStopper()
    for stopword in ('to', 'not', 'question'):
        stopper.add(stopword)
    parser.set_stopper(stopper)
    expect(list(parser.stoplist()), [])
    parsed = parser.parse_query(text)

    expect(list(parser.stoplist()), ['to', 'not', 'to'])
    expect(str(parsed),
           'Query((be@2 OR or@3 OR be@6 OR is@7 OR the@8 OR questions@9))')

    # Check behaviour with a stoplist and a stemmer
    parser.set_stemmer(stemmer)
    parser.set_stemming_strategy(parser.STEM_SOME)
    # Shouldn't have changed since previous query.
    expect(list(parser.stoplist()), ['to', 'not', 'to'])
    parsed = parser.parse_query(text)

    expect(list(parser.stoplist()), ['to', 'not', 'to'])
    expect(str(parsed),
           'Query((Zbe@2 OR Zor@3 OR Zbe@6 OR Zis@7 OR Zthe@8 OR Zquestion@9))')
def test_queryparser_unstem_iter():
    """Test QueryParser unstemlist iterator.

    The unstem lists are expected to be empty before a query is parsed and
    filled in afterwards, both without and with a stemmer.
    """
    text = 'to question questions'
    stemmer = xapian.Stem('en')

    # Without a stemmer.
    parser = xapian.QueryParser()
    for term in ('to', 'question', 'questions'):
        expect(list(parser.unstemlist(term)), [])
    parsed = parser.parse_query(text)

    expect(list(parser.unstemlist('to')), ['to'])
    expect(list(parser.unstemlist('question')), ['question'])
    expect(list(parser.unstemlist('questions')), ['questions'])
    expect(str(parsed),
           'Query((to@1 OR question@2 OR questions@3))')

    # With a stemmer.
    parser = xapian.QueryParser()
    parser.set_stemmer(stemmer)
    parser.set_stemming_strategy(parser.STEM_SOME)
    for term in ('Zto', 'Zquestion', 'Zquestions'):
        expect(list(parser.unstemlist(term)), [])
    parsed = parser.parse_query(text)

    expect(list(parser.unstemlist('Zto')), ['to'])
    expect(list(parser.unstemlist('Zquestion')), ['question', 'questions'])
    expect(list(parser.unstemlist('Zquestions')), [])
    expect(str(parsed),
           'Query((Zto@1 OR Zquestion@2 OR Zquestion@3))')
def test_allterms_iter():
    """Test all-terms iterator on Database.

    The checks expect that wdf and position lists are not available from
    this iterator, that termfreq is no longer readable from an item once
    the iterator has moved on, and that allterms() with a prefix yields the
    matching subset of the terms.
    """
    db = setup_database()

    context("making a list of the term names and frequencies")
    terms = []
    freqs = []
    for entry in db:
        terms.append(entry.term)
        expect_exception(xapian.InvalidOperationError,
                         'Iterator does not support wdfs',
                         getattr, entry, 'wdf')
        freqs.append(entry.termfreq)
        expect_exception(xapian.InvalidOperationError,
                         'Iterator does not support position lists',
                         getattr, entry, 'positer')

    context("checking that items are no longer valid once the iterator has moved on")
    stale = [entry for entry in db]

    expect(len(stale), len(terms))
    for entry, term in zip(stale, terms):
        expect(entry.term, term)

    expect(len(stale), len(freqs))
    for entry in stale:
        expect_exception(xapian.InvalidOperationError,
                         'Iterator has moved, and does not support random access',
                         getattr, entry, 'termfreq')

    context("checking that restricting the terms iterated with a prefix works")
    # Work out what the prefixed iteration should give from the full lists.
    with_prefix = [(term, freq) for term, freq in zip(terms, freqs)
                   if term[0] == 't']
    seen = 0
    for entry in db.allterms('t'):
        expected_term, expected_freq = with_prefix[seen]
        expect(entry.term, expected_term)
        expect(entry.termfreq, expected_freq)
        seen += 1
    expect(len(with_prefix), seen)
def test_termlist_iter():
    """Test termlist iterator on Database.

    Covers plain iteration over the termlist of document 3, skip_to() in
    its various forms (forwards, to the same place, backwards, past the
    end), and which attributes of saved items can still be read after the
    iterator has moved on.
    """
    db = setup_database()

    # Make lists of the item contents
    terms = []
    wdfs = []
    freqs = []
    positers = []
    for termitem in db.termlist(3):
        terms.append(termitem.term)
        wdfs.append(termitem.wdf)
        freqs.append(termitem.termfreq)
        positers.append([pos for pos in termitem.positer])

    expect(terms, ['it', 'two', 'warm', 'was'])
    expect(wdfs, [1, 2, 1, 1])
    expect(freqs, [5, 3, 4, 4])
    expect(positers, [[2], [], [3], [1]])

    # Test skip_to().
    tliter = db.termlist(3)

    # skip to an item before the first item.
    termitem = tliter.skip_to('a')
    expect((termitem.term, termitem.wdf, termitem.termfreq,
            [pos for pos in termitem.positer]), ('it', 1, 5, [2]))

    # skip forwards to an item.
    termitem = tliter.skip_to('two')
    expect((termitem.term, termitem.wdf, termitem.termfreq,
            [pos for pos in termitem.positer]), ('two', 2, 3, []))

    # skip to same place (should return same item)
    termitem = tliter.skip_to('two')
    expect((termitem.term, termitem.wdf, termitem.termfreq,
            [pos for pos in termitem.positer]), ('two', 2, 3, []))

    # next() after a skip_to(), should return next item.
    termitem = next(tliter)
    expect((termitem.term, termitem.wdf, termitem.termfreq,
            [pos for pos in termitem.positer]), ('warm', 1, 4, [3]))

    # skip to same place (should return same item)
    termitem = tliter.skip_to('warm')
    expect((termitem.term, termitem.wdf, termitem.termfreq,
            [pos for pos in termitem.positer]), ('warm', 1, 4, [3]))

    # skip backwards (should return same item).  Only the call itself is
    # exercised here; the returned item is not checked.
    termitem = tliter.skip_to('a')

    # skip to after end.
    expect_exception(StopIteration, '', tliter.skip_to, 'zoo')
    # skip backwards (should still return StopIteration).
    expect_exception(StopIteration, '', tliter.skip_to, 'a')
    # next should continue to return StopIteration.
    expect_exception(StopIteration, '', next, tliter)

    # Make a list of the terms (so we can test if they're still valid
    # once the iterator has moved on).
    termitems = [termitem for termitem in db.termlist(3)]

    expect(len(termitems), len(terms))
    for i in range(len(termitems)):
        expect(termitems[i].term, terms[i])

    expect(len(termitems), len(wdfs))
    for i in range(len(termitems)):
        expect(termitems[i].wdf, wdfs[i])

    expect(len(termitems), len(freqs))
    for termitem in termitems:
        expect_exception(xapian.InvalidOperationError,
                         'Iterator has moved, and does not support random access',
                         getattr, termitem, 'termfreq')

    # Compare against the position lists gathered above (this check used to
    # repeat the comparison against freqs).
    expect(len(termitems), len(positers))
    for termitem in termitems:
        expect_exception(xapian.InvalidOperationError,
                         'Iterator has moved, and does not support random access',
                         getattr, termitem, 'positer')
def test_dbdocument_iter():
    """Test document terms iterator for document taken from a database.

    Document 3 is fetched from the fixture database and iterated; the
    checks then cover which attributes of saved items can still be read
    after the iterator has moved on.
    """
    db = setup_database()

    doc = db.get_document(3)

    # Make lists of the item contents
    terms = []
    wdfs = []
    freqs = []
    positers = []
    for termitem in doc:
        terms.append(termitem.term)
        wdfs.append(termitem.wdf)
        freqs.append(termitem.termfreq)
        positers.append([pos for pos in termitem.positer])

    expect(terms, ['it', 'two', 'warm', 'was'])
    expect(wdfs, [1, 2, 1, 1])
    expect(freqs, [5, 3, 4, 4])
    expect(positers, [[2], [], [3], [1]])

    # Make a list of the terms (so we can test if they're still valid
    # once the iterator has moved on).
    termitems = [termitem for termitem in doc]

    expect(len(termitems), len(terms))
    for i in range(len(termitems)):
        expect(termitems[i].term, terms[i])

    expect(len(termitems), len(wdfs))
    for i in range(len(termitems)):
        expect(termitems[i].wdf, wdfs[i])

    expect(len(termitems), len(freqs))
    for termitem in termitems:
        expect_exception(xapian.InvalidOperationError,
                         'Iterator has moved, and does not support random access',
                         getattr, termitem, 'termfreq')

    # Compare against the position lists gathered above (this check used to
    # repeat the comparison against freqs).
    expect(len(termitems), len(positers))
    for termitem in termitems:
        expect_exception(xapian.InvalidOperationError,
                         'Iterator has moved, and does not support random access',
                         getattr, termitem, 'positer')
def test_newdocument_iter():
    """Test document terms iterator for newly created document.

    The document is never added to a database, and reading termfreq from
    its term items is expected to raise InvalidOperationError.
    """
    doc = xapian.Document()
    doc.set_data("was it warm? two")
    for position, word in enumerate(("was", "it", "warm"), 1):
        doc.add_posting(word, position)
    doc.add_term("two", 2)

    # Gather what each item reports while the iterator is positioned on it.
    terms = []
    wdfs = []
    positers = []
    for entry in doc:
        terms.append(entry.term)
        wdfs.append(entry.wdf)
        expect_exception(xapian.InvalidOperationError,
                         "Can't get term frequency from a document termlist "
                         "which is not associated with a database.",
                         getattr, entry, 'termfreq')
        positers.append([pos for pos in entry.positer])

    expect(terms, ['it', 'two', 'warm', 'was'])
    expect(wdfs, [1, 2, 1, 1])
    expect(positers, [[2], [], [3], [1]])

    # Keep the items from a complete pass, so that they can be examined
    # after the iterator has moved on.
    stale = [entry for entry in doc]

    expect(len(stale), len(terms))
    for entry, term in zip(stale, terms):
        expect(entry.term, term)

    expect(len(stale), len(wdfs))
    for entry, wdf in zip(stale, wdfs):
        expect(entry.wdf, wdf)

    for entry in stale:
        expect_exception(xapian.InvalidOperationError,
                         'Iterator has moved, and does not support random access',
                         getattr, entry, 'termfreq')

    expect(len(stale), len(positers))
    for entry in stale:
        expect_exception(xapian.InvalidOperationError,
                         'Iterator has moved, and does not support random access',
                         getattr, entry, 'positer')
def test_postinglist_iter():
    """Test postinglist iterator on Database.

    Covers plain iteration over the postings of the term 'it', skip_to() in
    its various forms, and which attributes of saved postings can still be
    read after the iterator has moved on.
    """
    def snapshot(posting):
        """Return (docid, doclength, wdf, positions) read from posting."""
        return (posting.docid, posting.doclength, posting.wdf,
                [pos for pos in posting.positer])

    db = setup_database()

    # Gather what each posting reports while the iterator is positioned on it.
    docids = []
    doclengths = []
    wdfs = []
    positers = []
    for posting in db.postlist('it'):
        docids.append(posting.docid)
        doclengths.append(posting.doclength)
        wdfs.append(posting.wdf)
        positers.append([pos for pos in posting.positer])

    expect(docids, [1, 2, 3, 4, 5])
    expect(doclengths, [3, 3, 5, 8, 19])
    expect(wdfs, [1, 1, 1, 1, 8])
    expect(positers, [[1], [2], [2], [2], [2, 7]])

    # Test skip_to().
    pliter = db.postlist('it')

    # A target before the first posting gives the first posting.
    expect(snapshot(pliter.skip_to(0)), (1, 3, 1, [1]))

    # Skipping forwards lands on the requested posting.
    expect(snapshot(pliter.skip_to(3)), (3, 5, 1, [2]))

    # Skipping to the current position returns the same posting.
    expect(snapshot(pliter.skip_to(3)), (3, 5, 1, [2]))

    # next() after a skip_to() returns the following posting.
    expect(snapshot(next(pliter)), (4, 8, 1, [2]))

    # Skipping to the current position returns the same posting.
    expect(snapshot(pliter.skip_to(4)), (4, 8, 1, [2]))

    # Skipping backwards returns the same posting too.
    expect(snapshot(pliter.skip_to(2)), (4, 8, 1, [2]))

    # Skipping past the end raises StopIteration ...
    expect_exception(StopIteration, '', pliter.skip_to, 6)
    # ... and so does a further skip_to() once the iterator is exhausted ...
    expect_exception(StopIteration, '', pliter.skip_to, 6)
    # ... as does next().
    expect_exception(StopIteration, '', next, pliter)

    # Keep the postings from a complete pass, so that they can be examined
    # after the iterator has moved on.
    postings = list(db.postlist('it'))

    expect(len(postings), len(docids))
    for posting, docid in zip(postings, docids):
        expect(posting.docid, docid)

    expect(len(postings), len(doclengths))
    for posting, doclength in zip(postings, doclengths):
        expect(posting.doclength, doclength)

    expect(len(postings), len(wdfs))
    for posting, wdf in zip(postings, wdfs):
        expect(posting.wdf, wdf)

    expect(len(postings), len(positers))
    for posting in postings:
        expect_exception(xapian.InvalidOperationError,
                         'Iterator has moved, and does not support random access',
                         getattr, posting, 'positer')
def test_valuestream_iter():
    """Test a valuestream iterator on Database.

    Covers plain iteration over several value slots, and the interaction of
    skip_to() and next() (including once StopIteration has been raised).

    The iterator is advanced with the builtin next(), as the other iterator
    tests in this file do, rather than by calling its next() method.
    """
    db = setup_database()

    # Check basic iteration
    expect([(item.docid, item.value) for item in db.valuestream(0)],
           [(3, '\xa4'), (4, '\xa2'), (5, '\xa4')])
    expect([(item.docid, item.value) for item in db.valuestream(1)], [])
    expect([(item.docid, item.value) for item in db.valuestream(5)],
           [(5, "five")])
    expect([(item.docid, item.value) for item in db.valuestream(9)],
           [(5, "nine")])

    # Test skip_to() on iterator with no values, and behaviours when called
    # after already returning StopIteration.
    i = db.valuestream(1)
    expect_exception(StopIteration, "", i.skip_to, 1)
    expect_exception(StopIteration, "", i.skip_to, 1)
    i = db.valuestream(1)
    expect_exception(StopIteration, "", i.skip_to, 1)
    expect_exception(StopIteration, "", next, i)
    i = db.valuestream(1)
    expect_exception(StopIteration, "", next, i)
    expect_exception(StopIteration, "", i.skip_to, 1)

    # Test that skipping to a value works, and that skipping doesn't have to
    # advance.
    i = db.valuestream(0)
    item = i.skip_to(4)
    expect((item.docid, item.value), (4, '\xa2'))
    item = i.skip_to(4)
    expect((item.docid, item.value), (4, '\xa2'))
    item = i.skip_to(1)
    expect((item.docid, item.value), (4, '\xa2'))
    item = i.skip_to(5)
    expect((item.docid, item.value), (5, '\xa4'))
    expect_exception(StopIteration, "", i.skip_to, 6)

    # Test that alternating skip_to() and next() works.
    i = db.valuestream(0)
    item = next(i)
    expect((item.docid, item.value), (3, '\xa4'))
    item = i.skip_to(4)
    expect((item.docid, item.value), (4, '\xa2'))
    item = next(i)
    expect((item.docid, item.value), (5, '\xa4'))
    expect_exception(StopIteration, "", i.skip_to, 6)

    # Test that next works correctly after skip_to() called with an earlier
    # item.
    i = db.valuestream(0)
    item = i.skip_to(4)
    expect((item.docid, item.value), (4, '\xa2'))
    item = i.skip_to(1)
    expect((item.docid, item.value), (4, '\xa2'))
    item = next(i)
    expect((item.docid, item.value), (5, '\xa4'))

    # Test that next works correctly after skipping to last item
    i = db.valuestream(0)
    item = i.skip_to(5)
    expect((item.docid, item.value), (5, '\xa4'))
    expect_exception(StopIteration, "", next, i)
def test_position_iter():
    """Test position iterator for a document in a database.

    Reads the positions of the term 'it' in document 5 of the fixture.
    """
    database = setup_database()

    # The document is fetched, but the positions below are read through the
    # database rather than through this object.
    doc = database.get_document(5)

    # Collect every position the iterator yields.
    found = list(database.positionlist(5, 'it'))

    expect(found, [2, 7])
def test_value_iter():
    """Test iterators over list of values in a document.

    Document 5 of the fixture is expected to give values in slots 0, 5 and
    9, in that order.
    """
    database = setup_database()
    doc = database.get_document(5)

    items = list(doc.values())
    wanted = [
        (0, xapian.sortable_serialise(2)),
        (5, 'five'),
        (9, 'nine'),
    ]
    expect(len(items), 3)
    for item, (slot, value) in zip(items, wanted):
        expect(item.num, slot)
        expect(item.value, value)
def test_synonyms_iter():
    """Test iterators over list of synonyms in a database.

    Synonyms are added through a writable database; a reader opened before
    commit() is expected to see none of them, and one opened afterwards is
    expected to see all of them.

    The on-disk database is closed and removed in a ``finally`` clause, so
    that a failing check does not leave the directory behind.
    """
    dbpath = 'db_test_synonyms_iter'
    db = xapian.WritableDatabase(dbpath, xapian.DB_CREATE_OR_OVERWRITE)
    dbr = None
    try:
        db.add_synonym('hello', 'hi')
        db.add_synonym('hello', 'howdy')

        expect([item for item in db.synonyms('foo')], [])
        expect([item for item in db.synonyms('hello')], ['hi', 'howdy'])
        expect([item for item in db.synonym_keys()], ['hello'])
        expect([item for item in db.synonym_keys('foo')], [])
        expect([item for item in db.synonym_keys('he')], ['hello'])
        expect([item for item in db.synonym_keys('hello')], ['hello'])

        # A reader opened before the commit.
        dbr = xapian.Database(dbpath)
        expect([item for item in dbr.synonyms('foo')], [])
        expect([item for item in dbr.synonyms('hello')], [])
        expect([item for item in dbr.synonym_keys()], [])
        expect([item for item in dbr.synonym_keys('foo')], [])
        expect([item for item in dbr.synonym_keys('he')], [])
        expect([item for item in dbr.synonym_keys('hello')], [])

        db.commit()

        expect([item for item in db.synonyms('foo')], [])
        expect([item for item in db.synonyms('hello')], ['hi', 'howdy'])
        expect([item for item in db.synonym_keys()], ['hello'])
        expect([item for item in db.synonym_keys('foo')], [])
        expect([item for item in db.synonym_keys('he')], ['hello'])
        expect([item for item in db.synonym_keys('hello')], ['hello'])

        # A reader opened after the commit.
        dbr = xapian.Database(dbpath)
        expect([item for item in dbr.synonyms('foo')], [])
        expect([item for item in dbr.synonyms('hello')], ['hi', 'howdy'])
        expect([item for item in dbr.synonym_keys()], ['hello'])
        expect([item for item in dbr.synonym_keys('foo')], [])
        expect([item for item in dbr.synonym_keys('he')], ['hello'])
        expect([item for item in dbr.synonym_keys('hello')], ['hello'])

        # The writer is closed before the consistency check is run.
        db.close()
        expect(xapian.Database.check(dbpath), 0)
    finally:
        # NOTE(review): relies on close() being harmless on a database
        # which has already been closed - confirm against the API docs.
        db.close()
        if dbr is not None:
            dbr.close()
        shutil.rmtree(dbpath)
def test_metadata_keys_iter():
    """Test iterators over list of metadata keys in a database.

    Keys whose value is (or is reset to) the empty string are expected not
    to be listed.  A reader opened before commit() is expected to see no
    keys, and one opened afterwards is expected to see all of them.

    The on-disk database is closed and removed in a ``finally`` clause, so
    that a failing check does not leave the directory behind.
    """
    dbpath = 'db_test_metadata_iter'
    db = xapian.WritableDatabase(dbpath, xapian.DB_CREATE_OR_OVERWRITE)
    dbr = None
    try:
        db.set_metadata('author', 'richard')
        db.set_metadata('item1', 'hello')
        db.set_metadata('item1', 'hi')
        db.set_metadata('item2', 'howdy')
        db.set_metadata('item3', '')
        db.set_metadata('item4', 'goodbye')
        db.set_metadata('item4', '')
        db.set_metadata('type', 'greeting')

        expect([item for item in db.metadata_keys()],
               ['author', 'item1', 'item2', 'type'])
        expect([item for item in db.metadata_keys('foo')], [])
        expect([item for item in db.metadata_keys('item')], ['item1', 'item2'])
        expect([item for item in db.metadata_keys('it')], ['item1', 'item2'])
        expect([item for item in db.metadata_keys('type')], ['type'])

        # A reader opened before the commit.
        dbr = xapian.Database(dbpath)
        expect([item for item in dbr.metadata_keys()], [])
        expect([item for item in dbr.metadata_keys('foo')], [])
        expect([item for item in dbr.metadata_keys('item')], [])
        expect([item for item in dbr.metadata_keys('it')], [])
        expect([item for item in dbr.metadata_keys('type')], [])

        db.commit()
        expect([item for item in db.metadata_keys()],
               ['author', 'item1', 'item2', 'type'])
        expect([item for item in db.metadata_keys('foo')], [])
        expect([item for item in db.metadata_keys('item')], ['item1', 'item2'])
        expect([item for item in db.metadata_keys('it')], ['item1', 'item2'])
        expect([item for item in db.metadata_keys('type')], ['type'])

        # A reader opened after the commit.
        dbr = xapian.Database(dbpath)
        expect([item for item in dbr.metadata_keys()],
               ['author', 'item1', 'item2', 'type'])
        expect([item for item in dbr.metadata_keys('foo')], [])
        expect([item for item in dbr.metadata_keys('item')], ['item1', 'item2'])
        expect([item for item in dbr.metadata_keys('it')], ['item1', 'item2'])
        expect([item for item in dbr.metadata_keys('type')], ['type'])

        # The writer is closed before the consistency check is run.
        db.close()
        expect(xapian.Database.check(dbpath), 0)
    finally:
        # NOTE(review): relies on close() being harmless on a database
        # which has already been closed - confirm against the API docs.
        db.close()
        if dbr is not None:
            dbr.close()
        shutil.rmtree(dbpath)
def test_spell():
    """Test basic spelling correction features.

    Spellings are added through a writable database; a reader opened before
    commit() is expected to see none of them, and one opened afterwards is
    expected to see all of them.

    The on-disk database is closed and removed in a ``finally`` clause, so
    that a failing check does not leave the directory behind.
    """
    dbpath = 'db_test_spell'
    db = xapian.WritableDatabase(dbpath, xapian.DB_CREATE_OR_OVERWRITE)
    dbr = None
    try:
        db.add_spelling('hello')
        db.add_spelling('mell', 2)
        expect(db.get_spelling_suggestion('hell'), 'mell')
        expect([(item.term, item.termfreq) for item in db.spellings()],
               [('hello', 1), ('mell', 2)])

        # A reader opened before the commit.
        dbr = xapian.Database(dbpath)
        expect(dbr.get_spelling_suggestion('hell'), '')
        expect([(item.term, item.termfreq) for item in dbr.spellings()], [])

        db.commit()

        # A reader opened after the commit.
        dbr = xapian.Database(dbpath)
        expect(db.get_spelling_suggestion('hell'), 'mell')
        expect(dbr.get_spelling_suggestion('hell'), 'mell')
        expect([(item.term, item.termfreq) for item in dbr.spellings()],
               [('hello', 1), ('mell', 2)])
    finally:
        # Writer first, then reader, then the directory itself.
        db.close()
        if dbr is not None:
            dbr.close()
        shutil.rmtree(dbpath)
def test_queryparser_custom_vrp():
    """Test QueryParser with a custom (in python) ValueRangeProcessor.

    The processor is a Python subclass whose __call__ returns slot 7 with
    the range ends prefixed by "A" and "B".
    """
    class MyVRP(xapian.ValueRangeProcessor):
        def __init__(self):
            xapian.ValueRangeProcessor.__init__(self)

        def __call__(self, begin, end):
            # (value slot, processed begin, processed end)
            return (7, "A"+begin, "B"+end)

    parser = xapian.QueryParser()
    processor = MyVRP()

    parser.add_valuerangeprocessor(processor)
    parsed = parser.parse_query('5..8')

    expect(str(parsed),
           'Query(0 * VALUE_RANGE 7 A5 B8)')
def test_queryparser_custom_vrp_deallocation():
    """Test that QueryParser doesn't delete ValueRangeProcessors too soon.

    The processor is created inside a helper function, so the parser is the
    only thing in this test still referring to it when the query is parsed.
    """
    class MyVRP(xapian.ValueRangeProcessor):
        def __init__(self):
            xapian.ValueRangeProcessor.__init__(self)

        def __call__(self, begin, end):
            # (value slot, processed begin, processed end)
            return (7, "A"+begin, "B"+end)

    def make_parser():
        # The local name for the processor goes away when this returns.
        parser = xapian.QueryParser()
        processor = MyVRP()
        parser.add_valuerangeprocessor(processor)
        return parser

    queryparser = make_parser()
    parsed = queryparser.parse_query('5..8')

    expect(str(parsed),
           'Query(0 * VALUE_RANGE 7 A5 B8)')
def test_scale_weight():
    """Test query OP_SCALE_WEIGHT feature.

    For multipliers 0, 1 and 2.5 the weights of the scaled query are compared
    with the unscaled weights times the multiplier; a multiplier of -1 is
    expected to be rejected with InvalidArgumentError.
    """
    db = setup_database()
    for mult in (0, 1, 2.5):
        context("checking queries with OP_SCALE_WEIGHT with a multiplier of %r" %
                mult)
        plain = xapian.Query("it")
        scaled = xapian.Query(xapian.Query.OP_SCALE_WEIGHT, plain, mult)

        enquire = xapian.Enquire(db)
        enquire.set_query(plain)
        plain_mset = enquire.get_mset(0, 10)
        enquire.set_query(scaled)
        scaled_mset = enquire.get_mset(0, 10)

        # Weights are compared as integers after multiplying by a million.
        if mult > 0:
            expected = [(int(item.weight * mult * 1000000), item.docid)
                        for item in plain_mset]
        else:
            # Every weight is expected to be 0, with the hits in docid order.
            expected = sorted([(0, item.docid) for item in plain_mset])
        actual = [(int(item.weight * 1000000), item.docid)
                  for item in scaled_mset]
        expect(actual, expected)

    context("checking queries with OP_SCALE_WEIGHT with a multiplier of -1")
    plain = xapian.Query("it")
    expect_exception(xapian.InvalidArgumentError,
                     "OP_SCALE_WEIGHT requires factor >= 0",
                     xapian.Query,
                     xapian.Query.OP_SCALE_WEIGHT, plain, -1)
def test_weight_normalise():
    """Check that OP_SCALE_WEIGHT can be used to normalise query weights.

    For each query string, a search asking for no results is run to obtain
    MSet.get_max_possible().  The query is then wrapped in OP_SCALE_WEIGHT
    with a factor of 1 / max_possible, and the scaled query is checked to
    report a maximum possible weight of 1 (rounded to the nearest millionth)
    with every returned weight greater than 0 and at most 1.

    The query "notpresent" is only checked to have a maximum possible weight
    of 0, since that can't be turned into a scale factor.

    """
    query_strings = (
        "it",
        "was",
        "it was",
        "it was four",
        "it was four five",
        "\"was it warm\" four notpresent",
        "notpresent",
    )
    db = setup_database()
    for query in query_strings:
        context("checking query %r using OP_SCALE_WEIGHT to normalise the weights" % query)
        parser = xapian.QueryParser()
        unscaled = parser.parse_query(query)
        enquire = xapian.Enquire(db)
        enquire.set_query(unscaled)
        empty_mset = enquire.get_mset(0, 0)

        # Check the max_attained value is 0 - this gives us some reassurance
        # that the match didn't actually do the work of calculating any
        # results.
        expect(empty_mset.get_max_attained(), 0)

        max_possible = empty_mset.get_max_possible()
        if query == "notpresent":
            expect(max_possible, 0)
            continue

        factor = 1.0 / max_possible
        scaled = xapian.Query(xapian.Query.OP_SCALE_WEIGHT, unscaled, factor)

        enquire = xapian.Enquire(db)
        enquire.set_query(scaled)
        scaled_mset = enquire.get_mset(0, 10)
        # max_possible should be 1 (excluding rounding errors) for the
        # scaled query.
        expect(int(scaled_mset.get_max_possible() * 1000000.0 + 0.5), 1000000)
        for item in scaled_mset:
            expect(item.weight > 0, True)
            expect(item.weight <= 1, True)
def test_valuesetmatchdecider():
    """Basic checks of ValueSetMatchDecider in both of its modes.

    """
    # Decider constructed with True: only accepts documents whose slot 0
    # value has been added to the decider.
    inclusive = xapian.ValueSetMatchDecider(0, True)
    doc = xapian.Document()
    expect(inclusive(doc), False)

    inclusive.add_value('foo')
    doc.add_value(0, 'foo')
    expect(inclusive(doc), True)

    inclusive.remove_value('foo')
    expect(inclusive(doc), False)

    # Decider constructed with False: accepts documents unless their slot 0
    # value has been added to the decider.
    exclusive = xapian.ValueSetMatchDecider(0, False)
    expect(exclusive(doc), True)

    exclusive.add_value('foo')
    expect(exclusive(doc), False)
def test_postingsource():
    """Exercise a PostingSource implemented in Python.

    The source returns the odd docids up to a limit, with a weight which
    starts at the document count and drops by one for each posting.  The
    query holding the source is passed through several nested scopes to
    check that the source is kept alive for as long as it's needed.

    """
    class OddPostingSource(xapian.PostingSource):
        def __init__(self, limit):
            xapian.PostingSource.__init__(self)
            self.max = limit

        def init(self, db):
            self.current = -1
            self.weight = db.get_doccount() + 1
            self.set_maxweight(self.weight)

        def get_termfreq_min(self):
            return 0

        def get_termfreq_est(self):
            return int(self.max / 2)

        def get_termfreq_max(self):
            return self.max

        def next(self, minweight):
            self.current += 2
            self.weight -= 1.0
            self.set_maxweight(self.weight)

        def at_end(self):
            return self.current > self.max

        def get_docid(self):
            return self.current

        def get_weight(self):
            return self.weight

    dbpath = 'db_test_postingsource'
    db = xapian.WritableDatabase(dbpath, xapian.DB_CREATE_OR_OVERWRITE)
    for _ in range(10):
        db.add_document(xapian.Document())

    # Do a dance to check that the posting source doesn't get dereferenced too
    # soon in various cases.
    def mkenq(db):
        # First - check that it's kept when the source goes out of scope.
        def build_query():
            source = OddPostingSource(10)
            # The posting source is inside a list to check that case is
            # correctly handled.
            subqueries = ["terM wHich wilL NoT maTch", xapian.Query(source)]
            return xapian.Query(xapian.Query.OP_OR, subqueries)

        # Check that it's kept when the query goes out of scope.
        def enquire_for_query():
            query = build_query()
            enquire = xapian.Enquire(db)
            enquire.set_query(query)
            return enquire

        # Check it's kept when the query is retrieved from enquire and put into
        # a new enquire.
        def enquire_from_enquire():
            first = enquire_for_query()
            enquire = xapian.Enquire(db)
            enquire.set_query(first.get_query())
            return enquire

        return enquire_from_enquire()

    enquire = mkenq(db)
    mset = enquire.get_mset(0, 10)

    expect([item.docid for item in mset], [1, 3, 5, 7, 9])
    expect(mset[0].weight, db.get_doccount())

    db.close()
    expect(xapian.Database.check(dbpath), 0)
    shutil.rmtree(dbpath)
def test_postingsource2():
    """Check a query built from a ValueWeightPostingSource.

    Ten documents get a numeric value in slot 1, and the results are expected
    back in decreasing order of that value.  The Python reference to the
    source is dropped before the query is run.

    """
    dbpath = 'db_test_postingsource2'
    db = xapian.WritableDatabase(dbpath, xapian.DB_CREATE_OR_OVERWRITE)
    for val in (6, 9, 4.5, 4.4, 4.6, 2, 1, 4, 3, 0):
        doc = xapian.Document()
        doc.add_value(1, xapian.sortable_serialise(val))
        db.add_document(doc)

    source = xapian.ValueWeightPostingSource(1)
    query = xapian.Query(source)
    del source # Check that query keeps a reference to it.

    enquire = xapian.Enquire(db)
    enquire.set_query(query)
    mset = enquire.get_mset(0, 10)

    docids = [item.docid for item in mset]
    expect(docids, [2, 1, 5, 3, 4, 8, 9, 6, 7, 10])

    db.close()
    expect(xapian.Database.check(dbpath), 0)
    shutil.rmtree(dbpath)
def test_postingsource3():
    """Check that a Python subclass of ValuePostingSource works.

    The subclass weights each document by the cube of the number stored in
    slot 1, so the results are expected in decreasing order of that value.

    """
    dbpath = 'db_test_postingsource3'
    db = xapian.WritableDatabase(dbpath, xapian.DB_CREATE_OR_OVERWRITE)
    for wt in (1, 3, 2, 4):
        doc = xapian.Document()
        doc.add_value(1, xapian.sortable_serialise(wt))
        db.add_document(doc)

    class PyValuePostingSource(xapian.ValuePostingSource):
        def __init__(self, slot):
            xapian.ValuePostingSource.__init__(self, slot)

        def init(self, db):
            xapian.ValuePostingSource.init(self, db)
            self.current = -1
            # The largest weight is the cube of the slot's upper bound.
            upper = db.get_value_upper_bound(self.get_slot())
            self.set_maxweight(xapian.sortable_unserialise(upper) ** 3)

        def next(self, minweight):
            return xapian.ValuePostingSource.next(self, minweight)

        def get_weight(self):
            return xapian.sortable_unserialise(self.get_value()) ** 3

    source = PyValuePostingSource(1)
    query = xapian.Query(source)
    #del source # Check that query keeps a reference to it.

    enquire = xapian.Enquire(db)
    enquire.set_query(query)
    mset = enquire.get_mset(0, 10)

    docids = [item.docid for item in mset]
    expect(docids, [4, 2, 3, 1])

    db.close()
    expect(xapian.Database.check(dbpath), 0)
    shutil.rmtree(dbpath)
def test_value_stats():
    """Check the value statistics reported by a database.

    Ten documents get a numeric value in slot 1; slots 0 and 2 are left
    unused and are expected to report a frequency of 0 and empty bounds.

    """
    dbpath = 'db_test_value_stats'
    db = xapian.WritableDatabase(dbpath, xapian.DB_CREATE_OR_OVERWRITE)

    for val in (6, 9, 4.5, 4.4, 4.6, 2, 1, 4, 3, 0):
        doc = xapian.Document()
        doc.add_value(1, xapian.sortable_serialise(val))
        db.add_document(doc)

    # (slot, expected frequency, expected lower bound, expected upper bound)
    expected_stats = (
        (0, 0, "", ""),
        (1, 10, xapian.sortable_serialise(0), xapian.sortable_serialise(9)),
        (2, 0, "", ""),
    )
    for slot, freq, lower, upper in expected_stats:
        expect(db.get_value_freq(slot), freq)
        expect(db.get_value_lower_bound(slot), lower)
        expect(db.get_value_upper_bound(slot), upper)

    db.close()
    expect(xapian.Database.check(dbpath), 0)
    shutil.rmtree(dbpath)
def test_get_uuid():
    """Check the UUIDs reported by databases.

    Two separate databases must have different UUIDs, a read-only handle must
    report the same UUID as the writable handle on the same path, and a
    Database with a single database added must report that database's UUID.

    """
    dbpath = 'db_test_get_uuid'
    path1 = dbpath + "1"
    path2 = dbpath + "2"

    db1 = xapian.WritableDatabase(path1, xapian.DB_CREATE_OR_OVERWRITE)
    db2 = xapian.WritableDatabase(path2, xapian.DB_CREATE_OR_OVERWRITE)
    dbr1 = xapian.Database(path1)
    dbr2 = xapian.Database(path2)
    expect(db1.get_uuid() != db2.get_uuid(), True)
    expect(db1.get_uuid(), dbr1.get_uuid())
    expect(db2.get_uuid(), dbr2.get_uuid())

    db = xapian.Database()
    db.add_database(db1)
    expect(db1.get_uuid(), db.get_uuid())

    for handle in (db1, db2, dbr1, dbr2, db):
        handle.close()
    for path in (path1, path2):
        shutil.rmtree(path)
def test_director_exception():
    """Check that an exception raised in a director reaches the caller.

    Both an ExpandDecider and a MatchDecider which raise a Python exception
    are checked, when called directly and when called via Enquire.

    """
    db = setup_database()
    enq = xapian.Enquire(db)
    enq.set_query(xapian.Query('it'))

    class TestException(Exception):
        def __init__(self, a, b):
            Exception.__init__(self, a + b)

    rset = xapian.RSet()
    rset.add_document(1)

    class EDecider(xapian.ExpandDecider):
        def __call__(self, term):
            raise TestException("foo", "bar")

    expand_decider = EDecider()
    expect_exception(TestException, "foobar", expand_decider, "foo")
    expect_exception(TestException, "foobar",
                     enq.get_eset, 10, rset, expand_decider)

    class MDecider(xapian.MatchDecider):
        def __call__(self, doc):
            raise TestException("foo", "bar")

    match_decider = MDecider()
    expect_exception(TestException, "foobar", match_decider, xapian.Document())
    expect_exception(TestException, "foobar",
                     enq.get_mset, 0, 10, None, match_decider)
def check_vals(db, vals):
    """Check the slot 1 value of every document in db against vals.

    vals is indexed by docid, and must have an entry for every docid from 1
    to the last docid used in db.

    """
    lastdocid = db.get_lastdocid()
    for docid in xrange(1, lastdocid + 1):
        doc = db.get_document(docid)
        stored = doc.get_value(1)
        expect(stored, vals[docid], "Expected stored value in doc %d" % docid)
def test_value_mods():
    """Check that modifications to document values are handled correctly.

    Values are added, replaced and removed in slot 1, and the stored values
    are compared with the expected ones both before and after committing.
    The random number generator is seeded so that runs are repeatable.

    """
    dbpath = 'db_test_value_mods'
    db = xapian.WritableDatabase(dbpath, xapian.DB_CREATE_OR_OVERWRITE)
    random.seed(42)
    doccount = 1000
    vals = {}

    # Add a value to all the documents
    for num in xrange(1, doccount):
        val = 'val%d' % num
        doc = xapian.Document()
        doc.add_value(1, val)
        db.add_document(doc)
        vals[num] = val
    db.commit()
    check_vals(db, vals)

    # Modify one of the values (this is a regression test which failed with the
    # initial implementation of streaming values).
    val = 'newval0'
    doc = xapian.Document()
    doc.add_value(1, val)
    db.replace_document(2, doc)
    vals[2] = val
    db.commit()
    check_vals(db, vals)

    # Do some random modifications.
    for count in xrange(1, doccount * 2):
        docid = random.randint(1, doccount)
        doc = xapian.Document()

        # Every fifth replacement document has no value set.
        if count % 5 != 0:
            val = 'newval%d' % count
            doc.add_value(1, val)
        else:
            val = ''
        db.replace_document(docid, doc)
        vals[docid] = val

    # Check the values before and after modification.
    check_vals(db, vals)
    db.commit()
    check_vals(db, vals)

    # Delete all the values which are non-empty, in a random order.
    keys = [key for key, val in vals.iteritems() if val != '']
    random.shuffle(keys)
    for key in keys:
        db.replace_document(key, xapian.Document())
        vals[key] = ''
    check_vals(db, vals)
    db.commit()
    check_vals(db, vals)

    db.close()
    expect_exception(xapian.DatabaseError, "Database has been closed",
                     check_vals, db, vals)
    shutil.rmtree(dbpath)
def test_serialise_document():
    """Check that documents survive being serialised and unserialised.

    Both a freshly built document and a document read from a database are
    checked.

    """
    def check_roundtrip(doc, termcount, data):
        # Compare doc with the result of serialising and unserialising it.
        copy = xapian.Document.unserialise(doc.serialise())
        expect(len(list(doc.termlist())), len(list(copy.termlist())))
        expect(len(list(doc.termlist())), termcount)
        expect([(item.term, item.wdf) for item in doc.termlist()],
               [(item.term, item.wdf) for item in copy.termlist()])
        expect([(item.num, item.value) for item in doc.values()],
               [(item.num, item.value) for item in copy.values()])
        expect(doc.get_data(), copy.get_data())
        expect(doc.get_data(), data)

    fresh = xapian.Document()
    fresh.add_term('foo', 2)
    fresh.add_value(1, 'bar')
    fresh.set_data('baz')
    check_roundtrip(fresh, 1, 'baz')

    db = setup_database()
    check_roundtrip(db.get_document(1), 3, 'is it cold?')
def test_serialise_query():
    """Check that queries survive being serialised and unserialised.

    """
    def check_roundtrip(query, description):
        # The copy must have the same description as the original, and the
        # original must be described as expected.
        copy = xapian.Query.unserialise(query.serialise())
        expect(str(query), str(copy))
        expect(str(query), description)

    check_roundtrip(xapian.Query(), 'Query()')
    check_roundtrip(xapian.Query('hello'), 'Query(hello)')
    check_roundtrip(xapian.Query(xapian.Query.OP_OR, ('hello', 'world')),
                    'Query((hello OR world))')
def test_preserve_query_parser_stopper():
    """Check that a QueryParser keeps the stopper which was set on it.

    The Python reference to the stopper is dropped before the parser is used.

    """
    def make_qp():
        stopper = xapian.SimpleStopper()
        for word in ('to', 'not'):
            stopper.add(word)
        parser = xapian.QueryParser()
        parser.set_stopper(stopper)
        del stopper
        return parser

    parser = make_qp()
    parsed = parser.parse_query('to be')
    expect(list(parser.stoplist()), ['to'])
def test_preserve_term_generator_stopper():
    """Check that a TermGenerator keeps the stopper which was set on it.

    The Python reference to the stopper is dropped before any text is
    indexed.

    """
    def make_tg():
        generator = xapian.TermGenerator()
        generator.set_stemmer(xapian.Stem('en'))
        stopper = xapian.SimpleStopper()
        for word in ('to', 'not'):
            stopper.add(word)
        generator.set_stopper(stopper)
        del stopper
        return generator

    generator = make_tg()
    generator.index_text('to be')

    doc = generator.get_document()
    terms = sorted([item.term for item in doc.termlist()])
    expect(terms, ['Zbe', 'be', 'to'])
def test_preserve_enquire_sorter():
    """Check that an Enquire keeps the sorter which was set on it.

    Each of the three methods which take a key maker is tried in turn, with
    the Python reference to the key maker dropped before the match is run.

    """
    db = xapian.WritableDatabase('', xapian.DB_BACKEND_INMEMORY)
    doc = xapian.Document()
    doc.add_term('foo')
    doc.add_value(1, '1')
    # The same document is added twice.
    for _ in range(2):
        db.add_document(doc)

    def make_enq(db, setter):
        # Set a sorter using the named method, then drop our reference to it.
        enq = xapian.Enquire(db)
        sorter = xapian.MultiValueKeyMaker()
        getattr(enq, setter)(sorter, False)
        del sorter
        return enq

    setters = (
        'set_sort_by_key',
        'set_sort_by_key_then_relevance',
        'set_sort_by_relevance_then_key',
    )
    for setter in setters:
        enq = make_enq(db, setter)
        enq.set_query(xapian.Query('foo'))
        enq.get_mset(0, 10)
def test_matchspy():
    """Check that ValueCountMatchSpy can be used with Enquire.

    This covers a spy whose Python reference is dropped before the match is
    run, clear_matchspies(), and the counts reported by a spy on slot 0.

    """
    db = setup_database()
    enq = xapian.Enquire(db)
    enq.set_query(xapian.Query(xapian.Query.OP_OR, "was", "it"))

    def set_matchspy_deref(enq):
        """Set a matchspy, and then drop the reference, to check that it
        doesn't get deleted too soon.
        """
        spy = xapian.ValueCountMatchSpy(0)
        enq.add_matchspy(spy)
        del spy
    set_matchspy_deref(enq)
    mset = enq.get_mset(0, 10)
    expect(len(mset), 5)

    spy = xapian.ValueCountMatchSpy(0)
    enq.add_matchspy(spy)
    # Regression test for clear_matchspies() - used to always raise an
    # exception due to a copy and paste error in its definition.
    enq.clear_matchspies()
    mset = enq.get_mset(0, 10)
    expect(list(spy.values()), [])

    enq.add_matchspy(spy)
    mset = enq.get_mset(0, 10)
    expect(spy.get_total(), 5)

    one_and_a_half = xapian.sortable_serialise(1.5)
    two = xapian.sortable_serialise(2)
    # values() is expected in value order, top_values() most frequent first.
    expect([(item.term, item.termfreq) for item in spy.values()],
           [(one_and_a_half, 1), (two, 2)])
    expect([(item.term, item.termfreq) for item in spy.top_values(10)],
           [(two, 2), (one_and_a_half, 1)])
def test_import_star():
    """Test that "from xapian import *" works.

    This is a regression test - this failed in the 1.2.0 release.
    It's not normally good style to use it, but it should work anyway!

    """
    # Importing the module is the whole test; presumably test_xapian_star
    # performs the star import at module level - verify against that file.
    import test_xapian_star
def test_latlongcoords_iter():
    """Check that a LatLongCoords object can be iterated over.

    """
    coords = xapian.LatLongCoords()
    expect(list(coords), [])

    for longitude in (0, 1):
        coords.append(xapian.LatLongCoord(0, longitude))
    descriptions = [str(coord) for coord in coords]
    expect(descriptions, ['Xapian::LatLongCoord(0, 0)',
                          'Xapian::LatLongCoord(0, 1)'])
def test_compactor():
    """Test that xapian.Compactor works.

    Two small databases are compacted into a third, first with the default
    Compactor and then with a Python subclass which logs the status messages
    it is given and merges duplicate metadata values.  All the databases
    live in a temporary directory, which is removed once the test is done.

    """
    tmpdir = tempfile.mkdtemp()
    db1 = db2 = db3 = None
    try:
        db1path = os.path.join(tmpdir, 'db1')
        db2path = os.path.join(tmpdir, 'db2')
        db3path = os.path.join(tmpdir, 'db3')

        # Set up a couple of sample input databases
        db1 = xapian.WritableDatabase(db1path, xapian.DB_CREATE_OR_OVERWRITE)
        doc1 = xapian.Document()
        doc1.add_term('Hello')
        doc1.add_term('Hello1')
        doc1.add_value(0, 'Val1')
        db1.set_metadata('key', '1')
        db1.set_metadata('key1', '1')
        db1.add_document(doc1)
        db1.commit()

        db2 = xapian.WritableDatabase(db2path, xapian.DB_CREATE_OR_OVERWRITE)
        doc2 = xapian.Document()
        doc2.add_term('Hello')
        doc2.add_term('Hello2')
        doc2.add_value(0, 'Val2')
        db2.set_metadata('key', '2')
        db2.set_metadata('key2', '2')
        db2.add_document(doc2)
        db2.commit()

        # Compact with the default compactor
        # Metadata conflicts are resolved by picking the first value
        c = xapian.Compactor()
        c.add_source(db1path)
        c.add_source(db2path)
        c.set_destdir(db3path)
        c.compact()

        db3 = xapian.Database(db3path)
        expect([(item.term, item.termfreq) for item in db3.allterms()],
               [('Hello', 2), ('Hello1', 1), ('Hello2', 1)])
        expect(db3.get_document(1).get_value(0), 'Val1')
        expect(db3.get_document(2).get_value(0), 'Val2')
        expect(db3.get_metadata('key'), '1')
        expect(db3.get_metadata('key1'), '1')
        expect(db3.get_metadata('key2'), '2')

        # Close this handle before db3path is compacted into again: db3 gets
        # rebound below, so the cleanup code would otherwise never close it.
        db3.close()
        db3 = None

        context("testing a custom compactor which merges duplicate metadata")
        class MyCompactor(xapian.Compactor):
            def __init__(self):
                xapian.Compactor.__init__(self)
                self.log = []

            def set_status(self, table, status):
                # An empty status is logged as the start of work on a table.
                if not status:
                    self.log.append('Starting %s' % table)
                else:
                    self.log.append('%s: %s' % (table, status))

            def resolve_duplicate_metadata(self, key, vals):
                return ','.join(vals)

        c = MyCompactor()
        c.add_source(db1path)
        c.add_source(db2path)
        c.set_destdir(db3path)
        c.compact()
        log = '\n'.join(c.log)
        # Check we got some messages in the log
        expect('Starting postlist' in log, True)

        db3 = xapian.Database(db3path)
        expect([(item.term, item.termfreq) for item in db3.allterms()],
               [('Hello', 2), ('Hello1', 1), ('Hello2', 1)])
        expect(db3.get_metadata('key'), '1,2')
        expect(db3.get_metadata('key1'), '1')
        expect(db3.get_metadata('key2'), '2')

    finally:
        # Close whatever got opened before removing the directory the
        # databases live in.
        for handle in (db1, db2, db3):
            if handle is not None:
                handle.close()

        shutil.rmtree(tmpdir)
def test_leak_mset_items():
    """Check that accessing the items property of an MSet doesn't leak.

    """
    db = xapian.WritableDatabase('', xapian.DB_BACKEND_INMEMORY)
    doc = xapian.Document()
    doc.add_term('drip')
    db.add_document(doc)

    enquire = xapian.Enquire(db)
    enquire.set_query(xapian.Query('drip'))
    mset = enquire.get_mset(0, 10)

    # Prior to 1.2.4 this next line leaked an object.
    mset.items
def test_custom_matchspy():
    """Check that a MatchSpy subclassed in Python gets called during a match.

    """
    class MSpy(xapian.MatchSpy):
        def __init__(self):
            xapian.MatchSpy.__init__(self)
            self.count = 0

        def __call__(self, doc, weight):
            # Just count how many times the spy is called.
            self.count += 1

    spy = MSpy()

    db = setup_database()
    query = xapian.Query(xapian.Query.OP_OR, "was", "it")

    enquire = xapian.Enquire(db)
    enquire.add_matchspy(spy)
    enquire.set_query(query)
    mset = enquire.get_mset(0, 1)
    expect(len(mset), 1)
    expect(spy.count >= 1, True)

    expect(db.get_doccount(), 5)
def test_removed_features():
    """Check that a set of attribute names can't be accessed.

    Looking up each of the names listed below on the corresponding module or
    object must raise AttributeError.

    """
    db = xapian.WritableDatabase('', xapian.DB_BACKEND_INMEMORY)
    doc = xapian.Document()
    enq = xapian.Enquire(db)
    eset = xapian.ESet()
    mset = xapian.MSet()
    query = xapian.Query()
    qp = xapian.QueryParser()
    titer = xapian._TermIterator()
    postiter = xapian._PostingIterator()

    def check_missing(obj, attr):
        expect_exception(AttributeError, None, getattr, obj, attr)

    # Pairs of (object, names which must not be attributes of that object).
    missing = (
        (xapian, (
            'Stem_get_available_languages',
            'TermIterator',
            'PositionIterator',
            'PostingIterator',
            'ValueIterator',
            'MSetIterator',
            'ESetIterator',
        )),
        (db, (
            'allterms_begin',
            'allterms_end',
            'metadata_keys_begin',
            'metadata_keys_end',
            'synonym_keys_begin',
            'synonym_keys_end',
            'synonyms_begin',
            'synonyms_end',
            'spellings_begin',
            'spellings_end',
            'positionlist_begin',
            'positionlist_end',
            'postlist_begin',
            'postlist_end',
            'termlist_begin',
            'termlist_end',
        )),
        (doc, (
            'termlist_begin',
            'termlist_end',
            'values_begin',
            'values_end',
        )),
        (enq, (
            'get_matching_terms_begin',
            'get_matching_terms_end',
        )),
        (eset, ('begin', 'end')),
        (mset, ('begin', 'end')),
        (postiter, ('positionlist_begin', 'positionlist_end')),
        (query, ('get_terms_begin', 'get_terms_end')),
        (qp, (
            'stoplist_begin',
            'stoplist_end',
            'unstem_begin',
            'unstem_end',
        )),
        (titer, ('positionlist_begin', 'positionlist_end')),
    )
    for obj, attrs in missing:
        for attr in attrs:
            check_missing(obj, attr)
# Overall outcome; set to False by run() if any run of the tests fails.
result = True

def run():
    """Run all tests (ie, callables with names starting "test_").

    The tests to run can be restricted using the command line arguments.
    Sets the global result to False if runtests() reports failure.

    """
    global result
    passed = runtests(globals(), sys.argv[1:])
    if not passed:
        result = False

print("Running tests without threads")
run()

if have_threads:
    print("Running tests with threads")

    # This testcase seems to just block when run in a thread, so just remove
    # it before running tests in a thread.
    del test_import_star

    runner = threading.Thread(name='test runner', target=run)
    runner.start()
    # Block until the thread has completed so the thread gets a chance to exit
    # with error status.
    runner.join()

if not result:
    sys.exit(1)
1694 # vim:syntax=python:set expandtab: