Add DB_BACKEND_INMEMORY; deprecate InMemory::open()
[xapian.git] / xapian-bindings / python3 / pythontest.py
blob1588f55eee5f01d54630f940398ffc95a6f66ea3
1 # Tests of Python-specific parts of the xapian bindings.
3 # Copyright (C) 2007 Lemur Consulting Ltd
4 # Copyright (C) 2008,2009,2010,2011,2013,2014,2015,2016 Olly Betts
5 # Copyright (C) 2010,2011 Richard Boulton
7 # This program is free software; you can redistribute it and/or
8 # modify it under the terms of the GNU General Public License as
9 # published by the Free Software Foundation; either version 2 of the
10 # License, or (at your option) any later version.
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with this program; if not, write to the Free Software
19 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
20 # USA
22 import os
23 import random
24 import shutil
25 import sys
26 import tempfile
27 import xapian
29 try:
30 import threading
31 have_threads = True
32 except ImportError:
33 have_threads = False
35 from testsuite import *
def setup_database():
    """Set up and return an inmemory database with 5 documents.

    Document 1 is built on its own.  Documents 2 to 5 are produced by
    repeatedly extending one Document object and adding it again, so each of
    them also carries the terms and values of the one before it.

    As a side check this also verifies that sortable_unserialise() rejects
    str input.

    """
    db = xapian.WritableDatabase('', xapian.DB_BACKEND_INMEMORY)

    doc = xapian.Document()
    doc.set_data("is it cold?")
    doc.add_term("is")
    doc.add_posting("it", 1)
    doc.add_posting("cold", 2)
    db.add_document(doc)

    doc = xapian.Document()
    doc.set_data("was it warm?")
    doc.add_posting("was", 1)
    doc.add_posting("it", 2)
    doc.add_posting("warm", 3)
    db.add_document(doc)
    doc.set_data("was it warm? two")
    doc.add_term("two", 2)
    doc.add_value(0, xapian.sortable_serialise(2))
    db.add_document(doc)
    doc.set_data("was it warm? three")
    doc.add_term("three", 3)
    doc.add_value(0, xapian.sortable_serialise(1.5))
    db.add_document(doc)
    doc.set_data("was it warm? four it")
    doc.add_term("four", 4)
    doc.add_term("it", 6)
    doc.add_posting("it", 7)
    doc.add_value(5, 'five')
    doc.add_value(9, 'nine')
    doc.add_value(0, xapian.sortable_serialise(2))
    db.add_document(doc)

    expect(db.get_doccount(), 5)

    # Test that str is rejected by sortable_unserialise().  Using
    # expect_exception() means the check fails if no exception is raised at
    # all, rather than passing silently.
    expect_exception(TypeError, 'expected bytes, str found',
                     xapian.sortable_unserialise, "unicode")

    return db
def test_exception_base():
    """Verify that a raised xapian error can be caught as a plain Exception.

    """
    error = xapian.InvalidOperationError("Test exception")
    try:
        raise error
    except Exception:
        # Reaching this handler is the whole check: nothing more to assert.
        pass
def test_mset_iter():
    """Check iteration, get_hit() and [] access on MSets and sub-MSets.

    """
    def expect_same_hit(hit, item):
        # Compare every field a hit exposes against the iterator item.
        expect(hit.docid, item.docid)
        expect(hit.rank, item.rank)
        expect(hit.percent, item.percent)
        expect(hit.document.get_data(), item.document.get_data())
        expect(hit.collapse_key, item.collapse_key)
        expect(hit.collapse_count, item.collapse_count)

    db = setup_database()
    enquire = xapian.Enquire(db)
    enquire.set_query(xapian.Query(xapian.Query.OP_OR, "was", "it"))
    mset = enquire.get_mset(0, 10)
    items = list(mset)
    expect(len(items), 5)
    expect(len(mset), len(items), "Expected number of items to be length of mset")

    context("testing returned item from mset")
    third = items[2]
    expect(third.docid, 4)
    expect(third.rank, 2)
    expect(third.percent, 86)
    expect(third.collapse_key, b'')
    expect(third.collapse_count, 0)
    expect(third.document.get_data(), b'was it warm? three')

    # Check iterators for sub-msets against the whole mset.
    for start in range(6):
        for maxitems in range(6):
            window = (start, maxitems)
            context("checking iterators for sub-mset from %d, maxitems %d" % window)
            submset = enquire.get_mset(start, maxitems)
            for num, item in enumerate(submset):
                here = (num, start, maxitems)
                context("testing hit %d for sub-mset from %d, maxitems %d" % here)
                expect(item.rank, num + start)

                context("comparing iterator item %d for sub-mset from %d, maxitems %d against hit" % here)
                expect_same_hit(submset.get_hit(num), item)

                context("comparing iterator item %d for sub-mset from %d, maxitems %d against hit from whole mset" % here)
                expect_same_hit(mset.get_hit(num + start), item)

                # Index afresh for every field so that each lookup through
                # [] is exercised separately.
                context("comparing iterator item %d for sub-mset from %d, maxitems %d against direct access with []" % here)
                expect(submset[num].docid, item.docid)
                expect(submset[num].rank, item.rank)
                expect(submset[num].percent, item.percent)
                expect(submset[num].document.get_data(), item.document.get_data())
                expect(submset[num].collapse_key, item.collapse_key)
                expect(submset[num].collapse_count, item.collapse_count)

            context("Checking out of range access to mset, for sub-mset from %d, maxitems %d" % window)
            # Test out-of-range access to mset:
            for bad_index in (-10, 10, -1-len(submset), len(submset)):
                expect_exception(IndexError, 'Mset index out of range',
                                 submset.__getitem__, bad_index)

            # Check that the item contents remain valid when the iterator has
            # moved on.
            for num, item in enumerate(list(submset)):
                context("comparing iterator item %d for sub-mset mset from %d, maxitems %d against saved item" % (num, start, maxitems))
                expect(submset[num].docid, item.docid)
                expect(submset[num].rank, item.rank)
                expect(submset[num].percent, item.percent)
                expect(submset[num].document.get_data(), item.document.get_data())
                expect(submset[num].collapse_key, item.collapse_key)
                expect(submset[num].collapse_count, item.collapse_count)

            # Check that the right number of items exist in the mset.
            context("checking length of sub-mset from %d, maxitems %d" % window)
            wanted = min(maxitems, 5 - start)
            expect(len(list(submset)), wanted)
            expect(len(submset), wanted)
def test_eset_iter():
    """Check iteration over ESets obtained with and without a query set.

    """
    db = setup_database()
    rset = xapian.RSet()
    rset.add_document(3)
    query = xapian.Query(xapian.Query.OP_OR, "was", "it")

    context("getting eset items without a query")
    enquire = xapian.Enquire(db)
    eset = enquire.get_eset(10, rset)
    plain_items = list(eset)
    expect(len(plain_items), 3)
    expect(len(plain_items), len(eset))

    context("getting eset items with a query")
    enquire = xapian.Enquire(db)
    enquire.set_query(query)
    eset = enquire.get_eset(10, rset)
    query_items = list(eset)
    expect(len(query_items), 2)
    expect(len(query_items), len(eset))

    # (index with a query, index without a query) of entries that must agree.
    matching = ((0, 0), (1, 2))

    context("comparing eset items with a query to those without")
    for with_query, without_query in matching:
        expect(query_items[with_query].term, plain_items[without_query].term)

    context("comparing eset weights with a query to those without")
    for with_query, without_query in matching:
        expect(query_items[with_query].weight, plain_items[without_query].weight)
def test_matchingterms_iter():
    """Check Enquire.matching_terms() given a docid, an MSet item or a hit.

    """
    db = setup_database()
    terms = ("was", "it", "warm", "two")

    # Prior to 1.2.4 Enquire.matching_terms() leaked references to its members.

    enquire = xapian.Enquire(db)
    enquire.set_query(xapian.Query(xapian.Query.OP_OR, terms))
    mset = enquire.get_mset(0, 10)

    for item in mset:
        # The term names must agree whether we pass the docid or the item.
        by_docid = list(enquire.matching_terms(item.docid))
        by_item = list(enquire.matching_terms(item))
        expect(by_docid, by_item)

    top_terms = list(enquire.matching_terms(mset.get_hit(0)))
    expect(top_terms, [b'it', b'two', b'warm', b'was'])
def test_queryterms_iter():
    """Check that iterating a Query yields its terms.

    """
    db = setup_database()
    wanted_terms = ("was", "it", "warm", "two")
    query = xapian.Query(xapian.Query.OP_OR, wanted_terms)

    # Collect the term names by iterating the query itself.
    terms = []
    for term in query:
        terms.append(term)
    expect(terms, [b'it', b'two', b'warm', b'was'])
def test_queryparser_stoplist_iter():
    """Check QueryParser.stoplist() with and without a stopper and stemmer.

    """
    stemmer = xapian.Stem('en')
    text = 'to be or not to be is the questions'

    # Check behaviour without having set a stoplist.
    queryparser = xapian.QueryParser()
    queryparser.set_stemmer(stemmer)
    queryparser.set_stemming_strategy(queryparser.STEM_SOME)
    expect(list(queryparser.stoplist()), [])
    query = queryparser.parse_query(text)
    expect(list(queryparser.stoplist()), [])
    expect(str(query),
           'Query((Zto@1 OR Zbe@2 OR Zor@3 OR Znot@4 OR Zto@5 OR Zbe@6 OR '
           'Zis@7 OR Zthe@8 OR Zquestion@9))')

    # Check behaviour with a stoplist, but no stemmer
    queryparser = xapian.QueryParser()
    stopper = xapian.SimpleStopper()
    for stopword in ('to', 'not', 'question'):
        stopper.add(stopword)
    queryparser.set_stopper(stopper)
    expect(list(queryparser.stoplist()), [])
    query = queryparser.parse_query(text)

    expect(list(queryparser.stoplist()), [b'to', b'not', b'to'])
    expect(str(query),
           'Query((be@2 OR or@3 OR be@6 OR is@7 OR the@8 OR questions@9))')

    # Check behaviour with a stoplist and a stemmer
    queryparser.set_stemmer(stemmer)
    queryparser.set_stemming_strategy(queryparser.STEM_SOME)
    # Shouldn't have changed since previous query.
    expect(list(queryparser.stoplist()), [b'to', b'not', b'to'])
    query = queryparser.parse_query(text)

    expect(list(queryparser.stoplist()), [b'to', b'not', b'to'])
    expect(str(query),
           'Query((Zbe@2 OR Zor@3 OR Zbe@6 OR Zis@7 OR Zthe@8 OR Zquestion@9))')
def test_queryparser_unstem_iter():
    """Check QueryParser.unstemlist() before and after parsing a query.

    """
    stemmer = xapian.Stem('en')
    text = 'to question questions'

    # Without a stemmer each term maps back only to itself.
    queryparser = xapian.QueryParser()
    for term in ('to', 'question', 'questions'):
        expect(list(queryparser.unstemlist(term)), [])
    query = queryparser.parse_query(text)

    unstemmed = (
        ('to', [b'to']),
        ('question', [b'question']),
        ('questions', [b'questions']),
    )
    for term, originals in unstemmed:
        expect(list(queryparser.unstemlist(term)), originals)
    expect(str(query),
           'Query((to@1 OR question@2 OR questions@3))')

    # With a stemmer both word forms map back from the one stemmed term.
    queryparser = xapian.QueryParser()
    queryparser.set_stemmer(stemmer)
    queryparser.set_stemming_strategy(queryparser.STEM_SOME)
    for term in ('Zto', 'Zquestion', 'Zquestions'):
        expect(list(queryparser.unstemlist(term)), [])
    query = queryparser.parse_query(text)

    unstemmed = (
        ('Zto', [b'to']),
        ('Zquestion', [b'question', b'questions']),
        ('Zquestions', []),
    )
    for term, originals in unstemmed:
        expect(list(queryparser.unstemlist(term)), originals)
    expect(str(query),
           'Query((Zto@1 OR Zquestion@2 OR Zquestion@3))')
def test_allterms_iter():
    """Check the all-terms iterator on Database, with and without a prefix.

    """
    db = setup_database()
    moved_msg = 'Iterator has moved, and does not support random access'

    context("making a list of the term names and frequencies")
    terms = []
    freqs = []
    for termitem in db:
        terms.append(termitem.term)
        expect_exception(xapian.InvalidOperationError,
                         'Iterator does not support wdfs',
                         getattr, termitem, 'wdf')
        freqs.append(termitem.termfreq)
        expect_exception(xapian.InvalidOperationError,
                         'Iterator does not support position lists',
                         getattr, termitem, 'positer')

    context("checking that items are no longer valid once the iterator has moved on")
    termitems = [termitem for termitem in db]

    expect(len(termitems), len(terms))
    for index, termitem in enumerate(termitems):
        expect(termitem.term, terms[index])

    expect(len(termitems), len(freqs))
    for termitem in termitems:
        expect_exception(xapian.InvalidOperationError, moved_msg,
                         getattr, termitem, 'termfreq')

    context("checking that restricting the terms iterated with a prefix works")
    # (term, termfreq) pairs from the full listing that start with b't'.
    wanted = [(term, freq) for term, freq in zip(terms, freqs)
              if term.startswith(b't')]
    seen = 0
    for termitem in db.allterms('t'):
        expect(termitem.term, wanted[seen][0])
        expect(termitem.termfreq, wanted[seen][1])
        seen += 1
    expect(len(wanted), seen)
def test_termlist_iter():
    """Test termlist iterator on Database.

    Covers plain iteration over the termlist of document 3, interleaved
    skip_to() and next() calls, and which attributes of saved items can
    still be read once the iterator has moved on.

    """
    db = setup_database()

    def snapshot(termitem):
        # Capture everything a termlist item exposes as a comparable tuple.
        return (termitem.term, termitem.wdf, termitem.termfreq,
                [pos for pos in termitem.positer])

    # Make lists of the item contents
    terms = []
    wdfs = []
    freqs = []
    positers = []
    for termitem in db.termlist(3):
        terms.append(termitem.term)
        wdfs.append(termitem.wdf)
        freqs.append(termitem.termfreq)
        positers.append([pos for pos in termitem.positer])

    expect(terms, [b'it', b'two', b'warm', b'was'])
    expect(wdfs, [1, 2, 1, 1])
    expect(freqs, [5, 3, 4, 4])
    expect(positers, [[2], [], [3], [1]])

    # Test skip_to().
    tliter = db.termlist(3)

    # skip to an item before the first item.
    expect(snapshot(tliter.skip_to('a')), (b'it', 1, 5, [2]))

    # skip forwards to an item.
    expect(snapshot(tliter.skip_to('two')), (b'two', 2, 3, []))

    # skip to same place (should return same item)
    expect(snapshot(tliter.skip_to('two')), (b'two', 2, 3, []))

    # next() after a skip_to(), should return next item.
    expect(snapshot(next(tliter)), (b'warm', 1, 4, [3]))

    # skip to same place (should return same item)
    expect(snapshot(tliter.skip_to('warm')), (b'warm', 1, 4, [3]))

    # skip backwards (the returned item is not checked here)
    termitem = tliter.skip_to('a')

    # skip to after end.
    expect_exception(StopIteration, '', tliter.skip_to, 'zoo')
    # skip backwards (should still return StopIteration).
    expect_exception(StopIteration, '', tliter.skip_to, 'a')
    # next should continue to return StopIteration.
    expect_exception(StopIteration, '', next, tliter)

    # Make a list of the terms (so we can test if they're still valid
    # once the iterator has moved on).
    termitems = [termitem for termitem in db.termlist(3)]

    expect(len(termitems), len(terms))
    for i, termitem in enumerate(termitems):
        expect(termitem.term, terms[i])

    expect(len(termitems), len(wdfs))
    for i, termitem in enumerate(termitems):
        expect(termitem.wdf, wdfs[i])

    expect(len(termitems), len(freqs))
    for termitem in termitems:
        expect_exception(xapian.InvalidOperationError,
                         'Iterator has moved, and does not support random access',
                         getattr, termitem, 'termfreq')

    # Compare against positers here: this loop checks the position lists, so
    # the length check should not repeat the freqs comparison above.
    expect(len(termitems), len(positers))
    for termitem in termitems:
        expect_exception(xapian.InvalidOperationError,
                         'Iterator has moved, and does not support random access',
                         getattr, termitem, 'positer')
def test_dbdocument_iter():
    """Test document terms iterator for document taken from a database.

    Iterates the terms of document 3 and then checks which attributes of
    saved items can still be read once the iterator has moved on.

    """
    db = setup_database()

    doc = db.get_document(3)

    # Make lists of the item contents
    terms = []
    wdfs = []
    freqs = []
    positers = []
    for termitem in doc:
        terms.append(termitem.term)
        wdfs.append(termitem.wdf)
        freqs.append(termitem.termfreq)
        positers.append([pos for pos in termitem.positer])

    expect(terms, [b'it', b'two', b'warm', b'was'])
    expect(wdfs, [1, 2, 1, 1])
    expect(freqs, [5, 3, 4, 4])
    expect(positers, [[2], [], [3], [1]])

    # Make a list of the terms (so we can test if they're still valid
    # once the iterator has moved on).
    termitems = [termitem for termitem in doc]

    expect(len(termitems), len(terms))
    for i, termitem in enumerate(termitems):
        expect(termitem.term, terms[i])

    expect(len(termitems), len(wdfs))
    for i, termitem in enumerate(termitems):
        expect(termitem.wdf, wdfs[i])

    expect(len(termitems), len(freqs))
    for termitem in termitems:
        expect_exception(xapian.InvalidOperationError,
                         'Iterator has moved, and does not support random access',
                         getattr, termitem, 'termfreq')

    # Compare against positers here: this loop checks the position lists, so
    # the length check should not repeat the freqs comparison above.
    expect(len(termitems), len(positers))
    for termitem in termitems:
        expect_exception(xapian.InvalidOperationError,
                         'Iterator has moved, and does not support random access',
                         getattr, termitem, 'positer')
def test_newdocument_iter():
    """Check the terms iterator of a Document that was never stored.

    """
    moved_msg = 'Iterator has moved, and does not support random access'

    doc = xapian.Document()
    doc.set_data("was it warm? two")
    for position, term in enumerate(("was", "it", "warm"), 1):
        doc.add_posting(term, position)
    doc.add_term("two", 2)

    # Gather the contents of every item while the iterator points at it.
    terms = []
    wdfs = []
    positers = []
    for termitem in doc:
        terms.append(termitem.term)
        wdfs.append(termitem.wdf)
        expect_exception(xapian.InvalidOperationError,
                         "Can't get term frequency from a document termlist "
                         "which is not associated with a database.",
                         getattr, termitem, 'termfreq')
        positers.append([pos for pos in termitem.positer])

    expect(terms, [b'it', b'two', b'warm', b'was'])
    expect(wdfs, [1, 2, 1, 1])
    expect(positers, [[2], [], [3], [1]])

    # Keep the items around so we can test what is still readable once the
    # iterator has moved on.
    termitems = [termitem for termitem in doc]

    expect(len(termitems), len(terms))
    for index, termitem in enumerate(termitems):
        expect(termitem.term, terms[index])

    expect(len(termitems), len(wdfs))
    for index, termitem in enumerate(termitems):
        expect(termitem.wdf, wdfs[index])

    for termitem in termitems:
        expect_exception(xapian.InvalidOperationError, moved_msg,
                         getattr, termitem, 'termfreq')

    expect(len(termitems), len(positers))
    for termitem in termitems:
        expect_exception(xapian.InvalidOperationError, moved_msg,
                         getattr, termitem, 'positer')
def test_postinglist_iter():
    """Check the postlist iterator on Database, including skip_to().

    """
    db = setup_database()

    def snapshot(posting):
        # Capture everything a posting exposes as a comparable tuple.
        return (posting.docid, posting.doclength, posting.wdf,
                [pos for pos in posting.positer])

    # Gather the contents of every posting for the term 'it'.
    docids = []
    doclengths = []
    wdfs = []
    positers = []
    for posting in db.postlist('it'):
        docids.append(posting.docid)
        doclengths.append(posting.doclength)
        wdfs.append(posting.wdf)
        positers.append([pos for pos in posting.positer])

    expect(docids, [1, 2, 3, 4, 5])
    expect(doclengths, [3, 3, 5, 8, 19])
    expect(wdfs, [1, 1, 1, 1, 8])
    expect(positers, [[1], [2], [2], [2], [2, 7]])

    # Test skip_to().
    pliter = db.postlist('it')

    # skip to an item before the first item.
    expect(snapshot(pliter.skip_to(0)), (1, 3, 1, [1]))

    # skip forwards to an item.
    expect(snapshot(pliter.skip_to(3)), (3, 5, 1, [2]))

    # skip to same place (should return same item)
    expect(snapshot(pliter.skip_to(3)), (3, 5, 1, [2]))

    # next() after a skip_to(), should return next item.
    expect(snapshot(next(pliter)), (4, 8, 1, [2]))

    # skip to same place (should return same item)
    expect(snapshot(pliter.skip_to(4)), (4, 8, 1, [2]))

    # skip backwards (should return same item)
    expect(snapshot(pliter.skip_to(2)), (4, 8, 1, [2]))

    # skip to after end.
    expect_exception(StopIteration, '', pliter.skip_to, 6)
    # skip past the end again (should still raise StopIteration).
    expect_exception(StopIteration, '', pliter.skip_to, 6)
    # next should continue to return StopIteration.
    expect_exception(StopIteration, '', next, pliter)

    # Keep the postings around so we can test what is still readable once
    # the iterator has moved on.
    postings = list(db.postlist('it'))

    expect(len(postings), len(docids))
    for index, posting in enumerate(postings):
        expect(posting.docid, docids[index])

    expect(len(postings), len(doclengths))
    for index, posting in enumerate(postings):
        expect(posting.doclength, doclengths[index])

    expect(len(postings), len(wdfs))
    for index, posting in enumerate(postings):
        expect(posting.wdf, wdfs[index])

    expect(len(postings), len(positers))
    for posting in postings:
        expect_exception(xapian.InvalidOperationError,
                         'Iterator has moved, and does not support random access',
                         getattr, posting, 'positer')
def test_valuestream_iter():
    """Check the valuestream iterator on Database, including skip_to().

    """
    db = setup_database()

    def pair(item):
        # The two fields of a valuestream item, as a comparable tuple.
        return (item.docid, item.value)

    def pairs(slot):
        # Every (docid, value) pair stored in the given value slot.
        return [(item.docid, item.value) for item in db.valuestream(slot)]

    # Check basic iteration
    expect(pairs(0), [(3, b'\xa4'), (4, b'\xa2'), (5, b'\xa4')])
    expect(pairs(1), [])
    expect(pairs(5), [(5, b"five")])
    expect(pairs(9), [(5, b"nine")])

    # Test skip_to() on iterator with no values, and behaviours when called
    # after already returning StopIteration.
    stream = db.valuestream(1)
    expect_exception(StopIteration, "", stream.skip_to, 1)
    expect_exception(StopIteration, "", stream.skip_to, 1)
    stream = db.valuestream(1)
    expect_exception(StopIteration, "", stream.skip_to, 1)
    expect_exception(StopIteration, "", stream.__next__)
    stream = db.valuestream(1)
    expect_exception(StopIteration, "", stream.__next__)
    expect_exception(StopIteration, "", stream.skip_to, 1)

    # Test that skipping to a value works, and that skipping doesn't have to
    # advance.
    stream = db.valuestream(0)
    expect(pair(stream.skip_to(4)), (4, b'\xa2'))
    expect(pair(stream.skip_to(4)), (4, b'\xa2'))
    expect(pair(stream.skip_to(1)), (4, b'\xa2'))
    expect(pair(stream.skip_to(5)), (5, b'\xa4'))
    expect_exception(StopIteration, "", stream.skip_to, 6)

    # Test that alternating skip_to() and next() works.
    stream = db.valuestream(0)
    expect(pair(next(stream)), (3, b'\xa4'))
    expect(pair(stream.skip_to(4)), (4, b'\xa2'))
    expect(pair(next(stream)), (5, b'\xa4'))
    expect_exception(StopIteration, "", stream.skip_to, 6)

    # Test that next works correctly after skip_to() called with an earlier
    # item.
    stream = db.valuestream(0)
    expect(pair(stream.skip_to(4)), (4, b'\xa2'))
    expect(pair(stream.skip_to(1)), (4, b'\xa2'))
    expect(pair(next(stream)), (5, b'\xa4'))

    # Test that next works correctly after skipping to last item
    stream = db.valuestream(0)
    expect(pair(stream.skip_to(5)), (5, b'\xa4'))
    expect_exception(StopIteration, "", stream.__next__)
def test_position_iter():
    """Check the position iterator for a term of a document in a database.

    """
    db = setup_database()

    doc = db.get_document(5)

    # Collect every position at which 'it' occurs in document 5.
    positions = list(db.positionlist(5, 'it'))

    expect(positions, [2, 7])
def test_value_iter():
    """Check iteration over the values stored in a document.

    """
    db = setup_database()
    doc = db.get_document(5)

    items = list(doc.values())
    expect(len(items), 3)

    # (slot number, stored value) expected for each item, in order.
    wanted = (
        (0, xapian.sortable_serialise(2)),
        (5, b'five'),
        (9, b'nine'),
    )
    for index, (num, value) in enumerate(wanted):
        expect(items[index].num, num)
        expect(items[index].value, value)
def test_synonyms_iter():
    """Check synonym and synonym-key iteration before and after a commit.

    """
    dbpath = 'db_test_synonyms_iter'
    db = xapian.WritableDatabase(dbpath, xapian.DB_CREATE_OR_OVERWRITE)

    def check(database, populated):
        # Run the full set of synonym lookups against one database handle;
        # `populated` says whether that handle should see the synonyms yet.
        hello_synonyms = [b'hi', b'howdy'] if populated else []
        hello_keys = [b'hello'] if populated else []
        expect(list(database.synonyms('foo')), [])
        expect(list(database.synonyms('hello')), hello_synonyms)
        expect(list(database.synonym_keys()), hello_keys)
        expect(list(database.synonym_keys('foo')), [])
        expect(list(database.synonym_keys('he')), hello_keys)
        expect(list(database.synonym_keys('hello')), hello_keys)

    for synonym in ('hi', 'howdy'):
        db.add_synonym('hello', synonym)

    # Uncommitted: only the writer sees the synonyms.
    check(db, True)

    dbr = xapian.Database(dbpath)
    check(dbr, False)

    db.commit()

    # Committed: the writer and a freshly opened reader both see them.
    check(db, True)

    dbr = xapian.Database(dbpath)
    check(dbr, True)

    db.close()
    expect(xapian.Database.check(dbpath), 0)
    dbr.close()
    shutil.rmtree(dbpath)
def test_metadata_keys_iter():
    """Check metadata key iteration before and after a commit.

    """
    dbpath = 'db_test_metadata_iter'
    db = xapian.WritableDatabase(dbpath, xapian.DB_CREATE_OR_OVERWRITE)

    def check(database, populated):
        # Run the full set of key lookups against one database handle;
        # `populated` says whether that handle should see the metadata yet.
        every_key = [b'author', b'item1', b'item2', b'type'] if populated else []
        item_keys = [b'item1', b'item2'] if populated else []
        type_keys = [b'type'] if populated else []
        expect(list(database.metadata_keys()), every_key)
        expect(list(database.metadata_keys('foo')), [])
        expect(list(database.metadata_keys('item')), item_keys)
        expect(list(database.metadata_keys('it')), item_keys)
        expect(list(database.metadata_keys('type')), type_keys)

    # Applied in order: item1 is overwritten, item3 and item4 end up empty.
    assignments = (
        ('author', 'richard'),
        ('item1', 'hello'),
        ('item1', 'hi'),
        ('item2', 'howdy'),
        ('item3', ''),
        ('item4', 'goodbye'),
        ('item4', ''),
        ('type', 'greeting'),
    )
    for key, value in assignments:
        db.set_metadata(key, value)

    # Uncommitted: only the writer sees the keys.
    check(db, True)

    dbr = xapian.Database(dbpath)
    check(dbr, False)

    db.commit()

    # Committed: the writer and a freshly opened reader both see them.
    check(db, True)

    dbr = xapian.Database(dbpath)
    check(dbr, True)

    db.close()
    expect(xapian.Database.check(dbpath), 0)
    dbr.close()
    shutil.rmtree(dbpath)
def test_spell():
    """Check spelling suggestions and spelling iteration around a commit.

    """
    dbpath = 'db_test_spell'
    db = xapian.WritableDatabase(dbpath, xapian.DB_CREATE_OR_OVERWRITE)

    def spellings(database):
        # Every (term, termfreq) pair in the database's spelling list.
        return [(item.term, item.termfreq) for item in database.spellings()]

    db.add_spelling('hello')
    db.add_spelling('mell', 2)
    expect(db.get_spelling_suggestion('hell'), b'mell')
    expect(spellings(db), [(b'hello', 1), (b'mell', 2)])

    # A reader opened before the commit sees no spelling data.
    dbr = xapian.Database(dbpath)
    expect(dbr.get_spelling_suggestion('hell'), b'')
    expect(spellings(dbr), [])

    db.commit()
    dbr = xapian.Database(dbpath)
    expect(db.get_spelling_suggestion('hell'), b'mell')
    expect(dbr.get_spelling_suggestion('hell'), b'mell')
    expect(spellings(dbr), [(b'hello', 1), (b'mell', 2)])

    db.close()
    expect(xapian.Database.check(dbpath), 0)
    dbr.close()
    shutil.rmtree(dbpath)
def test_queryparser_custom_vrp():
    """Check that QueryParser uses a ValueRangeProcessor written in Python.

    """
    class MyVRP(xapian.ValueRangeProcessor):
        def __init__(self):
            xapian.ValueRangeProcessor.__init__(self)

        def __call__(self, begin, end):
            # Always claim slot 7 and tag both ends of the range.
            lower = "A" + begin
            upper = "B" + end
            return (7, lower, upper)

    queryparser = xapian.QueryParser()
    # Keep a local reference to the processor for the whole test.
    myvrp = MyVRP()

    queryparser.add_valuerangeprocessor(myvrp)
    parsed = queryparser.parse_query('5..8')

    expect(str(parsed), 'Query(0 * VALUE_RANGE 7 A5 B8)')
def test_queryparser_custom_vrp_deallocation():
    """Check that a QueryParser keeps its ValueRangeProcessors usable.

    The processor is created inside a helper, so the only Python name bound
    to it goes out of scope before the parser is used.

    """
    class MyVRP(xapian.ValueRangeProcessor):
        def __init__(self):
            xapian.ValueRangeProcessor.__init__(self)

        def __call__(self, begin, end):
            # Always claim slot 7 and tag both ends of the range.
            lower = "A" + begin
            upper = "B" + end
            return (7, lower, upper)

    def make_parser():
        parser = xapian.QueryParser()
        myvrp = MyVRP()
        parser.add_valuerangeprocessor(myvrp)
        return parser

    queryparser = make_parser()
    parsed = queryparser.parse_query('5..8')

    expect(str(parsed), 'Query(0 * VALUE_RANGE 7 A5 B8)')
def test_scale_weight():
    """Check that OP_SCALE_WEIGHT multiplies weights and rejects factor -1.

    """
    db = setup_database()
    for mult in (0, 1, 2.5):
        context("checking queries with OP_SCALE_WEIGHT with a multiplier of %r" %
                mult)
        plain = xapian.Query("it")
        scaled = xapian.Query(xapian.Query.OP_SCALE_WEIGHT, plain, mult)

        enquire = xapian.Enquire(db)
        enquire.set_query(plain)
        plain_mset = enquire.get_mset(0, 10)
        enquire.set_query(scaled)
        scaled_mset = enquire.get_mset(0, 10)

        # Weights are compared as integers after scaling by a million.
        if mult > 0:
            expected = [(int(hit.weight * mult * 1000000), hit.docid)
                        for hit in plain_mset]
        else:
            # Every expected weight is zero here; the pairs are put into
            # sorted order before comparing.
            expected = sorted((0, hit.docid) for hit in plain_mset)
        actual = [(int(hit.weight * 1000000), hit.docid) for hit in scaled_mset]
        expect(actual, expected)

    context("checking queries with OP_SCALE_WEIGHT with a multiplier of -1")
    plain = xapian.Query("it")
    expect_exception(xapian.InvalidArgumentError,
                     "OP_SCALE_WEIGHT requires factor >= 0",
                     xapian.Query,
                     xapian.Query.OP_SCALE_WEIGHT, plain, -1)
def test_weight_normalise():
    """Check that OP_SCALE_WEIGHT can normalise query weights.

    For each query a search asking for no results is run first to obtain
    MSet.get_max_possible().  The query is then scaled by the reciprocal of
    that value and run again; the scaled search must report a maximum
    possible weight of 1 (to within rounding) and every hit's weight must lie
    in the range (0, 1].

    """
    queries = (
        "it",
        "was",
        "it was",
        "it was four",
        "it was four five",
        '"was it warm" four notpresent',
        "notpresent",
    )
    db = setup_database()
    for text in queries:
        context("checking query %r using OP_SCALE_WEIGHT to normalise the weights" % text)
        parser = xapian.QueryParser()
        unscaled = parser.parse_query(text)
        enquire = xapian.Enquire(db)
        enquire.set_query(unscaled)
        probe = enquire.get_mset(0, 0)

        # Check the max_attained value is 0 - this gives us some reassurance
        # that the match didn't actually do the work of calculating any
        # results.
        expect(probe.get_max_attained(), 0)

        max_possible = probe.get_max_possible()
        if text == "notpresent":
            # Nothing can match, so there is no weight to normalise.
            expect(max_possible, 0)
            continue
        mult = 1.0 / max_possible
        normalised = xapian.Query(xapian.Query.OP_SCALE_WEIGHT, unscaled, mult)

        enquire = xapian.Enquire(db)
        enquire.set_query(normalised)
        results = enquire.get_mset(0, 10)
        # max_possible should be 1 (excluding rounding errors) for results
        expect(int(results.get_max_possible() * 1000000.0 + 0.5), 1000000)
        for hit in results:
            expect(hit.weight > 0, True)
            expect(hit.weight <= 1, True)
def test_valuesetmatchdecider():
    """Simple tests of the ValueSetMatchDecider class.

    A decider built with True is expected to accept a document only while
    the document's slot 0 value is in the decider's set; a decider built
    with False is expected to do the opposite.

    """
    doc = xapian.Document()

    accepting = xapian.ValueSetMatchDecider(0, True)
    expect(accepting(doc), False)

    accepting.add_value('foo')
    doc.add_value(0, 'foo')
    expect(accepting(doc), True)

    accepting.remove_value('foo')
    expect(accepting(doc), False)

    # The document still carries 'foo' in slot 0 from here on.
    rejecting = xapian.ValueSetMatchDecider(0, False)
    expect(rejecting(doc), True)

    rejecting.add_value('foo')
    expect(rejecting(doc), False)
def test_postingsource():
    """Simple test of a PostingSource subclass written in Python.

    The source returns the odd docids with a weight that drops by one at
    each step.  It is wrapped in several layers of short-lived objects to
    check that it stays alive until the match has run.

    """
    class OddPostingSource(xapian.PostingSource):
        def __init__(self, max):
            xapian.PostingSource.__init__(self)
            self.max = max

        def init(self, db):
            self.current = -1
            self.weight = db.get_doccount() + 1
            self.set_maxweight(self.weight)

        def get_termfreq_min(self):
            return 0

        def get_termfreq_est(self):
            return int(self.max / 2)

        def get_termfreq_max(self):
            return self.max

        def __next__(self, minweight):
            self.current += 2
            self.weight -= 1.0
            self.set_maxweight(self.weight)

        def at_end(self):
            return self.current > self.max

        def get_docid(self):
            return self.current

        def get_weight(self):
            return self.weight

    dbpath = 'db_test_postingsource'
    db = xapian.WritableDatabase(dbpath, xapian.DB_CREATE_OR_OVERWRITE)
    for _ in range(10):
        empty_doc = xapian.Document()
        db.add_document(empty_doc)

    # Do a dance to check that the posting source doesn't get dereferenced too
    # soon in various cases.
    def mkenq(db):
        # Case 1: the source's own name goes out of scope.
        def mkquery():
            source = OddPostingSource(10)
            # The posting source query sits inside a list to check that
            # case is handled correctly too.
            subqueries = ["terM wHich wilL NoT maTch", xapian.Query(source)]
            return xapian.Query(xapian.Query.OP_OR, subqueries)

        # Case 2: the query goes out of scope.
        def submkenq():
            query = mkquery()
            inner = xapian.Enquire(db)
            inner.set_query(query)
            return inner

        # Case 3: the query is fetched back from one enquire and handed to
        # a fresh one.
        def submkenq2():
            first = submkenq()
            second = xapian.Enquire(db)
            second.set_query(first.get_query())
            return second

        return submkenq2()

    enquire = mkenq(db)
    mset = enquire.get_mset(0, 10)

    expect([hit.docid for hit in mset], [1, 3, 5, 7, 9])
    expect(mset[0].weight, db.get_doccount())

    db.close()
    expect(xapian.Database.check(dbpath), 0)
    shutil.rmtree(dbpath)
def test_postingsource2():
    """Test a query built from a built-in ValueWeightPostingSource.

    The Python reference to the source is deleted before the match runs, and
    the results are expected in descending order of the value in slot 1.

    """
    dbpath = 'db_test_postingsource2'
    db = xapian.WritableDatabase(dbpath, xapian.DB_CREATE_OR_OVERWRITE)
    for number in (6, 9, 4.5, 4.4, 4.6, 2, 1, 4, 3, 0):
        doc = xapian.Document()
        doc.add_value(1, xapian.sortable_serialise(number))
        db.add_document(doc)

    source = xapian.ValueWeightPostingSource(1)
    query = xapian.Query(source)
    del source  # Check that query keeps a reference to it.

    enquire = xapian.Enquire(db)
    enquire.set_query(query)
    mset = enquire.get_mset(0, 10)

    expect([hit.docid for hit in mset], [2, 1, 5, 3, 4, 8, 9, 6, 7, 10])

    db.close()
    shutil.rmtree(dbpath)
def test_postingsource3():
    """Test that ValuePostingSource can be usefully subclassed.

    The subclass weights each document by the cube of the number stored in
    slot 1, so the documents are expected back in descending value order.

    """
    dbpath = 'db_test_postingsource3'
    db = xapian.WritableDatabase(dbpath, xapian.DB_CREATE_OR_OVERWRITE)
    for wt in (1, 3, 2, 4):
        doc = xapian.Document()
        doc.add_value(1, xapian.sortable_serialise(wt))
        db.add_document(doc)

    class PyValuePostingSource(xapian.ValuePostingSource):
        def __init__(self, slot):
            xapian.ValuePostingSource.__init__(self, slot)

        def init(self, db):
            xapian.ValuePostingSource.init(self, db)
            self.current = -1
            # The largest weight is the cube of the slot's upper bound.
            upper = db.get_value_upper_bound(self.get_slot())
            self.set_maxweight(xapian.sortable_unserialise(upper) ** 3)

        def next(self, minweight):
            return xapian.ValuePostingSource.next(self, minweight)

        def get_weight(self):
            return xapian.sortable_unserialise(self.get_value()) ** 3

    source = PyValuePostingSource(1)
    query = xapian.Query(source)
    # The local reference is kept alive here; the following step is disabled:
    #del source # Check that query keeps a reference to it.

    enquire = xapian.Enquire(db)
    enquire.set_query(query)
    mset = enquire.get_mset(0, 10)

    expect([hit.docid for hit in mset], [4, 2, 3, 1])

    db.close()
    expect(xapian.Database.check(dbpath), 0)
    shutil.rmtree(dbpath)
def test_value_stats():
    """Simple test of being able to get value statistics.

    Ten documents get a number in slot 1 only.  Slot 1 must report a
    frequency of 10 with bounds matching the smallest and largest numbers;
    the unused slots 0 and 2 must report 0 and empty bounds.

    """
    dbpath = 'db_test_value_stats'
    db = xapian.WritableDatabase(dbpath, xapian.DB_CREATE_OR_OVERWRITE)

    for number in (6, 9, 4.5, 4.4, 4.6, 2, 1, 4, 3, 0):
        doc = xapian.Document()
        doc.add_value(1, xapian.sortable_serialise(number))
        db.add_document(doc)

    def expect_stats(slot, freq, lower, upper):
        expect(db.get_value_freq(slot), freq)
        expect(db.get_value_lower_bound(slot), lower)
        expect(db.get_value_upper_bound(slot), upper)

    expect_stats(0, 0, b"", b"")
    expect_stats(1, 10,
                 xapian.sortable_serialise(0), xapian.sortable_serialise(9))
    expect_stats(2, 0, b"", b"")

    db.close()
    expect(xapian.Database.check(dbpath), 0)
    shutil.rmtree(dbpath)
def test_get_uuid():
    """Test getting UUIDs from databases.

    Two freshly created databases must have different UUIDs, a read-only
    handle must report the same UUID as the writable handle on the same
    path, and a Database with a single sub-database added must report that
    sub-database's UUID.

    """
    dbpath = 'db_test_get_uuid'
    path1 = dbpath + "1"
    path2 = dbpath + "2"

    db1 = xapian.WritableDatabase(path1, xapian.DB_CREATE_OR_OVERWRITE)
    db2 = xapian.WritableDatabase(path2, xapian.DB_CREATE_OR_OVERWRITE)
    dbr1 = xapian.Database(path1)
    dbr2 = xapian.Database(path2)
    expect(db1.get_uuid() != db2.get_uuid(), True)
    expect(db1.get_uuid(), dbr1.get_uuid())
    expect(db2.get_uuid(), dbr2.get_uuid())

    db = xapian.Database()
    db.add_database(db1)
    expect(db1.get_uuid(), db.get_uuid())

    for handle in (db1, db2, dbr1, dbr2, db):
        handle.close()
    for path in (path1, path2):
        shutil.rmtree(path)
def test_director_exception():
    """Test handling of an exception raised in a director.

    Python subclasses of ExpandDecider and MatchDecider raise a custom
    exception from __call__.  The same exception must surface both when the
    decider is called directly and when it is invoked through
    Enquire.get_eset() / Enquire.get_mset().

    """
    class TestException(Exception):
        def __init__(self, a, b):
            Exception.__init__(self, a + b)

    class EDecider(xapian.ExpandDecider):
        def __call__(self, term):
            raise TestException("foo", "bar")

    class MDecider(xapian.MatchDecider):
        def __call__(self, doc):
            raise TestException("foo", "bar")

    db = setup_database()
    enq = xapian.Enquire(db)
    enq.set_query(xapian.Query('it'))

    rset = xapian.RSet()
    rset.add_document(1)

    expand_decider = EDecider()
    expect_exception(TestException, "foobar", expand_decider, "foo")
    expect_exception(TestException, "foobar",
                     enq.get_eset, 10, rset, expand_decider)

    match_decider = MDecider()
    expect_exception(TestException, "foobar", match_decider, xapian.Document())
    expect_exception(TestException, "foobar",
                     enq.get_mset, 0, 10, None, match_decider)
def check_vals(db, vals):
    """Check that the values in slot 1 are as in vals.

    `vals` is indexed by docid and must have an entry for every docid from 1
    up to the database's last docid.

    """
    last_docid = db.get_lastdocid()
    docid = 1
    while docid <= last_docid:
        stored = db.get_document(docid).get_value(1)
        expect(stored, vals[docid], "Expected stored value in doc %d" % docid)
        docid += 1
def test_value_mods():
    """Test handling of modifications to values.

    Values in slot 1 are added, replaced and removed, and a dict mirroring
    the expected value for each docid is checked against the database via
    check_vals() both before and after each commit.

    """
    dbpath = 'db_test_value_mods'
    db = xapian.WritableDatabase(dbpath, xapian.DB_CREATE_OR_OVERWRITE)
    # Fixed seed so the "random" modifications are the same on every run.
    random.seed(42)
    doccount = 1000
    vals = {}

    # Add a value to all the documents
    for num in range(1, doccount):
        doc = xapian.Document()
        val = ('val%d' % num).encode('utf-8')
        doc.add_value(1, val)
        db.add_document(doc)
        vals[num] = val
    db.commit()
    check_vals(db, vals)

    # Modify one of the values (this is a regression test which failed with the
    # initial implementation of streaming values).
    doc = xapian.Document()
    val = b'newval0'
    doc.add_value(1, val)
    db.replace_document(2, doc)
    vals[2] = val
    db.commit()
    check_vals(db, vals)

    # Do some random modifications.  Note that randint() is inclusive at both
    # ends, so docid `doccount` (which the loop above never added) can be
    # picked here too.
    for count in range(1, doccount * 2):
        docid = random.randint(1, doccount)
        doc = xapian.Document()

        # Every fifth modification stores an empty value instead.
        if count % 5 == 0:
            val = b''
        else:
            val = ('newval%d' % count).encode('utf-8')
        doc.add_value(1, val)
        db.replace_document(docid, doc)
        vals[docid] = val

    # Check the values before and after modification.
    check_vals(db, vals)
    db.commit()
    check_vals(db, vals)

    # Delete all the values which are non-empty, in a random order.
    # The values are bytes, so the comparison must be against b'': comparing
    # with the str '' is always unequal and would select every key.
    keys = [key for key, val in vals.items() if val != b'']
    random.shuffle(keys)
    for key in keys:
        doc = xapian.Document()
        db.replace_document(key, doc)
        vals[key] = b''
    check_vals(db, vals)
    db.commit()
    check_vals(db, vals)

    db.close()
    expect_exception(xapian.DatabaseError, "Database has been closed", check_vals, db, vals)
    shutil.rmtree(dbpath)
def test_serialise_document():
    """Test serialisation of documents.

    A document built by hand and one fetched from the sample database are
    each serialised and unserialised; the copy must have the same terms,
    values and data as the original.

    """
    def check_round_trip(doc, term_count, data):
        # Compare `doc` with its serialise/unserialise copy, and check its
        # term count and data against the given expectations.
        copy = xapian.Document.unserialise(doc.serialise())
        expect(len(list(doc.termlist())), len(list(copy.termlist())))
        expect(len(list(doc.termlist())), term_count)
        expect([(item.term, item.wdf) for item in doc.termlist()],
               [(item.term, item.wdf) for item in copy.termlist()])
        expect([(item.num, item.value) for item in list(doc.values())],
               [(item.num, item.value) for item in list(copy.values())])
        expect(doc.get_data(), copy.get_data())
        expect(doc.get_data(), data)

    fresh = xapian.Document()
    fresh.add_term('foo', 2)
    fresh.add_value(1, b'bar')
    fresh.set_data('baz')
    check_round_trip(fresh, 1, b'baz')

    db = setup_database()
    check_round_trip(db.get_document(1), 3, b'is it cold?')
def test_serialise_query():
    """Test serialisation of queries.

    An empty query, a single-term query and an OR of two terms are each
    serialised and unserialised; the copy's description must match the
    original's, which in turn must match the expected text.

    """
    def check_round_trip(query, description):
        copy = xapian.Query.unserialise(query.serialise())
        expect(str(query), str(copy))
        expect(str(query), description)

    check_round_trip(xapian.Query(), 'Query()')
    check_round_trip(xapian.Query('hello'), 'Query(hello)')
    # The second term is deliberately passed as bytes.
    check_round_trip(xapian.Query(xapian.Query.OP_OR, ('hello', b'world')),
                     'Query((hello OR world))')
def test_preserve_query_parser_stopper():
    """Test preservation of stopper set on query parser.

    The stopper's Python name is deleted inside the helper that builds the
    parser, and the parser must still apply it when parsing afterwards.

    """
    def make_qp():
        parser = xapian.QueryParser()
        stopper = xapian.SimpleStopper()
        for word in ('to', 'not'):
            stopper.add(word)
        parser.set_stopper(stopper)
        del stopper
        return parser

    queryparser = make_qp()
    parsed = queryparser.parse_query('to be')
    expect(list(queryparser.stoplist()), [b'to'])
def test_preserve_term_generator_stopper():
    """Test preservation of stopper set on term generator.

    The stopper's Python name is deleted inside the helper that builds the
    term generator, and text is indexed only after the helper has returned.

    """
    def make_tg():
        generator = xapian.TermGenerator()
        generator.set_stemmer(xapian.Stem('en'))
        stopper = xapian.SimpleStopper()
        for word in ('to', 'not'):
            stopper.add(word)
        generator.set_stopper(stopper)
        del stopper
        return generator

    termgen = make_tg()

    termgen.index_text('to be')
    indexed = termgen.get_document()
    terms = sorted(item.term for item in indexed.termlist())
    expect(terms, [b'Zbe', b'be', b'to'])
def test_preserve_enquire_sorter():
    """Test preservation of sorter set on enquire.

    For each of the three sort-by-key setters, a MultiValueKeyMaker is set
    on an Enquire and its Python name deleted before the match is run.

    """
    db = xapian.WritableDatabase('', xapian.DB_BACKEND_INMEMORY)
    doc = xapian.Document()
    doc.add_term('foo')
    doc.add_value(1, '1')
    # The same document is added twice.
    db.add_document(doc)
    db.add_document(doc)

    def make_enq(db, setter_name):
        # Build an Enquire whose sorter is installed through the named
        # setter, then drop the local reference to the sorter.
        enq = xapian.Enquire(db)
        sorter = xapian.MultiValueKeyMaker()
        getattr(enq, setter_name)(sorter, False)
        del sorter
        return enq

    for setter_name in ('set_sort_by_key',
                        'set_sort_by_key_then_relevance',
                        'set_sort_by_relevance_then_key'):
        enq = make_enq(db, setter_name)
        enq.set_query(xapian.Query('foo'))
        enq.get_mset(0, 10)
def test_matchspy():
    """Test use of matchspies.

    Covers a spy whose Python reference is dropped before the match runs,
    clear_matchspies(), and the value counts a ValueCountMatchSpy collects
    for slot 0.

    """
    db = setup_database()
    enq = xapian.Enquire(db)
    enq.set_query(xapian.Query(xapian.Query.OP_OR, "was", "it"))

    def set_matchspy_deref(enq):
        """Set a matchspy, and then drop the reference, to check that it
        doesn't get deleted too soon.
        """
        dropped_spy = xapian.ValueCountMatchSpy(0)
        enq.add_matchspy(dropped_spy)
        del dropped_spy

    set_matchspy_deref(enq)
    mset = enq.get_mset(0, 10)
    expect(len(mset), 5)

    spy = xapian.ValueCountMatchSpy(0)
    enq.add_matchspy(spy)
    # Regression test for clear_matchspies() - used to always raise an
    # exception due to a copy and paste error in its definition.
    enq.clear_matchspies()
    mset = enq.get_mset(0, 10)
    # The spy was cleared before the match ran, so it has seen nothing.
    expect([item for item in list(spy.values())], [])

    enq.add_matchspy(spy)
    mset = enq.get_mset(0, 10)
    expect(spy.get_total(), 5)

    one_and_a_half = (xapian.sortable_serialise(1.5), 1)
    two = (xapian.sortable_serialise(2), 2)
    # values() is expected in value order, top_values() most frequent first.
    expect([(item.term, item.termfreq) for item in list(spy.values())],
           [one_and_a_half, two])
    expect([(item.term, item.termfreq) for item in spy.top_values(10)],
           [two, one_and_a_half])
def test_import_star():
    """Test that "from xapian import *" works.

    This is a regression test - the star import failed in the 1.2.0 release.
    Star imports aren't normally good style, but they should work anyway!

    The test consists solely of importing the helper module
    test_xapian_star.

    """
    import test_xapian_star
def test_latlongcoords_iter():
    """Test LatLongCoordsIterator wrapping.

    An empty LatLongCoords must iterate to nothing; after two appends,
    iteration must yield both coordinates in insertion order.

    """
    coords = xapian.LatLongCoords()
    expect(list(coords), [])

    for lat, lon in ((0, 0), (0, 1)):
        coords.append(xapian.LatLongCoord(lat, lon))

    descriptions = [str(coord) for coord in coords]
    expect(descriptions, ['Xapian::LatLongCoord(0, 0)',
                          'Xapian::LatLongCoord(0, 1)'])
def test_compactor():
    """Test that xapian.Compactor works.

    Two small databases with one overlapping term and one overlapping
    metadata key are compacted into a third database, first with the stock
    Compactor and then with a Python subclass which logs status updates and
    merges duplicate metadata values.

    All databases live in a temporary directory which is removed at the end,
    whether or not the checks pass.

    """
    tmpdir = tempfile.mkdtemp()
    db1 = db2 = db3 = None
    try:
        db1path = os.path.join(tmpdir, 'db1')
        db2path = os.path.join(tmpdir, 'db2')
        db3path = os.path.join(tmpdir, 'db3')

        # Set up a couple of sample input databases
        db1 = xapian.WritableDatabase(db1path, xapian.DB_CREATE_OR_OVERWRITE)
        doc1 = xapian.Document()
        doc1.add_term('Hello')
        doc1.add_term('Hello1')
        doc1.add_value(0, 'Val1')
        db1.set_metadata('key', '1')
        db1.set_metadata('key1', '1')
        db1.add_document(doc1)
        # commit() rather than the deprecated flush() alias, matching the
        # other tests in this file.
        db1.commit()

        db2 = xapian.WritableDatabase(db2path, xapian.DB_CREATE_OR_OVERWRITE)
        doc2 = xapian.Document()
        doc2.add_term('Hello')
        doc2.add_term('Hello2')
        doc2.add_value(0, 'Val2')
        db2.set_metadata('key', '2')
        db2.set_metadata('key2', '2')
        db2.add_document(doc2)
        db2.commit()

        # Compact with the default compactor
        # Metadata conflicts are resolved by picking the first value
        c = xapian.Compactor()
        c.add_source(db1path)
        c.add_source(db2path)
        c.set_destdir(db3path)
        c.compact()

        db3 = xapian.Database(db3path)
        expect([(item.term, item.termfreq) for item in db3.allterms()],
               [(b'Hello', 2), (b'Hello1', 1), (b'Hello2', 1)])
        expect(db3.get_document(1).get_value(0), b'Val1')
        expect(db3.get_document(2).get_value(0), b'Val2')
        expect(db3.get_metadata('key'), b'1')
        expect(db3.get_metadata('key1'), b'1')
        expect(db3.get_metadata('key2'), b'2')

        # Close the handle on the first compaction's output before
        # compacting into the same destination directory again.
        db3.close()
        db3 = None

        context("testing a custom compactor which merges duplicate metadata")
        class MyCompactor(xapian.Compactor):
            def __init__(self):
                xapian.Compactor.__init__(self)
                self.log = []

            def set_status(self, table, status):
                # An empty status is logged as the start of a table.
                if len(status) == 0:
                    self.log.append('Starting %s' % table.decode('utf-8'))
                else:
                    self.log.append('%s: %s' % (table.decode('utf-8'), status.decode('utf-8')))

            def resolve_duplicate_metadata(self, key, vals):
                return b','.join(vals)

        c = MyCompactor()
        c.add_source(db1path)
        c.add_source(db2path)
        c.set_destdir(db3path)
        c.compact()
        log = '\n'.join(c.log)
        # Check we got some messages in the log
        expect('Starting postlist' in log, True)

        db3 = xapian.Database(db3path)
        expect([(item.term, item.termfreq) for item in db3.allterms()],
               [(b'Hello', 2), (b'Hello1', 1), (b'Hello2', 1)])
        expect(db3.get_metadata('key'), b'1,2')
        expect(db3.get_metadata('key1'), b'1')
        expect(db3.get_metadata('key2'), b'2')

    finally:
        # Close whichever databases got opened before removing their files.
        for handle in (db1, db2, db3):
            if handle is not None:
                handle.close()

        shutil.rmtree(tmpdir)
def test_custom_matchspy():
    """Test a MatchSpy subclass written in Python.

    The spy counts how often it is called; after a match asking for one
    result it must have been called at least once.

    """
    class MSpy(xapian.MatchSpy):
        def __init__(self):
            xapian.MatchSpy.__init__(self)
            self.count = 0

        def __call__(self, doc, weight):
            self.count += 1

    mspy = MSpy()

    db = setup_database()
    either_term = xapian.Query(xapian.Query.OP_OR, "was", "it")

    enquire = xapian.Enquire(db)
    enquire.add_matchspy(mspy)
    enquire.set_query(either_term)
    mset = enquire.get_mset(0, 1)
    expect(len(mset), 1)
    expect(mspy.count >= 1, True)

    expect(db.get_doccount(), 5)
def test_removed_features():
    """Check that a list of old attribute names is no longer available.

    For each (object, attribute names) pair below, looking the attribute up
    with getattr() must raise AttributeError.

    """
    db = xapian.WritableDatabase('', xapian.DB_BACKEND_INMEMORY)
    doc = xapian.Document()
    enq = xapian.Enquire(db)
    eset = xapian.ESet()
    mset = xapian.MSet()
    query = xapian.Query()
    qp = xapian.QueryParser()
    titer = xapian._TermIterator()
    postiter = xapian._PostingIterator()

    removed = (
        (xapian, (
            'Stem_get_available_languages',
            'TermIterator',
            'PositionIterator',
            'PostingIterator',
            'ValueIterator',
            'MSetIterator',
            'ESetIterator',
        )),
        (db, (
            'allterms_begin', 'allterms_end',
            'metadata_keys_begin', 'metadata_keys_end',
            'synonym_keys_begin', 'synonym_keys_end',
            'synonyms_begin', 'synonyms_end',
            'spellings_begin', 'spellings_end',
            'positionlist_begin', 'positionlist_end',
            'postlist_begin', 'postlist_end',
            'termlist_begin', 'termlist_end',
        )),
        (doc, (
            'termlist_begin', 'termlist_end',
            'values_begin', 'values_end',
        )),
        (enq, ('get_matching_terms_begin', 'get_matching_terms_end')),
        (eset, ('begin', 'end')),
        (mset, ('begin', 'end')),
        (postiter, ('positionlist_begin', 'positionlist_end')),
        (query, ('get_terms_begin', 'get_terms_end')),
        (qp, (
            'stoplist_begin', 'stoplist_end',
            'unstem_begin', 'unstem_end',
        )),
        (titer, ('positionlist_begin', 'positionlist_end')),
    )

    for obj, attrs in removed:
        for attr in attrs:
            expect_exception(AttributeError, None, getattr, obj, attr)
# Overall outcome; set to False by run() as soon as any test run fails.
result = True

def run():
    """Run all tests (ie, callables with names starting "test_").

    Any command line arguments are passed on to runtests().  A failing run
    is recorded in the module-level `result` flag rather than returned, so
    that this function can also be used as a thread target.

    """
    global result
    passed = runtests(globals(), sys.argv[1:])
    if not passed:
        result = False
print("Running tests without threads")
run()

if have_threads:
    print("Running tests with threads")

    # This testcase seems to just block when run in a thread under Python 3
    # on some platforms.  It fails with 3.2.3 on Debian wheezy, but passes
    # with the exact same package version on Debian unstable not long after
    # the jessie release.  The issue it actually serves as a regression test
    # for is already covered by the run without threads above, so just
    # disable it rather than risk test failures that don't seem to indicate
    # a problem in Xapian.
    del test_import_star

    t = threading.Thread(target=run, name='test runner')
    t.start()
    # Wait for the thread to finish so that `result` reflects its outcome
    # before the exit status is decided below.
    t.join()

if not result:
    sys.exit(1)
1677 # vim:syntax=python:set expandtab: