bootstrap: Add --fetch-url-command option
[xapian.git] / xapian-bindings / python3 / pythontest.py
blobb193bd378fdadb8fb680bab70405b3783edb38f9
1 # Tests of Python-specific parts of the xapian bindings.
3 # Copyright (C) 2007 Lemur Consulting Ltd
4 # Copyright (C) 2008,2009,2010,2011,2013,2014,2015,2016 Olly Betts
5 # Copyright (C) 2010,2011 Richard Boulton
7 # This program is free software; you can redistribute it and/or
8 # modify it under the terms of the GNU General Public License as
9 # published by the Free Software Foundation; either version 2 of the
10 # License, or (at your option) any later version.
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with this program; if not, write to the Free Software
19 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
20 # USA
22 import os
23 import random
24 import shutil
25 import sys
26 import tempfile
27 import xapian
29 try:
30 import threading
31 have_threads = True
32 except ImportError:
33 have_threads = False
35 from testsuite import *
def setup_database():
    """Set up and return an inmemory database with 5 documents.

    The first document is built on its own.  Documents 2 to 5 are produced by
    repeatedly modifying and re-adding a single Document object, so each of
    them keeps the terms, postings and values given to the previous one.

    As a side check, this also verifies that sortable_unserialise() rejects a
    str argument.

    """
    db = xapian.WritableDatabase('', xapian.DB_BACKEND_INMEMORY)

    # Document 1.
    doc = xapian.Document()
    doc.set_data("is it cold?")
    doc.add_term("is")
    doc.add_posting("it", 1)
    doc.add_posting("cold", 2)
    db.add_document(doc)

    # Document 2.
    doc = xapian.Document()
    doc.set_data("was it warm?")
    doc.add_posting("was", 1)
    doc.add_posting("it", 2)
    doc.add_posting("warm", 3)
    db.add_document(doc)

    # Document 3: the same Document object with an extra term and a value in
    # slot 0.
    doc.set_data("was it warm? two")
    doc.add_term("two", 2)
    doc.add_value(0, xapian.sortable_serialise(2))
    db.add_document(doc)

    # Document 4: one more term, and slot 0 replaced.
    doc.set_data("was it warm? three")
    doc.add_term("three", 3)
    doc.add_value(0, xapian.sortable_serialise(1.5))
    db.add_document(doc)

    # Document 5: more occurrences of "it" and values in slots 5 and 9.
    doc.set_data("was it warm? four it")
    doc.add_term("four", 4)
    doc.add_term("it", 6)
    doc.add_posting("it", 7)
    doc.add_value(5, 'five')
    doc.add_value(9, 'nine')
    doc.add_value(0, xapian.sortable_serialise(2))
    db.add_document(doc)

    expect(db.get_doccount(), 5)

    # Test that str is rejected by sortable_unserialise().  Using
    # expect_exception() makes the check fail if no TypeError is raised at
    # all, instead of passing silently.
    expect_exception(TypeError, 'expected bytes, str found',
                     xapian.sortable_unserialise, "unicode")

    return db
def test_exception_base():
    """Check that xapian exceptions have Exception as a base class.

    A xapian exception is raised and must be caught by an
    ``except Exception`` clause; otherwise it propagates out of the test.

    """
    try:
        raise xapian.InvalidOperationError("Test exception")
    except Exception:
        # Being caught here is the whole point of the test.
        pass
def test_mset_iter():
    """Test iterators over MSets.

    Runs an OR query over the test database, checks one item of the full
    MSet, and then compares every sub-MSet (all start/maxitems combinations
    in 0..5) against get_hit(), the full MSet and indexed access.

    """
    db = setup_database()
    query = xapian.Query(xapian.Query.OP_OR, "was", "it")

    enquire = xapian.Enquire(db)
    enquire.set_query(query)
    mset = enquire.get_mset(0, 10)
    items = [entry for entry in mset]
    expect(len(items), 5)
    expect(len(mset), len(items), "Expected number of items to be length of mset")

    context("testing returned item from mset")
    third = items[2]
    expect(third.docid, 4)
    expect(third.rank, 2)
    expect(third.percent, 86)
    expect(third.collapse_key, b'')
    expect(third.collapse_count, 0)
    expect(third.document.get_data(), b'was it warm? three')

    # Check iterators for sub-msets against the whole mset.
    for start in range(0, 6):
        for maxitems in range(0, 6):
            window = (start, maxitems)
            context("checking iterators for sub-mset from %d, maxitems %d" % window)
            submset = enquire.get_mset(start, maxitems)
            for num, item in enumerate(submset):
                where = (num,) + window
                context("testing hit %d for sub-mset from %d, maxitems %d" % where)
                expect(item.rank, num + start)

                context("comparing iterator item %d for sub-mset from %d, maxitems %d against hit" % where)
                hit = submset.get_hit(num)
                expect(hit.docid, item.docid)
                expect(hit.rank, item.rank)
                expect(hit.percent, item.percent)
                expect(hit.document.get_data(), item.document.get_data())
                expect(hit.collapse_key, item.collapse_key)
                expect(hit.collapse_count, item.collapse_count)

                context("comparing iterator item %d for sub-mset from %d, maxitems %d against hit from whole mset" % where)
                hit = mset.get_hit(num + start)
                expect(hit.docid, item.docid)
                expect(hit.rank, item.rank)
                expect(hit.percent, item.percent)
                expect(hit.document.get_data(), item.document.get_data())
                expect(hit.collapse_key, item.collapse_key)
                expect(hit.collapse_count, item.collapse_count)

                context("comparing iterator item %d for sub-mset from %d, maxitems %d against direct access with []" % where)
                # Index the sub-mset afresh for each attribute, so that
                # __getitem__ is exercised every time.
                expect(submset[num].docid, item.docid)
                expect(submset[num].rank, item.rank)
                expect(submset[num].percent, item.percent)
                expect(submset[num].document.get_data(), item.document.get_data())
                expect(submset[num].collapse_key, item.collapse_key)
                expect(submset[num].collapse_count, item.collapse_count)

            context("Checking out of range access to mset, for sub-mset from %d, maxitems %d" % window)
            # Test out-of-range access to mset:
            for bad_index in (-10, 10, -1-len(submset), len(submset)):
                expect_exception(IndexError, 'Mset index out of range',
                                 submset.__getitem__, bad_index)

            # Check that the item contents remain valid when the iterator has
            # moved on.
            saved_items = [entry for entry in submset]
            for num, item in enumerate(saved_items):
                context("comparing iterator item %d for sub-mset mset from %d, maxitems %d against saved item" % (num, start, maxitems))
                expect(submset[num].docid, item.docid)
                expect(submset[num].rank, item.rank)
                expect(submset[num].percent, item.percent)
                expect(submset[num].document.get_data(), item.document.get_data())
                expect(submset[num].collapse_key, item.collapse_key)
                expect(submset[num].collapse_count, item.collapse_count)

            # Check that the right number of items exist in the mset.
            context("checking length of sub-mset from %d, maxitems %d" % window)
            expected_len = min(maxitems, 5 - start)
            items = [entry for entry in submset]
            expect(len(items), expected_len)
            expect(len(submset), expected_len)
def test_eset_iter():
    """Test iterators over ESets.

    Builds an ESet from an RSet containing document 3, once without a query
    set on the Enquire object and once with one, and compares the two.

    """
    db = setup_database()
    query = xapian.Query(xapian.Query.OP_OR, "was", "it")
    rset = xapian.RSet()
    rset.add_document(3)

    context("getting eset items without a query")
    enquire = xapian.Enquire(db)
    eset = enquire.get_eset(10, rset)
    plain_items = [entry for entry in eset]
    expect(len(plain_items), 3)
    expect(len(plain_items), len(eset))

    context("getting eset items with a query")
    enquire = xapian.Enquire(db)
    enquire.set_query(query)
    eset = enquire.get_eset(10, rset)
    query_items = [entry for entry in eset]
    expect(len(query_items), 2)
    expect(len(query_items), len(eset))

    # The items obtained with a query are expected to line up with items 0
    # and 2 of those obtained without one.
    context("comparing eset items with a query to those without")
    expect(query_items[0].term, plain_items[0].term)
    expect(query_items[1].term, plain_items[2].term)

    context("comparing eset weights with a query to those without")
    expect(query_items[0].weight, plain_items[0].weight)
    expect(query_items[1].weight, plain_items[2].weight)
def test_matchingterms_iter():
    """Test Enquire.matching_terms iterator.

    Checks that passing a docid and passing an MSet item give the same terms
    for every hit, then checks the terms reported for the first hit.

    """
    db = setup_database()
    query = xapian.Query(xapian.Query.OP_OR, ("was", "it", "warm", "two"))

    # Prior to 1.2.4 Enquire.matching_terms() leaked references to its members.

    enquire = xapian.Enquire(db)
    enquire.set_query(query)
    mset = enquire.get_mset(0, 10)

    for hit in mset:
        # Make a list of the term names, looked up by docid and by item.
        by_docid = [term for term in enquire.matching_terms(hit.docid)]
        by_item = [term for term in enquire.matching_terms(hit)]
        expect(by_docid, by_item)

    first_hit_terms = [term for term in enquire.matching_terms(mset.get_hit(0))]
    expect(first_hit_terms, [b'it', b'two', b'warm', b'was'])
def test_queryterms_iter():
    """Test Query term iterator.

    Iterating a Query is expected to give its terms as bytes, in sorted
    order.

    """
    db = setup_database()
    query = xapian.Query(xapian.Query.OP_OR, ("was", "it", "warm", "two"))

    # Make a list of the term names
    found = [term for term in query]
    expect(found, [b'it', b'two', b'warm', b'was'])
def test_queryparser_stoplist_iter():
    """Test QueryParser stoplist iterator.

    Parses the same text with a stemmer only, with a stopper only, and with
    both, checking the stoplist before and after each parse.

    """
    text = 'to be or not to be is the questions'
    stemmer = xapian.Stem('en')

    # Check behaviour without having set a stoplist.
    queryparser = xapian.QueryParser()
    queryparser.set_stemmer(stemmer)
    queryparser.set_stemming_strategy(queryparser.STEM_SOME)
    expect([term for term in queryparser.stoplist()], [])
    query = queryparser.parse_query(text)
    expect([term for term in queryparser.stoplist()], [])
    expect(str(query),
           'Query((Zto@1 OR Zbe@2 OR Zor@3 OR Znot@4 OR Zto@5 OR Zbe@6 OR '
           'Zis@7 OR Zthe@8 OR Zquestion@9))')

    # Check behaviour with a stoplist, but no stemmer
    queryparser = xapian.QueryParser()
    stopper = xapian.SimpleStopper()
    for stopword in ('to', 'not', 'question'):
        stopper.add(stopword)
    queryparser.set_stopper(stopper)
    expect([term for term in queryparser.stoplist()], [])
    query = queryparser.parse_query(text)

    expect([term for term in queryparser.stoplist()], [b'to', b'not', b'to'])
    expect(str(query),
           'Query((be@2 OR or@3 OR be@6 OR is@7 OR the@8 OR questions@9))')

    # Check behaviour with a stoplist and a stemmer
    queryparser.set_stemmer(stemmer)
    queryparser.set_stemming_strategy(queryparser.STEM_SOME)
    # Shouldn't have changed since previous query.
    expect([term for term in queryparser.stoplist()], [b'to', b'not', b'to'])
    query = queryparser.parse_query(text)

    expect([term for term in queryparser.stoplist()], [b'to', b'not', b'to'])
    expect(str(query),
           'Query((Zbe@2 OR Zor@3 OR Zbe@6 OR Zis@7 OR Zthe@8 OR Zquestion@9))')
def test_queryparser_unstem_iter():
    """Test QueryParser unstemlist iterator.

    Checks the unstem lists before and after parsing a query, first with no
    stemmer set and then with an English stemmer.

    """
    text = 'to question questions'
    stemmer = xapian.Stem('en')

    # Without a stemmer.
    queryparser = xapian.QueryParser()
    for word in ('to', 'question', 'questions'):
        expect([term for term in queryparser.unstemlist(word)], [])
    query = queryparser.parse_query(text)

    expect([term for term in queryparser.unstemlist('to')], [b'to'])
    expect([term for term in queryparser.unstemlist('question')], [b'question'])
    expect([term for term in queryparser.unstemlist('questions')], [b'questions'])
    expect(str(query),
           'Query((to@1 OR question@2 OR questions@3))')

    # With a stemmer.
    queryparser = xapian.QueryParser()
    queryparser.set_stemmer(stemmer)
    queryparser.set_stemming_strategy(queryparser.STEM_SOME)
    for word in ('Zto', 'Zquestion', 'Zquestions'):
        expect([term for term in queryparser.unstemlist(word)], [])
    query = queryparser.parse_query(text)

    expect([term for term in queryparser.unstemlist('Zto')], [b'to'])
    expect([term for term in queryparser.unstemlist('Zquestion')], [b'question', b'questions'])
    expect([term for term in queryparser.unstemlist('Zquestions')], [])
    expect(str(query),
           'Query((Zto@1 OR Zquestion@2 OR Zquestion@3))')
def test_allterms_iter():
    """Test all-terms iterator on Database.

    Covers plain iteration over the database, access to saved items once the
    iterator has moved on, and iteration restricted to a prefix.

    """
    db = setup_database()

    context("making a list of the term names and frequencies")
    terms = []
    freqs = []
    for termitem in db:
        terms.append(termitem.term)
        expect_exception(xapian.InvalidOperationError, 'Iterator does not support wdfs', getattr, termitem, 'wdf')
        freqs.append(termitem.termfreq)
        expect_exception(xapian.InvalidOperationError, 'Iterator does not support position lists', getattr, termitem, 'positer')

    context("checking that items are no longer valid once the iterator has moved on")
    termitems = [termitem for termitem in db]

    # The term stays readable on a saved item...
    expect(len(termitems), len(terms))
    for index, saved in enumerate(termitems):
        expect(saved.term, terms[index])

    # ...but the term frequency does not.
    expect(len(termitems), len(freqs))
    for saved in termitems:
        expect_exception(xapian.InvalidOperationError, 'Iterator has moved, and does not support random access', getattr, saved, 'termfreq')

    context("checking that restricting the terms iterated with a prefix works")
    # Work out the expected subset from the full lists gathered above.
    prefix_terms = []
    prefix_freqs = []
    for term, freq in zip(terms, freqs):
        if term.startswith(b't'):
            prefix_terms.append(term)
            prefix_freqs.append(freq)
    seen = 0
    for termitem in db.allterms('t'):
        expect(termitem.term, prefix_terms[seen])
        expect(termitem.termfreq, prefix_freqs[seen])
        seen += 1
    expect(len(prefix_terms), seen)
def test_termlist_iter():
    """Test termlist iterator on Database.

    Covers plain iteration over the termlist of document 3, skip_to() in
    every direction (including past the end), and access to saved items once
    the iterator has moved on.

    """
    db = setup_database()

    # Make lists of the item contents
    terms = []
    wdfs = []
    freqs = []
    positers = []
    for termitem in db.termlist(3):
        terms.append(termitem.term)
        wdfs.append(termitem.wdf)
        freqs.append(termitem.termfreq)
        positers.append([pos for pos in termitem.positer])

    expect(terms, [b'it', b'two', b'warm', b'was'])
    expect(wdfs, [1, 2, 1, 1])
    expect(freqs, [5, 3, 4, 4])
    expect(positers, [[2], [], [3], [1]])

    # Test skip_to().
    tliter = db.termlist(3)

    # skip to an item before the first item.
    termitem = tliter.skip_to('a')
    expect((termitem.term, termitem.wdf, termitem.termfreq,
            [pos for pos in termitem.positer]), (b'it', 1, 5, [2]))

    # skip forwards to an item.
    termitem = tliter.skip_to('two')
    expect((termitem.term, termitem.wdf, termitem.termfreq,
            [pos for pos in termitem.positer]), (b'two', 2, 3, []))

    # skip to same place (should return same item)
    termitem = tliter.skip_to('two')
    expect((termitem.term, termitem.wdf, termitem.termfreq,
            [pos for pos in termitem.positer]), (b'two', 2, 3, []))

    # next() after a skip_to(), should return next item.
    termitem = next(tliter)
    expect((termitem.term, termitem.wdf, termitem.termfreq,
            [pos for pos in termitem.positer]), (b'warm', 1, 4, [3]))

    # skip to same place (should return same item)
    termitem = tliter.skip_to('warm')
    expect((termitem.term, termitem.wdf, termitem.termfreq,
            [pos for pos in termitem.positer]), (b'warm', 1, 4, [3]))

    # skip backwards (should return same item)
    termitem = tliter.skip_to('a')
    # Actually check the result, as the posting list test does for its
    # backwards skip; previously the returned item was never inspected.
    expect((termitem.term, termitem.wdf, termitem.termfreq,
            [pos for pos in termitem.positer]), (b'warm', 1, 4, [3]))

    # skip to after end.
    expect_exception(StopIteration, '', tliter.skip_to, 'zoo')
    # skip backwards (should still return StopIteration).
    expect_exception(StopIteration, '', tliter.skip_to, 'a')
    # next should continue to return StopIteration.
    expect_exception(StopIteration, '', next, tliter)

    # Make a list of the terms (so we can test if they're still valid
    # once the iterator has moved on).
    termitems = [termitem for termitem in db.termlist(3)]

    expect(len(termitems), len(terms))
    for i, termitem in enumerate(termitems):
        expect(termitem.term, terms[i])

    expect(len(termitems), len(wdfs))
    for i, termitem in enumerate(termitems):
        expect(termitem.wdf, wdfs[i])

    # termfreq and positer are not available on saved items.
    expect(len(termitems), len(freqs))
    for termitem in termitems:
        expect_exception(xapian.InvalidOperationError,
                         'Iterator has moved, and does not support random access',
                         getattr, termitem, 'termfreq')

    expect(len(termitems), len(freqs))
    for termitem in termitems:
        expect_exception(xapian.InvalidOperationError,
                         'Iterator has moved, and does not support random access',
                         getattr, termitem, 'positer')
def test_dbdocument_iter():
    """Test document terms iterator for document taken from a database.

    Iterates the terms of document 3, then checks which attributes of saved
    items are still usable once the iterator has moved on.

    """
    db = setup_database()

    doc = db.get_document(3)

    # Make lists of the item contents
    terms = []
    wdfs = []
    freqs = []
    positers = []
    for entry in doc:
        terms.append(entry.term)
        wdfs.append(entry.wdf)
        freqs.append(entry.termfreq)
        positers.append([pos for pos in entry.positer])

    expect(terms, [b'it', b'two', b'warm', b'was'])
    expect(wdfs, [1, 2, 1, 1])
    expect(freqs, [5, 3, 4, 4])
    expect(positers, [[2], [], [3], [1]])

    # Make a list of the terms (so we can test if they're still valid
    # once the iterator has moved on).
    termitems = [entry for entry in doc]

    expect(len(termitems), len(terms))
    for index, saved in enumerate(termitems):
        expect(saved.term, terms[index])

    expect(len(termitems), len(wdfs))
    for index, saved in enumerate(termitems):
        expect(saved.wdf, wdfs[index])

    # termfreq and positer are not available on saved items.
    moved_msg = 'Iterator has moved, and does not support random access'

    expect(len(termitems), len(freqs))
    for saved in termitems:
        expect_exception(xapian.InvalidOperationError, moved_msg,
                         getattr, saved, 'termfreq')

    expect(len(termitems), len(freqs))
    for saved in termitems:
        expect_exception(xapian.InvalidOperationError, moved_msg,
                         getattr, saved, 'positer')
def test_newdocument_iter():
    """Test document terms iterator for newly created document.

    The document is never added to a database, so reading termfreq from its
    term iterator is expected to raise InvalidOperationError.

    """
    doc = xapian.Document()
    doc.set_data("was it warm? two")
    for position, word in enumerate(("was", "it", "warm"), 1):
        doc.add_posting(word, position)
    doc.add_term("two", 2)

    # Make lists of the item contents
    terms = []
    wdfs = []
    positers = []
    for entry in doc:
        terms.append(entry.term)
        wdfs.append(entry.wdf)
        expect_exception(xapian.InvalidOperationError,
                         "get_termfreq() not valid for a TermIterator from a "
                         "Document which is not associated with a database",
                         getattr, entry, 'termfreq')
        positers.append([pos for pos in entry.positer])

    expect(terms, [b'it', b'two', b'warm', b'was'])
    expect(wdfs, [1, 2, 1, 1])
    expect(positers, [[2], [], [3], [1]])

    # Make a list of the terms (so we can test if they're still valid
    # once the iterator has moved on).
    termitems = [entry for entry in doc]

    expect(len(termitems), len(terms))
    for index, saved in enumerate(termitems):
        expect(saved.term, terms[index])

    expect(len(termitems), len(wdfs))
    for index, saved in enumerate(termitems):
        expect(saved.wdf, wdfs[index])

    # termfreq and positer are not available on saved items.
    moved_msg = 'Iterator has moved, and does not support random access'

    for saved in termitems:
        expect_exception(xapian.InvalidOperationError, moved_msg,
                         getattr, saved, 'termfreq')

    expect(len(termitems), len(positers))
    for saved in termitems:
        expect_exception(xapian.InvalidOperationError, moved_msg,
                         getattr, saved, 'positer')
def test_postinglist_iter():
    """Test postinglist iterator on Database.

    Covers plain iteration over the postings of 'it', skip_to() in every
    direction (including past the end), and access to saved items once the
    iterator has moved on.

    """
    db = setup_database()

    # Make lists of the item contents
    docids = []
    doclengths = []
    wdfs = []
    positers = []
    for posting in db.postlist('it'):
        docids.append(posting.docid)
        doclengths.append(posting.doclength)
        wdfs.append(posting.wdf)
        positers.append([pos for pos in posting.positer])

    expect(docids, [1, 2, 3, 4, 5])
    expect(doclengths, [3, 3, 5, 8, 19])
    expect(wdfs, [1, 1, 1, 1, 8])
    expect(positers, [[1], [2], [2], [2], [2, 7]])

    # Test skip_to().
    pliter = db.postlist('it')

    # skip to an item before the first item.
    posting = pliter.skip_to(0)
    expect((posting.docid, posting.doclength, posting.wdf,
            [pos for pos in posting.positer]), (1, 3, 1, [1]))

    # skip forwards to an item.
    posting = pliter.skip_to(3)
    expect((posting.docid, posting.doclength, posting.wdf,
            [pos for pos in posting.positer]), (3, 5, 1, [2]))

    # skip to same place (should return same item)
    posting = pliter.skip_to(3)
    expect((posting.docid, posting.doclength, posting.wdf,
            [pos for pos in posting.positer]), (3, 5, 1, [2]))

    # next() after a skip_to(), should return next item.
    posting = next(pliter)
    expect((posting.docid, posting.doclength, posting.wdf,
            [pos for pos in posting.positer]), (4, 8, 1, [2]))

    # skip to same place (should return same item)
    posting = pliter.skip_to(4)
    expect((posting.docid, posting.doclength, posting.wdf,
            [pos for pos in posting.positer]), (4, 8, 1, [2]))

    # skip backwards (should return same item)
    posting = pliter.skip_to(2)
    expect((posting.docid, posting.doclength, posting.wdf,
            [pos for pos in posting.positer]), (4, 8, 1, [2]))

    # skip to after end.
    expect_exception(StopIteration, '', pliter.skip_to, 6)
    # skip backwards (should still return StopIteration).  This must use an
    # earlier docid: repeating skip_to(6) would not test a backwards skip.
    expect_exception(StopIteration, '', pliter.skip_to, 2)
    # next should continue to return StopIteration.
    expect_exception(StopIteration, '', next, pliter)

    # Make a list of the postings (so we can test if they're still valid once
    # the iterator has moved on).
    postings = [posting for posting in db.postlist('it')]

    expect(len(postings), len(docids))
    for i, posting in enumerate(postings):
        expect(posting.docid, docids[i])

    expect(len(postings), len(doclengths))
    for i, posting in enumerate(postings):
        expect(posting.doclength, doclengths[i])

    expect(len(postings), len(wdfs))
    for i, posting in enumerate(postings):
        expect(posting.wdf, wdfs[i])

    # positer is not available on saved items.
    expect(len(postings), len(positers))
    for posting in postings:
        expect_exception(xapian.InvalidOperationError,
                         'Iterator has moved, and does not support random access',
                         getattr, posting, 'positer')
def test_valuestream_iter():
    """Test a valuestream iterator on Database.

    Covers plain iteration over several value slots, and the interaction of
    skip_to() and next() on both an empty and a populated stream.

    """
    db = setup_database()

    # Check basic iteration
    expect([(entry.docid, entry.value) for entry in db.valuestream(0)],
           [(3, b'\xa4'), (4, b'\xa2'), (5, b'\xa4')])
    expect([(entry.docid, entry.value) for entry in db.valuestream(1)], [])
    expect([(entry.docid, entry.value) for entry in db.valuestream(5)],
           [(5, b"five")])
    expect([(entry.docid, entry.value) for entry in db.valuestream(9)],
           [(5, b"nine")])

    # Test skip_to() on iterator with no values, and behaviours when called
    # after already returning StopIteration.
    stream = db.valuestream(1)
    expect_exception(StopIteration, "", stream.skip_to, 1)
    expect_exception(StopIteration, "", stream.skip_to, 1)
    stream = db.valuestream(1)
    expect_exception(StopIteration, "", stream.skip_to, 1)
    expect_exception(StopIteration, "", stream.__next__)
    stream = db.valuestream(1)
    expect_exception(StopIteration, "", stream.__next__)
    expect_exception(StopIteration, "", stream.skip_to, 1)

    # Test that skipping to a value works, and that skipping doesn't have to
    # advance.
    stream = db.valuestream(0)
    entry = stream.skip_to(4)
    expect((entry.docid, entry.value), (4, b'\xa2'))
    entry = stream.skip_to(4)
    expect((entry.docid, entry.value), (4, b'\xa2'))
    entry = stream.skip_to(1)
    expect((entry.docid, entry.value), (4, b'\xa2'))
    entry = stream.skip_to(5)
    expect((entry.docid, entry.value), (5, b'\xa4'))
    expect_exception(StopIteration, "", stream.skip_to, 6)

    # Test that alternating skip_to() and next() works.
    stream = db.valuestream(0)
    entry = next(stream)
    expect((entry.docid, entry.value), (3, b'\xa4'))
    entry = stream.skip_to(4)
    expect((entry.docid, entry.value), (4, b'\xa2'))
    entry = next(stream)
    expect((entry.docid, entry.value), (5, b'\xa4'))
    expect_exception(StopIteration, "", stream.skip_to, 6)

    # Test that next works correctly after skip_to() called with an earlier
    # item.
    stream = db.valuestream(0)
    entry = stream.skip_to(4)
    expect((entry.docid, entry.value), (4, b'\xa2'))
    entry = stream.skip_to(1)
    expect((entry.docid, entry.value), (4, b'\xa2'))
    entry = next(stream)
    expect((entry.docid, entry.value), (5, b'\xa4'))

    # Test that next works correctly after skipping to last item
    stream = db.valuestream(0)
    entry = stream.skip_to(5)
    expect((entry.docid, entry.value), (5, b'\xa4'))
    expect_exception(StopIteration, "", stream.__next__)
def test_position_iter():
    """Test position iterator for a document in a database.

    Reads the positions of 'it' in document 5 straight from the database.

    """
    db = setup_database()

    # Make a list of the positions.  The document itself is not needed for
    # this, so it is no longer fetched into an unused local.
    positions = [position for position in db.positionlist(5, 'it')]

    expect(positions, [2, 7])
def test_value_iter():
    """Test iterators over list of values in a document.

    Document 5 carries values in slots 0, 5 and 9; they are expected to be
    listed in slot order.

    """
    db = setup_database()
    doc = db.get_document(5)

    items = list(doc.values())
    expect(len(items), 3)

    first, second, third = items[0], items[1], items[2]
    expect(first.num, 0)
    expect(first.value, xapian.sortable_serialise(2))
    expect(second.num, 5)
    expect(second.value, b'five')
    expect(third.num, 9)
    expect(third.value, b'nine')
def test_synonyms_iter():
    """Test iterators over list of synonyms in a database.

    Synonyms are added through a writable on-disk database and read back both
    through it and through a separate read-only handle, before and after
    commit().  The database is closed and its directory removed even if one
    of the checks fails.

    """
    dbpath = 'db_test_synonyms_iter'
    db = xapian.WritableDatabase(dbpath, xapian.DB_CREATE_OR_OVERWRITE)
    dbr = None
    try:
        db.add_synonym('hello', 'hi')
        db.add_synonym('hello', 'howdy')

        # The writer sees its own uncommitted changes...
        expect([item for item in db.synonyms('foo')], [])
        expect([item for item in db.synonyms('hello')], [b'hi', b'howdy'])
        expect([item for item in db.synonym_keys()], [b'hello'])
        expect([item for item in db.synonym_keys('foo')], [])
        expect([item for item in db.synonym_keys('he')], [b'hello'])
        expect([item for item in db.synonym_keys('hello')], [b'hello'])

        # ...but a reader opened before the commit does not.
        dbr = xapian.Database(dbpath)
        expect([item for item in dbr.synonyms('foo')], [])
        expect([item for item in dbr.synonyms('hello')], [])
        expect([item for item in dbr.synonym_keys()], [])
        expect([item for item in dbr.synonym_keys('foo')], [])
        expect([item for item in dbr.synonym_keys('he')], [])
        expect([item for item in dbr.synonym_keys('hello')], [])

        db.commit()

        expect([item for item in db.synonyms('foo')], [])
        expect([item for item in db.synonyms('hello')], [b'hi', b'howdy'])
        expect([item for item in db.synonym_keys()], [b'hello'])
        expect([item for item in db.synonym_keys('foo')], [])
        expect([item for item in db.synonym_keys('he')], [b'hello'])
        expect([item for item in db.synonym_keys('hello')], [b'hello'])

        # A reader opened after the commit sees the synonyms.
        dbr = xapian.Database(dbpath)
        expect([item for item in dbr.synonyms('foo')], [])
        expect([item for item in dbr.synonyms('hello')], [b'hi', b'howdy'])
        expect([item for item in dbr.synonym_keys()], [b'hello'])
        expect([item for item in dbr.synonym_keys('foo')], [])
        expect([item for item in dbr.synonym_keys('he')], [b'hello'])
        expect([item for item in dbr.synonym_keys('hello')], [b'hello'])

        db.close()
        expect(xapian.Database.check(dbpath), 0)
        dbr.close()
    finally:
        # Release the handles before deleting the directory.  Closing a
        # database which is already closed has no effect, so repeating the
        # calls after a successful run is harmless.
        db.close()
        if dbr is not None:
            dbr.close()
        shutil.rmtree(dbpath)
def test_metadata_keys_iter():
    """Test iterators over list of metadata keys in a database.

    Metadata is set through a writable on-disk database and its keys are read
    back both through it and through a separate read-only handle, before and
    after commit().  The database is closed and its directory removed even if
    one of the checks fails.

    """
    dbpath = 'db_test_metadata_iter'
    db = xapian.WritableDatabase(dbpath, xapian.DB_CREATE_OR_OVERWRITE)
    dbr = None
    try:
        db.set_metadata('author', 'richard')
        db.set_metadata('item1', 'hello')
        db.set_metadata('item1', 'hi')
        db.set_metadata('item2', 'howdy')
        # Keys whose value is (or becomes) empty are expected to be absent
        # from the key lists checked below.
        db.set_metadata('item3', '')
        db.set_metadata('item4', 'goodbye')
        db.set_metadata('item4', '')
        db.set_metadata('type', 'greeting')

        # The writer sees its own uncommitted changes...
        expect([item for item in db.metadata_keys()],
               [b'author', b'item1', b'item2', b'type'])
        expect([item for item in db.metadata_keys('foo')], [])
        expect([item for item in db.metadata_keys('item')], [b'item1', b'item2'])
        expect([item for item in db.metadata_keys('it')], [b'item1', b'item2'])
        expect([item for item in db.metadata_keys('type')], [b'type'])

        # ...but a reader opened before the commit does not.
        dbr = xapian.Database(dbpath)
        expect([item for item in dbr.metadata_keys()], [])
        expect([item for item in dbr.metadata_keys('foo')], [])
        expect([item for item in dbr.metadata_keys('item')], [])
        expect([item for item in dbr.metadata_keys('it')], [])
        expect([item for item in dbr.metadata_keys('type')], [])

        db.commit()
        expect([item for item in db.metadata_keys()],
               [b'author', b'item1', b'item2', b'type'])
        expect([item for item in db.metadata_keys('foo')], [])
        expect([item for item in db.metadata_keys('item')], [b'item1', b'item2'])
        expect([item for item in db.metadata_keys('it')], [b'item1', b'item2'])
        expect([item for item in db.metadata_keys('type')], [b'type'])

        # A reader opened after the commit sees the keys.
        dbr = xapian.Database(dbpath)
        expect([item for item in dbr.metadata_keys()],
               [b'author', b'item1', b'item2', b'type'])
        expect([item for item in dbr.metadata_keys('foo')], [])
        expect([item for item in dbr.metadata_keys('item')], [b'item1', b'item2'])
        expect([item for item in dbr.metadata_keys('it')], [b'item1', b'item2'])
        expect([item for item in dbr.metadata_keys('type')], [b'type'])

        db.close()
        expect(xapian.Database.check(dbpath), 0)
        dbr.close()
    finally:
        # Release the handles before deleting the directory.  Closing a
        # database which is already closed has no effect, so repeating the
        # calls after a successful run is harmless.
        db.close()
        if dbr is not None:
            dbr.close()
        shutil.rmtree(dbpath)
def test_spell():
    """Test basic spelling correction features.

    Spellings are added through a writable on-disk database and read back
    both through it and through a separate read-only handle, before and after
    commit().  The database is closed and its directory removed even if one
    of the checks fails.

    """
    dbpath = 'db_test_spell'
    db = xapian.WritableDatabase(dbpath, xapian.DB_CREATE_OR_OVERWRITE)
    dbr = None
    try:
        db.add_spelling('hello')
        db.add_spelling('mell', 2)
        expect(db.get_spelling_suggestion('hell'), b'mell')
        expect([(item.term, item.termfreq) for item in db.spellings()], [(b'hello', 1), (b'mell', 2)])

        # A reader opened before the commit sees no spelling data.
        dbr = xapian.Database(dbpath)
        expect(dbr.get_spelling_suggestion('hell'), b'')
        expect([(item.term, item.termfreq) for item in dbr.spellings()], [])

        db.commit()

        # A reader opened after the commit sees the same data as the writer.
        dbr = xapian.Database(dbpath)
        expect(db.get_spelling_suggestion('hell'), b'mell')
        expect(dbr.get_spelling_suggestion('hell'), b'mell')
        expect([(item.term, item.termfreq) for item in dbr.spellings()], [(b'hello', 1), (b'mell', 2)])

        db.close()
        expect(xapian.Database.check(dbpath), 0)
        dbr.close()
    finally:
        # Release the handles before deleting the directory.  Closing a
        # database which is already closed has no effect, so repeating the
        # calls after a successful run is harmless.
        db.close()
        if dbr is not None:
            dbr.close()
        shutil.rmtree(dbpath)
def test_queryparser_custom_vrp():
    """Test QueryParser with a custom (in python) ValueRangeProcessor.

    The processor maps every range to value slot 7 and prefixes the two ends
    with "A" and "B".

    """
    class MyVRP(xapian.ValueRangeProcessor):
        def __init__(self):
            xapian.ValueRangeProcessor.__init__(self)

        def __call__(self, begin, end):
            slot = 7
            return (slot, "A"+begin, "B"+end)

    parser = xapian.QueryParser()
    processor = MyVRP()

    parser.add_valuerangeprocessor(processor)
    parsed = parser.parse_query('5..8')

    expect(str(parsed),
           'Query(VALUE_RANGE 7 A5 B8)')
def test_queryparser_custom_vrp_deallocation():
    """Test that QueryParser doesn't delete ValueRangeProcessors too soon.

    The processor is created inside a helper, so by the time the query is
    parsed the only remaining Python-visible owner is the parser itself.

    """
    class MyVRP(xapian.ValueRangeProcessor):
        def __init__(self):
            xapian.ValueRangeProcessor.__init__(self)

        def __call__(self, begin, end):
            slot = 7
            return (slot, "A"+begin, "B"+end)

    def make_parser():
        # The local reference to the processor is dropped on return.
        parser = xapian.QueryParser()
        processor = MyVRP()
        parser.add_valuerangeprocessor(processor)
        return parser

    parser = make_parser()
    parsed = parser.parse_query('5..8')

    expect(str(parsed),
           'Query(VALUE_RANGE 7 A5 B8)')
def test_queryparser_custom_rp():
    """Test QueryParser with a custom (in python) RangeProcessor.

    The processor decodes both ends of the range as UTF-8, prefixes them with
    "A" and "B", and returns a value range query on slot 7.

    """
    class MyRP(xapian.RangeProcessor):
        def __init__(self):
            xapian.RangeProcessor.__init__(self)

        def __call__(self, begin, end):
            lower = "A" + begin.decode('utf-8')
            upper = "B" + end.decode('utf-8')
            return xapian.Query(xapian.Query.OP_VALUE_RANGE, 7, lower, upper)

    parser = xapian.QueryParser()
    processor = MyRP()

    parser.add_rangeprocessor(processor)
    parsed = parser.parse_query('5..8')

    expect(str(parsed),
           'Query(VALUE_RANGE 7 A5 B8)')
def test_queryparser_custom_rp_deallocation():
    """Test that QueryParser doesn't delete RangeProcessors too soon.

    The processor is created inside a helper, so by the time the query is
    parsed the only remaining Python-visible owner is the parser itself.

    """
    class MyRP(xapian.RangeProcessor):
        def __init__(self):
            xapian.RangeProcessor.__init__(self)

        def __call__(self, begin, end):
            lower = "A" + begin.decode('utf-8')
            upper = "B" + end.decode('utf-8')
            return xapian.Query(xapian.Query.OP_VALUE_RANGE, 7, lower, upper)

    def make_parser():
        # The local reference to the processor is dropped on return.
        parser = xapian.QueryParser()
        processor = MyRP()
        parser.add_rangeprocessor(processor)
        return parser

    parser = make_parser()
    parsed = parser.parse_query('5..8')

    expect(str(parsed),
           'Query(VALUE_RANGE 7 A5 B8)')
def test_scale_weight():
    """Check OP_SCALE_WEIGHT multiplies the weights of a subquery.

    For each multiplier the same term query is run plain and scaled, and the
    scaled weights (truncated to six decimal places) are compared with the
    plain weights multiplied in Python.  A negative factor must be rejected.

    """
    db = setup_database()
    for mult in (0, 1, 2.5):
        context("checking queries with OP_SCALE_WEIGHT with a multiplier of %r" %
                mult)
        plain = xapian.Query("it")
        scaled = xapian.Query(xapian.Query.OP_SCALE_WEIGHT, plain, mult)

        enquire = xapian.Enquire(db)
        enquire.set_query(plain)
        plain_mset = enquire.get_mset(0, 10)
        enquire.set_query(scaled)
        scaled_mset = enquire.get_mset(0, 10)

        if mult > 0:
            expected = [(int(item.weight * mult * 1000000), item.docid)
                        for item in plain_mset]
        else:
            # Every weight is zero, so the expected list is ordered by docid.
            expected = sorted((0, item.docid) for item in plain_mset)

        actual = [(int(item.weight * 1000000), item.docid)
                  for item in scaled_mset]
        expect(actual, expected)

    context("checking queries with OP_SCALE_WEIGHT with a multiplier of -1")
    plain = xapian.Query("it")
    expect_exception(xapian.InvalidArgumentError,
                     "OP_SCALE_WEIGHT requires factor >= 0",
                     xapian.Query,
                     xapian.Query.OP_SCALE_WEIGHT, plain, -1)
def test_weight_normalise():
    """Check weights can be normalised with OP_SCALE_WEIGHT.

    Each query is first run asking for no results, purely to read the maximum
    possible weight from the MSet.  The query is then scaled by the reciprocal
    of that maximum and run again.

    This checks that get_max_possible() is usable as a normalisation factor
    (it isn't guaranteed to be a tight bound) and that the scaled query
    reports a maximum of 1 with every weight in the range (0, 1].

    """
    db = setup_database()
    query_strings = (
        "it",
        "was",
        "it was",
        "it was four",
        "it was four five",
        "\"was it warm\" four notpresent",
        "notpresent",
    )
    for query in query_strings:
        context("checking query %r using OP_SCALE_WEIGHT to normalise the weights" % query)
        parsed = xapian.QueryParser().parse_query(query)
        enquire = xapian.Enquire(db)
        enquire.set_query(parsed)
        probe = enquire.get_mset(0, 0)

        # A max_attained of 0 gives some reassurance that the match didn't
        # actually do the work of calculating any results.
        expect(probe.get_max_attained(), 0)

        max_possible = probe.get_max_possible()
        if query == "notpresent":
            # Nothing to normalise (and nothing to divide by).
            expect(max_possible, 0)
            continue

        mult = 1.0 / max_possible
        normalised = xapian.Query(xapian.Query.OP_SCALE_WEIGHT, parsed, mult)

        enquire = xapian.Enquire(db)
        enquire.set_query(normalised)
        results = enquire.get_mset(0, 10)

        # max_possible should now be 1, allowing for rounding errors.
        expect(int(results.get_max_possible() * 1000000.0 + 0.5), 1000000)
        for item in results:
            expect(item.weight > 0, True)
            expect(item.weight <= 1, True)
def test_valuesetmatchdecider():
    """Check ValueSetMatchDecider in both its True and False modes.

    """
    doc = xapian.Document()

    # Constructed with True: the test expects only listed values to pass.
    decider = xapian.ValueSetMatchDecider(0, True)
    expect(decider(doc), False)

    decider.add_value('foo')
    doc.add_value(0, 'foo')
    expect(decider(doc), True)

    decider.remove_value('foo')
    expect(decider(doc), False)

    # Constructed with False: the test expects listed values to be rejected.
    decider = xapian.ValueSetMatchDecider(0, False)
    expect(decider(doc), True)

    decider.add_value('foo')
    expect(decider(doc), False)
def test_postingsource():
    """Check a PostingSource subclass written in Python can drive a match.

    The source is built inside nested helper functions so that the source,
    the query wrapping it, and the first Enquire all go out of scope before
    the match is run.

    """
    class OddPostingSource(xapian.PostingSource):
        def __init__(self, max):
            xapian.PostingSource.__init__(self)
            self.max = max

        def init(self, db):
            self.current = -1
            self.weight = db.get_doccount() + 1
            self.set_maxweight(self.weight)

        def get_termfreq_min(self):
            return 0

        def get_termfreq_est(self):
            return int(self.max / 2)

        def get_termfreq_max(self):
            return self.max

        def __next__(self, minweight):
            # Step to the next odd docid, lowering the weight each time.
            self.current += 2
            self.weight -= 1.0
            self.set_maxweight(self.weight)

        def at_end(self):
            return self.current > self.max

        def get_docid(self):
            return self.current

        def get_weight(self):
            return self.weight

    dbpath = 'db_test_postingsource'
    db = xapian.WritableDatabase(dbpath, xapian.DB_CREATE_OR_OVERWRITE)
    for _ in range(10):
        db.add_document(xapian.Document())

    # Do a dance to check that the posting source doesn't get dereferenced too
    # soon in various cases.
    def mkenq(db):
        # First - check that it's kept when the source goes out of scope.
        def mkquery():
            source = OddPostingSource(10)
            # The posting source is inside a list to check that case is
            # correctly handled.
            subqueries = ["terM wHich wilL NoT maTch", xapian.Query(source)]
            return xapian.Query(xapian.Query.OP_OR, subqueries)

        # Check that it's kept when the query goes out of scope.
        def submkenq():
            query = mkquery()
            inner = xapian.Enquire(db)
            inner.set_query(query)
            return inner

        # Check it's kept when the query is retrieved from enquire and put
        # into a new enquire.
        def submkenq2():
            first = submkenq()
            second = xapian.Enquire(db)
            second.set_query(first.get_query())
            return second

        return submkenq2()

    enquire = mkenq(db)
    mset = enquire.get_mset(0, 10)

    expect([item.docid for item in mset], [1, 3, 5, 7, 9])
    expect(mset[0].weight, db.get_doccount())

    db.close()
    expect(xapian.Database.check(dbpath), 0)
    shutil.rmtree(dbpath)
def test_postingsource2():
    """Check a query built from ValueWeightPostingSource outlives the source.

    Ten documents get a numeric value in slot 1.  The Python name for the
    source is deleted before the match, and the expected docid order is by
    decreasing slot value.

    """
    dbpath = 'db_test_postingsource2'
    db = xapian.WritableDatabase(dbpath, xapian.DB_CREATE_OR_OVERWRITE)
    vals = (6, 9, 4.5, 4.4, 4.6, 2, 1, 4, 3, 0)
    for val in vals:
        doc = xapian.Document()
        doc.add_value(1, xapian.sortable_serialise(val))
        db.add_document(doc)

    source = xapian.ValueWeightPostingSource(1)
    query = xapian.Query(source)
    del source # Check that query keeps a reference to it.

    enquire = xapian.Enquire(db)
    enquire.set_query(query)
    mset = enquire.get_mset(0, 10)

    ranked = [item.docid for item in mset]
    expect(ranked, [2, 1, 5, 3, 4, 8, 9, 6, 7, 10])

    db.close()
    shutil.rmtree(dbpath)
def test_postingsource3():
    """Check ValuePostingSource can be usefully subclassed from Python.

    The subclass weights each document by the cube of the number stored in
    slot 1, so the expected docid order is by decreasing slot value.

    """
    dbpath = 'db_test_postingsource3'
    db = xapian.WritableDatabase(dbpath, xapian.DB_CREATE_OR_OVERWRITE)
    for wt in (1, 3, 2, 4):
        doc = xapian.Document()
        doc.add_value(1, xapian.sortable_serialise(wt))
        db.add_document(doc)

    class PyValuePostingSource(xapian.ValuePostingSource):
        def __init__(self, slot):
            xapian.ValuePostingSource.__init__(self, slot)

        def init(self, db):
            xapian.ValuePostingSource.init(self, db)
            self.current = -1
            # The weight bound is the cube of the largest value in the slot.
            upper = db.get_value_upper_bound(self.get_slot())
            self.set_maxweight(xapian.sortable_unserialise(upper) ** 3)

        def next(self, minweight):
            return xapian.ValuePostingSource.next(self, minweight)

        def get_weight(self):
            return xapian.sortable_unserialise(self.get_value()) ** 3

    source = PyValuePostingSource(1)
    query = xapian.Query(source)
    #del source # Check that query keeps a reference to it.

    enquire = xapian.Enquire(db)
    enquire.set_query(query)
    mset = enquire.get_mset(0, 10)

    ranked = [item.docid for item in mset]
    expect(ranked, [4, 2, 3, 1])

    db.close()
    expect(xapian.Database.check(dbpath), 0)
    shutil.rmtree(dbpath)
def test_value_stats():
    """Check per-slot value statistics reported by a database.

    Only slot 1 is populated, so slots 0 and 2 are expected to report a
    frequency of zero and empty bounds.

    """
    dbpath = 'db_test_value_stats'
    db = xapian.WritableDatabase(dbpath, xapian.DB_CREATE_OR_OVERWRITE)

    vals = (6, 9, 4.5, 4.4, 4.6, 2, 1, 4, 3, 0)
    for val in vals:
        doc = xapian.Document()
        doc.add_value(1, xapian.sortable_serialise(val))
        db.add_document(doc)

    # Slot below the populated one.
    expect(db.get_value_freq(0), 0)
    expect(db.get_value_lower_bound(0), b"")
    expect(db.get_value_upper_bound(0), b"")

    # The populated slot: ten values, ranging from 0 to 9.
    expect(db.get_value_freq(1), 10)
    expect(db.get_value_lower_bound(1), xapian.sortable_serialise(0))
    expect(db.get_value_upper_bound(1), xapian.sortable_serialise(9))

    # Slot above the populated one.
    expect(db.get_value_freq(2), 0)
    expect(db.get_value_lower_bound(2), b"")
    expect(db.get_value_upper_bound(2), b"")

    db.close()
    expect(xapian.Database.check(dbpath), 0)
    shutil.rmtree(dbpath)
def test_get_uuid():
    """Check UUIDs reported by writable, read-only and combined databases.

    """
    dbpath = 'db_test_get_uuid'
    path1 = dbpath + "1"
    path2 = dbpath + "2"

    db1 = xapian.WritableDatabase(path1, xapian.DB_CREATE_OR_OVERWRITE)
    db2 = xapian.WritableDatabase(path2, xapian.DB_CREATE_OR_OVERWRITE)
    dbr1 = xapian.Database(path1)
    dbr2 = xapian.Database(path2)

    # Two separate databases differ; a reader agrees with its writer.
    expect(db1.get_uuid() != db2.get_uuid(), True)
    expect(db1.get_uuid(), dbr1.get_uuid())
    expect(db2.get_uuid(), dbr2.get_uuid())

    # A database with a single subdatabase reports that subdatabase's UUID.
    db = xapian.Database()
    db.add_database(db1)
    expect(db1.get_uuid(), db.get_uuid())

    for handle in (db1, db2, dbr1, dbr2, db):
        handle.close()
    shutil.rmtree(path1)
    shutil.rmtree(path2)
def test_director_exception():
    """Check an exception raised in a Python decider reaches the caller.

    Each decider is called directly and then via the Enquire method that
    uses it; the same exception type and message are expected both ways.

    """
    class TestException(Exception):
        def __init__(self, a, b):
            Exception.__init__(self, a + b)

    class EDecider(xapian.ExpandDecider):
        def __call__(self, term):
            raise TestException("foo", "bar")

    class MDecider(xapian.MatchDecider):
        def __call__(self, doc):
            raise TestException("foo", "bar")

    db = setup_database()
    enq = xapian.Enquire(db)
    enq.set_query(xapian.Query('it'))

    rset = xapian.RSet()
    rset.add_document(1)

    edecider = EDecider()
    expect_exception(TestException, "foobar", edecider, "foo")
    expect_exception(TestException, "foobar", enq.get_eset, 10, rset, edecider)

    mdecider = MDecider()
    expect_exception(TestException, "foobar", mdecider, xapian.Document())
    expect_exception(TestException, "foobar", enq.get_mset, 0, 10, None, mdecider)
def check_vals(db, vals):
    """Compare the slot 1 value of every document in db against vals.

    vals is indexed by docid, for each docid from 1 up to the database's
    last docid.

    """
    last = db.get_lastdocid()
    for docid in range(1, last + 1):
        stored = db.get_document(docid).get_value(1)
        expect(stored, vals[docid], "Expected stored value in doc %d" % docid)
def test_value_mods():
    """Test handling of modifications to values.

    Values in slot 1 are added, replaced and removed, and after each stage
    the stored values are compared (both before and after committing, for
    the later stages) against a dict of what each document should hold.
    The random generator is seeded so the sequence of changes is repeatable.

    """
    dbpath = 'db_test_value_mods'
    db = xapian.WritableDatabase(dbpath, xapian.DB_CREATE_OR_OVERWRITE)
    random.seed(42)
    doccount = 1000
    vals = {}

    # Add a value to all the documents
    for num in range(1, doccount):
        doc = xapian.Document()
        val = ('val%d' % num).encode('utf-8')
        doc.add_value(1, val)
        db.add_document(doc)
        vals[num] = val
    db.commit()
    check_vals(db, vals)

    # Modify one of the values (this is a regression test which failed with the
    # initial implementation of streaming values).
    doc = xapian.Document()
    val = b'newval0'
    doc.add_value(1, val)
    db.replace_document(2, doc)
    vals[2] = val
    db.commit()
    check_vals(db, vals)

    # Do some random modifications.
    for count in range(1, doccount * 2):
        docid = random.randint(1, doccount)
        doc = xapian.Document()

        if count % 5 == 0:
            val = b''
        else:
            val = ('newval%d' % count).encode('utf-8')
        doc.add_value(1, val)
        db.replace_document(docid, doc)
        vals[docid] = val

    # Check the values before and after modification.
    check_vals(db, vals)
    db.commit()
    check_vals(db, vals)

    # Delete all the values which are non-empty, in a random order.
    # The stored values are bytes, so the comparison must be against b'':
    # a bytes object never compares equal to the str '', which would make
    # the filter select every key.
    keys = [key for key, val in vals.items() if val != b'']
    random.shuffle(keys)
    for key in keys:
        doc = xapian.Document()
        db.replace_document(key, doc)
        vals[key] = b''
    check_vals(db, vals)
    db.commit()
    check_vals(db, vals)

    db.close()
    expect_exception(xapian.DatabaseError, "Database has been closed", check_vals, db, vals)
    shutil.rmtree(dbpath)
def test_serialise_document():
    """Check documents survive a serialise/unserialise round trip.

    Both a freshly built document and one read back from a database are
    checked for matching terms, values and data.

    """
    def check_roundtrip(doc, term_count, data):
        # Serialise doc, rebuild it, and compare the copy with the original.
        doc2 = xapian.Document.unserialise(doc.serialise())
        expect(len(list(doc.termlist())), len(list(doc2.termlist())))
        expect(len(list(doc.termlist())), term_count)
        expect([(item.term, item.wdf) for item in doc.termlist()],
               [(item.term, item.wdf) for item in doc2.termlist()])
        expect([(item.num, item.value) for item in list(doc.values())],
               [(item.num, item.value) for item in list(doc2.values())])
        expect(doc.get_data(), doc2.get_data())
        expect(doc.get_data(), data)

    fresh = xapian.Document()
    fresh.add_term('foo', 2)
    fresh.add_value(1, b'bar')
    fresh.set_data('baz')
    check_roundtrip(fresh, 1, b'baz')

    db = setup_database()
    check_roundtrip(db.get_document(1), 3, b'is it cold?')
def test_serialise_query():
    """Check queries survive a serialise/unserialise round trip.

    """
    cases = (
        ((), 'Query()'),
        (('hello',), 'Query(hello)'),
        ((xapian.Query.OP_OR, ('hello', b'world')), 'Query((hello OR world))'),
    )
    for args, description in cases:
        original = xapian.Query(*args)
        restored = xapian.Query.unserialise(original.serialise())
        expect(str(original), str(restored))
        expect(str(original), description)
def test_preserve_query_parser_stopper():
    """Check a stopper set on a QueryParser is still usable after del.

    The Python name for the stopper is deleted inside the helper before the
    parser is returned and used.

    """
    def make_qp():
        parser = xapian.QueryParser()
        stopper = xapian.SimpleStopper()
        for word in ('to', 'not'):
            stopper.add(word)
        parser.set_stopper(stopper)
        del stopper
        return parser

    queryparser = make_qp()
    query = queryparser.parse_query('to be')
    stopped = [term for term in queryparser.stoplist()]
    expect(stopped, [b'to'])
def test_preserve_term_generator_stopper():
    """Check a stopper set on a TermGenerator is still usable after del.

    The Python name for the stopper is deleted inside the helper before the
    term generator is returned and used to index text.

    """
    def make_tg():
        generator = xapian.TermGenerator()
        generator.set_stemmer(xapian.Stem('en'))
        stopper = xapian.SimpleStopper()
        for word in ('to', 'not'):
            stopper.add(word)
        generator.set_stopper(stopper)
        del stopper
        return generator

    termgen = make_tg()
    termgen.index_text('to be')

    doc = termgen.get_document()
    terms = sorted(item.term for item in doc.termlist())
    expect(terms, [b'Zbe', b'be', b'to'])
def test_preserve_enquire_sorter():
    """Check a key maker set on an Enquire is still usable after del.

    Each of the three key-based sort setters is given a MultiValueKeyMaker
    whose Python name is deleted before the match is run.

    """
    db = xapian.WritableDatabase('', xapian.DB_BACKEND_INMEMORY)
    doc = xapian.Document()
    doc.add_term('foo')
    doc.add_value(1, '1')
    db.add_document(doc)
    db.add_document(doc)

    def make_enq(db, setter_name):
        # Build an Enquire and install a sorter via the named setter.
        enq = xapian.Enquire(db)
        sorter = xapian.MultiValueKeyMaker()
        getattr(enq, setter_name)(sorter, False)
        del sorter
        return enq

    setter_names = (
        'set_sort_by_key',
        'set_sort_by_key_then_relevance',
        'set_sort_by_relevance_then_key',
    )
    for setter_name in setter_names:
        enq = make_enq(db, setter_name)
        enq.set_query(xapian.Query('foo'))
        enq.get_mset(0, 10)
def test_matchspy():
    """Check adding, clearing and reading back ValueCountMatchSpy objects.

    """
    db = setup_database()
    enq = xapian.Enquire(db)
    enq.set_query(xapian.Query(xapian.Query.OP_OR, "was", "it"))

    def set_matchspy_deref(enq):
        """Set a matchspy, and then drop the reference, to check that it
        doesn't get deleted too soon.
        """
        spy = xapian.ValueCountMatchSpy(0)
        enq.add_matchspy(spy)
        del spy

    set_matchspy_deref(enq)
    mset = enq.get_mset(0, 10)
    expect(len(mset), 5)

    spy = xapian.ValueCountMatchSpy(0)
    enq.add_matchspy(spy)
    # Regression test for clear_matchspies() - used to always raise an
    # exception due to a copy and paste error in its definition.
    enq.clear_matchspies()
    mset = enq.get_mset(0, 10)
    # The spy was cleared before the match, so it has recorded nothing.
    expect(list(spy.values()), [])

    enq.add_matchspy(spy)
    mset = enq.get_mset(0, 10)
    expect(spy.get_total(), 5)

    by_value = [(item.term, item.termfreq) for item in list(spy.values())]
    expect(by_value, [
        (xapian.sortable_serialise(1.5), 1),
        (xapian.sortable_serialise(2), 2),
    ])

    by_frequency = [(item.term, item.termfreq) for item in spy.top_values(10)]
    expect(by_frequency, [
        (xapian.sortable_serialise(2), 2),
        (xapian.sortable_serialise(1.5), 1),
    ])
def test_import_star():
    """Check that "from xapian import *" works.

    Importing the helper module is the whole test.  This is a regression
    test - the star import failed in the 1.2.0 release.  Star imports aren't
    normally good style, but they should work anyway!

    """
    import test_xapian_star
def test_latlongcoords_iter():
    """Check iterating over a LatLongCoords container.

    """
    coords = xapian.LatLongCoords()
    expect([c for c in coords], [])

    for longitude in (0, 1):
        coords.append(xapian.LatLongCoord(0, longitude))

    described = [str(c) for c in coords]
    expect(described, ['Xapian::LatLongCoord(0, 0)',
                       'Xapian::LatLongCoord(0, 1)'])
def test_compactor():
    """Test that xapian.Compactor works.

    Two small databases with overlapping terms and metadata keys are
    compacted into a third, first with the default behaviour and then with
    a Compactor subclass that logs status messages and merges duplicate
    metadata values.

    """
    tmpdir = tempfile.mkdtemp()
    db1 = db2 = db3 = None
    try:
        db1path = os.path.join(tmpdir, 'db1')
        db2path = os.path.join(tmpdir, 'db2')
        db3path = os.path.join(tmpdir, 'db3')

        # Set up a couple of sample input databases
        db1 = xapian.WritableDatabase(db1path, xapian.DB_CREATE_OR_OVERWRITE)
        doc1 = xapian.Document()
        doc1.add_term('Hello')
        doc1.add_term('Hello1')
        doc1.add_value(0, 'Val1')
        db1.set_metadata('key', '1')
        db1.set_metadata('key1', '1')
        db1.add_document(doc1)
        db1.commit()

        db2 = xapian.WritableDatabase(db2path, xapian.DB_CREATE_OR_OVERWRITE)
        doc2 = xapian.Document()
        doc2.add_term('Hello')
        doc2.add_term('Hello2')
        doc2.add_value(0, 'Val2')
        db2.set_metadata('key', '2')
        db2.set_metadata('key2', '2')
        db2.add_document(doc2)
        db2.commit()

        db_to_compact = xapian.Database()
        db_to_compact.add_database(xapian.Database(db1path))
        db_to_compact.add_database(xapian.Database(db2path))
        # Compact with the default compactor
        # Metadata conflicts are resolved by picking the first value
        db_to_compact.compact(db3path)

        db3 = xapian.Database(db3path)
        expect([(item.term, item.termfreq) for item in db3.allterms()],
               [(b'Hello', 2), (b'Hello1', 1), (b'Hello2', 1)])
        expect(db3.get_document(1).get_value(0), b'Val1')
        expect(db3.get_document(2).get_value(0), b'Val2')
        expect(db3.get_metadata('key'), b'1')
        expect(db3.get_metadata('key1'), b'1')
        expect(db3.get_metadata('key2'), b'2')

        # Release the handle on the first compacted database before its
        # directory is written to again below.
        db3.close()

        context("testing a custom compactor which merges duplicate metadata")
        class MyCompactor(xapian.Compactor):
            def __init__(self):
                xapian.Compactor.__init__(self)
                self.log = []

            def set_status(self, table, status):
                # An empty status is logged as the start of work on a table.
                if len(status) == 0:
                    self.log.append('Starting %s' % table.decode('utf-8'))
                else:
                    self.log.append('%s: %s' % (table.decode('utf-8'), status.decode('utf-8')))

            def resolve_duplicate_metadata(self, key, vals):
                return b','.join(vals)

        c = MyCompactor()
        db_to_compact = xapian.Database()
        db_to_compact.add_database(xapian.Database(db1path))
        db_to_compact.add_database(xapian.Database(db2path))
        db_to_compact.compact(db3path, 0, 0, c)
        log = '\n'.join(c.log)
        # Check we got some messages in the log
        expect('Starting postlist' in log, True)

        db3 = xapian.Database(db3path)
        expect([(item.term, item.termfreq) for item in db3.allterms()],
               [(b'Hello', 2), (b'Hello1', 1), (b'Hello2', 1)])
        expect(db3.get_metadata('key'), b'1,2')
        expect(db3.get_metadata('key1'), b'1')
        expect(db3.get_metadata('key2'), b'2')

    finally:
        if db1 is not None:
            db1.close()
        if db2 is not None:
            db2.close()
        if db3 is not None:
            db3.close()

        shutil.rmtree(tmpdir)
def test_custom_matchspy():
    """Check a MatchSpy subclass written in Python is called during a match.

    The spy only counts its calls; the test requires at least one call, and
    that the database still reports all five documents afterwards.

    """
    class MSpy(xapian.MatchSpy):
        def __init__(self):
            xapian.MatchSpy.__init__(self)
            self.count = 0

        def __call__(self, doc, weight):
            self.count += 1

    mspy = MSpy()
    db = setup_database()
    query = xapian.Query(xapian.Query.OP_OR, "was", "it")

    enquire = xapian.Enquire(db)
    enquire.add_matchspy(mspy)
    enquire.set_query(query)

    mset = enquire.get_mset(0, 1)
    expect(len(mset), 1)
    expect(mspy.count >= 1, True)

    expect(db.get_doccount(), 5)
def test_removed_features():
    """Check that removed names are absent from the module and its objects.

    Every listed attribute must raise AttributeError when looked up on the
    corresponding object.

    """
    db = xapian.WritableDatabase('', xapian.DB_BACKEND_INMEMORY)
    doc = xapian.Document()
    enq = xapian.Enquire(db)
    eset = xapian.ESet()
    mset = xapian.MSet()
    query = xapian.Query()
    qp = xapian.QueryParser()
    titer = xapian._TermIterator()
    postiter = xapian._PostingIterator()

    def check_missing(obj, attr):
        expect_exception(AttributeError, None, getattr, obj, attr)

    # Each entry pairs an object with the attribute names it must not have.
    removed = (
        (xapian, (
            'Stem_get_available_languages',
            'TermIterator',
            'PositionIterator',
            'PostingIterator',
            'ValueIterator',
            'MSetIterator',
            'ESetIterator',
        )),
        (db, (
            'allterms_begin',
            'allterms_end',
            'metadata_keys_begin',
            'metadata_keys_end',
            'synonym_keys_begin',
            'synonym_keys_end',
            'synonyms_begin',
            'synonyms_end',
            'spellings_begin',
            'spellings_end',
            'positionlist_begin',
            'positionlist_end',
            'postlist_begin',
            'postlist_end',
            'termlist_begin',
            'termlist_end',
        )),
        (doc, (
            'termlist_begin',
            'termlist_end',
            'values_begin',
            'values_end',
        )),
        (enq, (
            'get_matching_terms_begin',
            'get_matching_terms_end',
        )),
        (eset, ('begin', 'end')),
        (mset, ('begin', 'end')),
        (postiter, ('positionlist_begin', 'positionlist_end')),
        (query, ('get_terms_begin', 'get_terms_end')),
        (qp, (
            'stoplist_begin',
            'stoplist_end',
            'unstem_begin',
            'unstem_end',
        )),
        (titer, ('positionlist_begin', 'positionlist_end')),
    )
    for obj, attrs in removed:
        for attr in attrs:
            check_missing(obj, attr)
def test_repr():
    """Check repr() of wrapped objects never returns None.

    repr() returned None in 1.4.0.

    """
    # Class name in the xapian module, and the constructor arguments to use.
    # Names are looked up one at a time so objects are created and checked
    # strictly in this order.
    cases = (
        ('Query', ('foo',)),
        ('AssertionError', ('foo',)),
        ('InvalidArgumentError', ('foo',)),
        ('InvalidOperationError', ('foo',)),
        ('UnimplementedError', ('foo',)),
        ('DatabaseError', ('foo',)),
        ('DatabaseCorruptError', ('foo',)),
        ('DatabaseCreateError', ('foo',)),
        ('DatabaseLockError', ('foo',)),
        ('DatabaseModifiedError', ('foo',)),
        ('DatabaseOpeningError', ('foo',)),
        ('DatabaseVersionError', ('foo',)),
        ('DocNotFoundError', ('foo',)),
        ('FeatureUnavailableError', ('foo',)),
        ('InternalError', ('foo',)),
        ('NetworkError', ('foo',)),
        ('NetworkTimeoutError', ('foo',)),
        ('QueryParserError', ('foo',)),
        ('SerialisationError', ('foo',)),
        ('RangeError', ('foo',)),
        ('WildcardError', ('foo',)),
        ('Document', ()),
        ('Registry', ()),
        ('Query', ()),
        ('Stem', ('en',)),
        ('TermGenerator', ()),
        ('MSet', ()),
        ('ESet', ()),
        ('RSet', ()),
        ('MultiValueKeyMaker', ()),
        ('SimpleStopper', ()),
        ('RangeProcessor', ()),
        ('DateRangeProcessor', (1,)),
        ('NumberRangeProcessor', (1,)),
        ('StringValueRangeProcessor', (1,)),
        ('DateValueRangeProcessor', (1,)),
        ('NumberValueRangeProcessor', (1,)),
        ('QueryParser', ()),
        ('BoolWeight', ()),
        ('TfIdfWeight', ()),
        ('BM25Weight', ()),
        ('BM25PlusWeight', ()),
        ('TradWeight', ()),
        ('InL2Weight', ()),
        ('IfB2Weight', ()),
        ('IneB2Weight', ()),
        ('BB2Weight', ()),
        ('DLHWeight', ()),
        ('PL2Weight', ()),
        ('PL2PlusWeight', ()),
        ('DPHWeight', ()),
        ('LMWeight', ()),
        ('CoordWeight', ()),
        ('Compactor', ()),
        ('ValuePostingSource', (1,)),
        ('ValueWeightPostingSource', (1,)),
        ('DecreasingValueWeightPostingSource', (1,)),
        ('ValueMapPostingSource', (1,)),
        ('FixedWeightPostingSource', (1,)),
        ('ValueCountMatchSpy', (1,)),
        ('LatLongCoord', ()),
        ('LatLongCoords', ()),
        ('GreatCircleMetric', ()),
        ('Database', ()),
        ('WritableDatabase', ()),
    )
    for name, args in cases:
        expect(repr(getattr(xapian, name)(*args)) is None, False)
# Overall outcome; cleared by run() if any test run reports failure.
result = True

# Run all tests (ie, callables with names starting "test_").
def run():
    global result
    passed = runtests(globals(), sys.argv[1:])
    if not passed:
        result = False

print("Running tests without threads")
run()

if have_threads:
    print("Running tests with threads")

    # This testcase seems to just block when run in a thread under Python 3
    # on some platforms.  It fails with 3.2.3 on Debian wheezy, but passes
    # with the exact same package version on Debian unstable not long after
    # the jessie release.  The issue it's actually serving to regression
    # test for is covered by running it without threads, so just disable it
    # rather than risk test failures that don't seem to indicate a problem
    # in Xapian.
    del test_import_star

    t = threading.Thread(name='test runner', target=run)
    t.start()
    # Block until the thread has completed so the thread gets a chance to exit
    # with error status.
    t.join()

if not result:
    sys.exit(1)

# vim:syntax=python:set expandtab: