Stop forward declaring global QueryOptimiser in API header
[xapian.git] / xapian-bindings / python / pythontest.py
blobc421e6f219c8d0a2ac97b4ffb9031bf4e2a54d82
1 # Tests of Python-specific parts of the xapian bindings.
3 # Copyright (C) 2007 Lemur Consulting Ltd
4 # Copyright (C) 2008,2009,2010,2011,2013,2014,2015,2016 Olly Betts
5 # Copyright (C) 2010,2011 Richard Boulton
7 # This program is free software; you can redistribute it and/or
8 # modify it under the terms of the GNU General Public License as
9 # published by the Free Software Foundation; either version 2 of the
10 # License, or (at your option) any later version.
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with this program; if not, write to the Free Software
19 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
20 # USA
22 import os
23 import random
24 import shutil
25 import sys
26 import tempfile
27 import xapian
29 try:
30 import threading
31 have_threads = True
32 except ImportError:
33 have_threads = False
35 from testsuite import *
def setup_database():
    """Build the shared fixture: an inmemory database holding 5 documents.

    Document 1 is "is it cold?".  Documents 2 to 5 are successive states of
    one "was it warm?" Document object, which picks up extra terms and
    values before each add_document() call.

    Returns the populated xapian.WritableDatabase.

    """
    database = xapian.WritableDatabase('', xapian.DB_BACKEND_INMEMORY)
    serialise = xapian.sortable_serialise

    # Document 1.
    cold = xapian.Document()
    cold.set_data("is it cold?")
    cold.add_term("is")
    cold.add_posting("it", 1)
    cold.add_posting("cold", 2)
    database.add_document(cold)

    # Document 2.
    warm = xapian.Document()
    warm.set_data("was it warm?")
    warm.add_posting("was", 1)
    warm.add_posting("it", 2)
    warm.add_posting("warm", 3)
    database.add_document(warm)

    # Document 3: same object as document 2, plus "two" and a value.
    warm.set_data("was it warm? two")
    warm.add_term("two", 2)
    warm.add_value(0, serialise(2))
    database.add_document(warm)

    # Document 4: adds "three" and replaces the value in slot 0.
    warm.set_data("was it warm? three")
    warm.add_term("three", 3)
    warm.add_value(0, serialise(1.5))
    database.add_document(warm)

    # Document 5: adds "four", more occurrences of "it", and more values.
    warm.set_data("was it warm? four it")
    warm.add_term("four", 4)
    warm.add_term("it", 6)
    warm.add_posting("it", 7)
    warm.add_value(5, 'five')
    warm.add_value(9, 'nine')
    warm.add_value(0, serialise(2))
    database.add_document(warm)

    expect(database.get_doccount(), 5)
    return database
def test_exception_base():
    """Check that xapian exceptions have Exception as a base class.

    A xapian error is raised and caught as a plain Exception.  If the
    xapian class did not derive from Exception, the error would escape the
    handler and propagate out of this test.

    """
    try:
        raise xapian.InvalidOperationError("Test exception")
    except Exception:
        # Reaching this handler is the success condition; nothing to do.
        pass
def test_mset_iter():
    """Test iterators over MSets.

    Checks the full match set, then for every sub-mset with start and
    maxitems in 0..5 compares iteration, get_hit() and [] indexing against
    each other and against the full match set.

    """
    database = setup_database()
    enquire = xapian.Enquire(database)
    enquire.set_query(xapian.Query(xapian.Query.OP_OR, "was", "it"))
    mset = enquire.get_mset(0, 10)
    all_items = [entry for entry in mset]
    expect(len(all_items), 5)
    expect(len(mset), len(all_items), "Expected number of items to be length of mset")

    context("testing returned item from mset")
    third = all_items[2]
    expect(third.docid, 4)
    expect(third.rank, 2)
    expect(third.percent, 86)
    expect(third.collapse_key, '')
    expect(third.collapse_count, 0)
    expect(third.document.get_data(), 'was it warm? three')

    # Check iterators for sub-msets against the whole mset.
    for start in range(6):
        for maxitems in range(6):
            window = (start, maxitems)
            context("checking iterators for sub-mset from %d, maxitems %d" % window)
            submset = enquire.get_mset(start, maxitems)
            for num, item in enumerate(submset):
                where = (num,) + window
                context("testing hit %d for sub-mset from %d, maxitems %d" % where)
                expect(item.rank, num + start)

                context("comparing iterator item %d for sub-mset from %d, maxitems %d against hit" % where)
                hit = submset.get_hit(num)
                expect(hit.docid, item.docid)
                expect(hit.rank, item.rank)
                expect(hit.percent, item.percent)
                expect(hit.document.get_data(), item.document.get_data())
                expect(hit.collapse_key, item.collapse_key)
                expect(hit.collapse_count, item.collapse_count)

                context("comparing iterator item %d for sub-mset from %d, maxitems %d against hit from whole mset" % where)
                hit = mset.get_hit(num + start)
                expect(hit.docid, item.docid)
                expect(hit.rank, item.rank)
                expect(hit.percent, item.percent)
                expect(hit.document.get_data(), item.document.get_data())
                expect(hit.collapse_key, item.collapse_key)
                expect(hit.collapse_count, item.collapse_count)

                context("comparing iterator item %d for sub-mset from %d, maxitems %d against direct access with []" % where)
                expect(submset[num].docid, item.docid)
                expect(submset[num].rank, item.rank)
                expect(submset[num].percent, item.percent)
                expect(submset[num].document.get_data(), item.document.get_data())
                expect(submset[num].collapse_key, item.collapse_key)
                expect(submset[num].collapse_count, item.collapse_count)

            context("Checking out of range access to mset, for sub-mset from %d, maxitems %d" % window)
            # Test out-of-range access to mset: far outside on both sides,
            # then just past each end.
            for bad_index in (-10, 10, -1-len(submset), len(submset)):
                expect_exception(IndexError, 'Mset index out of range',
                                 submset.__getitem__, bad_index)

            # Check that the item contents remain valid when the iterator has
            # moved on.
            kept = [entry for entry in submset]
            for num, item in enumerate(kept):
                context("comparing iterator item %d for sub-mset mset from %d, maxitems %d against saved item" % ((num,) + window))
                expect(submset[num].docid, item.docid)
                expect(submset[num].rank, item.rank)
                expect(submset[num].percent, item.percent)
                expect(submset[num].document.get_data(), item.document.get_data())
                expect(submset[num].collapse_key, item.collapse_key)
                expect(submset[num].collapse_count, item.collapse_count)

            # Check that the right number of items exist in the mset.
            context("checking length of sub-mset from %d, maxitems %d" % window)
            wanted = min(maxitems, 5 - start)
            sub_items = [entry for entry in submset]
            expect(len(sub_items), wanted)
            expect(len(submset), wanted)
def test_eset_iter():
    """Test iterators over ESets.

    Builds an ESet from the same RSet twice, once on an Enquire with no
    query set and once with a query set, and compares the terms and
    weights of the two results.

    """
    database = setup_database()
    query = xapian.Query(xapian.Query.OP_OR, "was", "it")
    relevant = xapian.RSet()
    relevant.add_document(3)

    context("getting eset items without a query")
    enquire = xapian.Enquire(database)
    eset = enquire.get_eset(10, relevant)
    without_query = [entry for entry in eset]
    expect(len(without_query), 3)
    expect(len(without_query), len(eset))

    context("getting eset items with a query")
    enquire = xapian.Enquire(database)
    enquire.set_query(query)
    eset = enquire.get_eset(10, relevant)
    with_query = [entry for entry in eset]
    expect(len(with_query), 2)
    expect(len(with_query), len(eset))

    # (index with a query, index of the same entry without a query)
    matching = ((0, 0), (1, 2))

    context("comparing eset items with a query to those without")
    for with_idx, without_idx in matching:
        expect(with_query[with_idx].term, without_query[without_idx].term)

    context("comparing eset weights with a query to those without")
    for with_idx, without_idx in matching:
        expect(with_query[with_idx].weight, without_query[without_idx].weight)
def test_matchingterms_iter():
    """Test Enquire.matching_terms iterator.

    For each hit, matching_terms() must give the same terms whether it is
    passed the docid or the hit item itself.

    """
    database = setup_database()
    wanted = xapian.Query(xapian.Query.OP_OR, ("was", "it", "warm", "two"))

    # Prior to 1.2.4 Enquire.matching_terms() leaked references to its members.

    enquire = xapian.Enquire(database)
    enquire.set_query(wanted)
    mset = enquire.get_mset(0, 10)

    for hit in mset:
        # Make a list of the term names
        by_docid = [name for name in enquire.matching_terms(hit.docid)]
        by_item = [name for name in enquire.matching_terms(hit)]
        expect(by_docid, by_item)

    top_terms = [name for name in enquire.matching_terms(mset.get_hit(0))]
    expect(top_terms, ['it', 'two', 'warm', 'was'])
def test_queryterms_iter():
    """Test Query term iterator.

    Iterating a Query is expected to yield its terms in sorted order.  No
    database is involved, so none is created.

    """
    query = xapian.Query(xapian.Query.OP_OR, ("was", "it", "warm", "two"))

    # Make a list of the term names
    terms = [term for term in query]
    expect(terms, ['it', 'two', 'warm', 'was'])
def test_queryparser_stoplist_iter():
    """Test QueryParser stoplist iterator.

    Parses the same text with a stemmer only, a stopper only, and both,
    checking the stoplist before and after each parse.

    """
    text = 'to be or not to be is the questions'
    stemmer = xapian.Stem('en')

    def stopped(parser):
        # Materialise the parser's current stoplist.
        return [word for word in parser.stoplist()]

    # Check behaviour without having set a stoplist.
    queryparser = xapian.QueryParser()
    queryparser.set_stemmer(stemmer)
    queryparser.set_stemming_strategy(queryparser.STEM_SOME)
    expect(stopped(queryparser), [])
    query = queryparser.parse_query(text)
    expect(stopped(queryparser), [])
    expect(str(query),
           'Query((Zto@1 OR Zbe@2 OR Zor@3 OR Znot@4 OR Zto@5 OR Zbe@6 OR '
           'Zis@7 OR Zthe@8 OR Zquestion@9))')

    # Check behaviour with a stoplist, but no stemmer
    queryparser = xapian.QueryParser()
    stopper = xapian.SimpleStopper()
    for word in ('to', 'not', 'question'):
        stopper.add(word)
    queryparser.set_stopper(stopper)
    expect(stopped(queryparser), [])
    query = queryparser.parse_query(text)

    expect(stopped(queryparser), ['to', 'not', 'to'])
    expect(str(query),
           'Query((be@2 OR or@3 OR be@6 OR is@7 OR the@8 OR questions@9))')

    # Check behaviour with a stoplist and a stemmer
    queryparser.set_stemmer(stemmer)
    queryparser.set_stemming_strategy(queryparser.STEM_SOME)
    # Shouldn't have changed since previous query.
    expect(stopped(queryparser), ['to', 'not', 'to'])
    query = queryparser.parse_query(text)

    expect(stopped(queryparser), ['to', 'not', 'to'])
    expect(str(query),
           'Query((Zbe@2 OR Zor@3 OR Zbe@6 OR Zis@7 OR Zthe@8 OR Zquestion@9))')
def test_queryparser_unstem_iter():
    """Test QueryParser unstemlist iterator.

    Checks unstemlist() before and after parsing, first with no stemmer
    and then with an English stemmer using STEM_SOME.

    """
    text = 'to question questions'
    stemmer = xapian.Stem('en')

    def unstemmed(parser, term):
        # Materialise the unstemmed forms recorded for `term`.
        return [word for word in parser.unstemlist(term)]

    # No stemmer: each term only maps back to itself after parsing.
    queryparser = xapian.QueryParser()
    for term in ('to', 'question', 'questions'):
        expect(unstemmed(queryparser, term), [])
    query = queryparser.parse_query(text)

    for term in ('to', 'question', 'questions'):
        expect(unstemmed(queryparser, term), [term])
    expect(str(query),
           'Query((to@1 OR question@2 OR questions@3))')

    # With a stemmer: stemmed terms carry a Z prefix.
    queryparser = xapian.QueryParser()
    queryparser.set_stemmer(stemmer)
    queryparser.set_stemming_strategy(queryparser.STEM_SOME)
    for term in ('Zto', 'Zquestion', 'Zquestions'):
        expect(unstemmed(queryparser, term), [])
    query = queryparser.parse_query(text)

    expect(unstemmed(queryparser, 'Zto'), ['to'])
    expect(unstemmed(queryparser, 'Zquestion'), ['question', 'questions'])
    expect(unstemmed(queryparser, 'Zquestions'), [])
    expect(str(query),
           'Query((Zto@1 OR Zquestion@2 OR Zquestion@3))')
def test_allterms_iter():
    """Test all-terms iterator on Database.

    Covers iterating the database directly, the attributes that all-terms
    items do not support, access to items after the iterator has moved on,
    and allterms() restricted to a prefix.

    """
    database = setup_database()

    context("making a list of the term names and frequencies")
    names = []
    freqs = []
    for entry in database:
        names.append(entry.term)
        expect_exception(xapian.InvalidOperationError, 'Iterator does not support wdfs', getattr, entry, 'wdf')
        freqs.append(entry.termfreq)
        expect_exception(xapian.InvalidOperationError, 'Iterator does not support position lists', getattr, entry, 'positer')

    context("checking that items are no longer valid once the iterator has moved on")
    stale = [entry for entry in database]

    expect(len(stale), len(names))
    for entry, name in zip(stale, names):
        expect(entry.term, name)

    expect(len(stale), len(freqs))
    for entry in stale:
        expect_exception(xapian.InvalidOperationError, 'Iterator has moved, and does not support random access', getattr, entry, 'termfreq')

    context("checking that restricting the terms iterated with a prefix works")
    # Work out the expected (term, freq) pairs from the unrestricted lists.
    with_prefix = [(name, freq) for name, freq in zip(names, freqs)
                   if name[0] == 't']
    seen = 0
    for seen, entry in enumerate(database.allterms('t'), 1):
        name, freq = with_prefix[seen - 1]
        expect(entry.term, name)
        expect(entry.termfreq, freq)
    expect(len(with_prefix), seen)
def test_termlist_iter():
    """Test termlist iterator on Database.

    Uses the termlist of document 3 to check plain iteration, skip_to()
    combined with next(), and access to items after the iterator has
    moved on.

    """
    database = setup_database()

    def snapshot(entry):
        # Everything a termlist item exposes, in a comparable form.
        return (entry.term, entry.wdf, entry.termfreq,
                [pos for pos in entry.positer])

    # Make lists of the item contents
    names = []
    wdfs = []
    freqs = []
    positions = []
    for entry in database.termlist(3):
        names.append(entry.term)
        wdfs.append(entry.wdf)
        freqs.append(entry.termfreq)
        positions.append([pos for pos in entry.positer])

    expect(names, ['it', 'two', 'warm', 'was'])
    expect(wdfs, [1, 2, 1, 1])
    expect(freqs, [5, 3, 4, 4])
    expect(positions, [[2], [], [3], [1]])

    # Test skip_to().
    tliter = database.termlist(3)

    # skip to an item before the first item.
    entry = tliter.skip_to('a')
    expect(snapshot(entry), ('it', 1, 5, [2]))

    # skip forwards to an item.
    entry = tliter.skip_to('two')
    expect(snapshot(entry), ('two', 2, 3, []))

    # skip to same place (should return same item)
    entry = tliter.skip_to('two')
    expect(snapshot(entry), ('two', 2, 3, []))

    # next() after a skip_to(), should return next item.
    entry = next(tliter)
    expect(snapshot(entry), ('warm', 1, 4, [3]))

    # skip to same place (should return same item)
    entry = tliter.skip_to('warm')
    expect(snapshot(entry), ('warm', 1, 4, [3]))

    # skip backwards (should return same item)
    entry = tliter.skip_to('a')

    # skip to after end.
    expect_exception(StopIteration, '', tliter.skip_to, 'zoo')
    # skip backwards (should still return StopIteration).
    expect_exception(StopIteration, '', tliter.skip_to, 'a')
    # next should continue to return StopIteration.
    expect_exception(StopIteration, '', next, tliter)

    # Make a list of the terms (so we can test if they're still valid
    # once the iterator has moved on).
    stale = [entry for entry in database.termlist(3)]

    expect(len(stale), len(names))
    for entry, name in zip(stale, names):
        expect(entry.term, name)

    expect(len(stale), len(wdfs))
    for entry, wdf in zip(stale, wdfs):
        expect(entry.wdf, wdf)

    # These attributes are expected to raise once the iterator has moved.
    for attribute in ('termfreq', 'positer'):
        expect(len(stale), len(freqs))
        for entry in stale:
            expect_exception(xapian.InvalidOperationError,
                             'Iterator has moved, and does not support random access',
                             getattr, entry, attribute)
def test_dbdocument_iter():
    """Test document terms iterator for document taken from a database.

    Iterates the terms of document 3, then checks which attributes of the
    items can still be read after the iterator has moved on.

    """
    database = setup_database()

    document = database.get_document(3)

    # Make lists of the item contents
    names = []
    wdfs = []
    freqs = []
    positions = []
    for entry in document:
        names.append(entry.term)
        wdfs.append(entry.wdf)
        freqs.append(entry.termfreq)
        positions.append([pos for pos in entry.positer])

    expect(names, ['it', 'two', 'warm', 'was'])
    expect(wdfs, [1, 2, 1, 1])
    expect(freqs, [5, 3, 4, 4])
    expect(positions, [[2], [], [3], [1]])

    # Make a list of the terms (so we can test if they're still valid
    # once the iterator has moved on).
    stale = [entry for entry in document]

    expect(len(stale), len(names))
    for entry, name in zip(stale, names):
        expect(entry.term, name)

    expect(len(stale), len(wdfs))
    for entry, wdf in zip(stale, wdfs):
        expect(entry.wdf, wdf)

    # These attributes are expected to raise once the iterator has moved.
    for attribute in ('termfreq', 'positer'):
        expect(len(stale), len(freqs))
        for entry in stale:
            expect_exception(xapian.InvalidOperationError,
                             'Iterator has moved, and does not support random access',
                             getattr, entry, attribute)
def test_newdocument_iter():
    """Test document terms iterator for newly created document.

    The document is never added to a database, so reading termfreq from
    its term items is expected to raise InvalidOperationError.

    """
    document = xapian.Document()
    document.set_data("was it warm? two")
    for position, word in enumerate(("was", "it", "warm"), 1):
        document.add_posting(word, position)
    document.add_term("two", 2)

    # Make lists of the item contents
    names = []
    wdfs = []
    positions = []
    for entry in document:
        names.append(entry.term)
        wdfs.append(entry.wdf)
        expect_exception(xapian.InvalidOperationError,
                         "get_termfreq() not valid for a TermIterator from a "
                         "Document which is not associated with a database",
                         getattr, entry, 'termfreq')
        positions.append([pos for pos in entry.positer])

    expect(names, ['it', 'two', 'warm', 'was'])
    expect(wdfs, [1, 2, 1, 1])
    expect(positions, [[2], [], [3], [1]])

    # Make a list of the terms (so we can test if they're still valid
    # once the iterator has moved on).
    stale = [entry for entry in document]

    expect(len(stale), len(names))
    for entry, name in zip(stale, names):
        expect(entry.term, name)

    expect(len(stale), len(wdfs))
    for entry, wdf in zip(stale, wdfs):
        expect(entry.wdf, wdf)

    for entry in stale:
        expect_exception(xapian.InvalidOperationError,
                         'Iterator has moved, and does not support random access',
                         getattr, entry, 'termfreq')

    expect(len(stale), len(positions))
    for entry in stale:
        expect_exception(xapian.InvalidOperationError,
                         'Iterator has moved, and does not support random access',
                         getattr, entry, 'positer')
def test_postinglist_iter():
    """Test postinglist iterator on Database.

    Uses the posting list of the term 'it' to check plain iteration,
    skip_to() combined with next(), and access to items after the iterator
    has moved on.

    """
    db = setup_database()

    def snapshot(posting):
        # Everything a posting item exposes, in a comparable form.
        return (posting.docid, posting.doclength, posting.wdf,
                [pos for pos in posting.positer])

    # Make lists of the item contents
    docids = []
    doclengths = []
    wdfs = []
    positers = []
    for posting in db.postlist('it'):
        docids.append(posting.docid)
        doclengths.append(posting.doclength)
        wdfs.append(posting.wdf)
        positers.append([pos for pos in posting.positer])

    expect(docids, [1, 2, 3, 4, 5])
    expect(doclengths, [3, 3, 5, 8, 19])
    expect(wdfs, [1, 1, 1, 1, 8])
    expect(positers, [[1], [2], [2], [2], [2, 7]])

    # Test skip_to().
    pliter = db.postlist('it')

    # skip to an item before the first item.
    posting = pliter.skip_to(0)
    expect(snapshot(posting), (1, 3, 1, [1]))

    # skip forwards to an item.
    posting = pliter.skip_to(3)
    expect(snapshot(posting), (3, 5, 1, [2]))

    # skip to same place (should return same item)
    posting = pliter.skip_to(3)
    expect(snapshot(posting), (3, 5, 1, [2]))

    # next() after a skip_to(), should return next item.
    posting = next(pliter)
    expect(snapshot(posting), (4, 8, 1, [2]))

    # skip to same place (should return same item)
    posting = pliter.skip_to(4)
    expect(snapshot(posting), (4, 8, 1, [2]))

    # skip backwards (should return same item)
    posting = pliter.skip_to(2)
    expect(snapshot(posting), (4, 8, 1, [2]))

    # skip to after end.
    expect_exception(StopIteration, '', pliter.skip_to, 6)
    # skip backwards (should still return StopIteration).  An earlier docid
    # is used so that this really is a backwards skip, as in the termlist
    # test's skip_to('a') after the end.
    expect_exception(StopIteration, '', pliter.skip_to, 2)
    # next should continue to return StopIteration.
    expect_exception(StopIteration, '', next, pliter)

    # Make a list of the postings (so we can test if they're still valid once
    # the iterator has moved on).
    postings = [posting for posting in db.postlist('it')]

    expect(len(postings), len(docids))
    for posting, docid in zip(postings, docids):
        expect(posting.docid, docid)

    expect(len(postings), len(doclengths))
    for posting, doclength in zip(postings, doclengths):
        expect(posting.doclength, doclength)

    expect(len(postings), len(wdfs))
    for posting, wdf in zip(postings, wdfs):
        expect(posting.wdf, wdf)

    expect(len(postings), len(positers))
    for posting in postings:
        expect_exception(xapian.InvalidOperationError,
                         'Iterator has moved, and does not support random access',
                         getattr, posting, 'positer')
def test_valuestream_iter():
    """Test a valuestream iterator on Database.

    Covers plain iteration over several value slots, an empty slot, and
    the interaction of skip_to() with next().  The iterator is advanced
    with the next() builtin, as in the other iterator tests in this file,
    rather than by calling its .next() method directly.

    """
    db = setup_database()

    def pairs(slot):
        # Materialise a value stream as (docid, value) tuples.
        return [(item.docid, item.value) for item in db.valuestream(slot)]

    # Check basic iteration
    expect(pairs(0), [(3, '\xa4'), (4, '\xa2'), (5, '\xa4')])
    expect(pairs(1), [])
    expect(pairs(5), [(5, "five")])
    expect(pairs(9), [(5, "nine")])

    # Test skip_to() on iterator with no values, and behaviours when called
    # after already returning StopIteration.
    i = db.valuestream(1)
    expect_exception(StopIteration, "", i.skip_to, 1)
    expect_exception(StopIteration, "", i.skip_to, 1)
    i = db.valuestream(1)
    expect_exception(StopIteration, "", i.skip_to, 1)
    expect_exception(StopIteration, "", next, i)
    i = db.valuestream(1)
    expect_exception(StopIteration, "", next, i)
    expect_exception(StopIteration, "", i.skip_to, 1)

    # Test that skipping to a value works, and that skipping doesn't have to
    # advance.
    i = db.valuestream(0)
    item = i.skip_to(4)
    expect((item.docid, item.value), (4, '\xa2'))
    item = i.skip_to(4)
    expect((item.docid, item.value), (4, '\xa2'))
    item = i.skip_to(1)
    expect((item.docid, item.value), (4, '\xa2'))
    item = i.skip_to(5)
    expect((item.docid, item.value), (5, '\xa4'))
    expect_exception(StopIteration, "", i.skip_to, 6)

    # Test that alternating skip_to() and next() works.
    i = db.valuestream(0)
    item = next(i)
    expect((item.docid, item.value), (3, '\xa4'))
    item = i.skip_to(4)
    expect((item.docid, item.value), (4, '\xa2'))
    item = next(i)
    expect((item.docid, item.value), (5, '\xa4'))
    expect_exception(StopIteration, "", i.skip_to, 6)

    # Test that next works correctly after skip_to() called with an earlier
    # item.
    i = db.valuestream(0)
    item = i.skip_to(4)
    expect((item.docid, item.value), (4, '\xa2'))
    item = i.skip_to(1)
    expect((item.docid, item.value), (4, '\xa2'))
    item = next(i)
    expect((item.docid, item.value), (5, '\xa4'))

    # Test that next works correctly after skipping to last item
    i = db.valuestream(0)
    item = i.skip_to(5)
    expect((item.docid, item.value), (5, '\xa4'))
    expect_exception(StopIteration, "", next, i)
def test_position_iter():
    """Test position iterator for a document in a database.

    Reads the positions of the term 'it' in document 5 through
    Database.positionlist().

    """
    db = setup_database()

    # Make lists of the item contents
    positions = [position for position in db.positionlist(5, 'it')]

    expect(positions, [2, 7])
def test_value_iter():
    """Test iterators over list of values in a document.

    Document 5 of the fixture database has values in slots 0, 5 and 9.

    """
    database = setup_database()
    document = database.get_document(5)

    items = list(document.values())
    expect(len(items), 3)

    first, second, third = items
    expect(first.num, 0)
    expect(first.value, xapian.sortable_serialise(2))
    expect(second.num, 5)
    expect(second.value, 'five')
    expect(third.num, 9)
    expect(third.value, 'nine')
def test_synonyms_iter():
    """Test iterators over list of synonyms in a database.

    Synonyms are added through a writable database stored on disk.  A
    separate reader opened before commit() must see none of them; the
    writer, and a reader opened after commit(), must see all of them.

    """
    dbpath = 'db_test_synonyms_iter'
    writer = xapian.WritableDatabase(dbpath, xapian.DB_CREATE_OR_OVERWRITE)

    writer.add_synonym('hello', 'hi')
    writer.add_synonym('hello', 'howdy')

    def check(database, visible):
        # Expected results depend on whether the additions are visible
        # through this handle.
        synonyms = ['hi', 'howdy'] if visible else []
        keys = ['hello'] if visible else []
        expect([item for item in database.synonyms('foo')], [])
        expect([item for item in database.synonyms('hello')], synonyms)
        expect([item for item in database.synonym_keys()], keys)
        expect([item for item in database.synonym_keys('foo')], [])
        expect([item for item in database.synonym_keys('he')], keys)
        expect([item for item in database.synonym_keys('hello')], keys)

    check(writer, True)

    reader = xapian.Database(dbpath)
    check(reader, False)

    writer.commit()

    check(writer, True)

    reader = xapian.Database(dbpath)
    check(reader, True)

    writer.close()
    expect(xapian.Database.check(dbpath), 0)
    reader.close()
    shutil.rmtree(dbpath)
def test_metadata_keys_iter():
    """Test iterators over list of metadata keys in a database.

    Keys whose value ends up as the empty string ('item3', 'item4') are
    expected to be absent from the listing.  A reader opened before
    commit() must see no keys; the writer, and a reader opened after
    commit(), must see all of them.

    """
    dbpath = 'db_test_metadata_iter'
    writer = xapian.WritableDatabase(dbpath, xapian.DB_CREATE_OR_OVERWRITE)

    for key, value in (('author', 'richard'),
                       ('item1', 'hello'),
                       ('item1', 'hi'),
                       ('item2', 'howdy'),
                       ('item3', ''),
                       ('item4', 'goodbye'),
                       ('item4', ''),
                       ('type', 'greeting')):
        writer.set_metadata(key, value)

    def check(database, visible):
        # Expected results depend on whether the metadata is visible
        # through this handle.
        if visible:
            every_key = ['author', 'item1', 'item2', 'type']
            item_keys = ['item1', 'item2']
            type_keys = ['type']
        else:
            every_key = []
            item_keys = []
            type_keys = []
        expect([item for item in database.metadata_keys()], every_key)
        expect([item for item in database.metadata_keys('foo')], [])
        expect([item for item in database.metadata_keys('item')], item_keys)
        expect([item for item in database.metadata_keys('it')], item_keys)
        expect([item for item in database.metadata_keys('type')], type_keys)

    check(writer, True)

    reader = xapian.Database(dbpath)
    check(reader, False)

    writer.commit()
    check(writer, True)

    reader = xapian.Database(dbpath)
    check(reader, True)

    writer.close()
    expect(xapian.Database.check(dbpath), 0)
    reader.close()
    shutil.rmtree(dbpath)
def test_spell():
    """Test basic spelling correction features.

    Spellings added through the writer must give suggestions there straight
    away, and through a separate reader only once commit() has been called.

    """
    dbpath = 'db_test_spell'
    writer = xapian.WritableDatabase(dbpath, xapian.DB_CREATE_OR_OVERWRITE)

    def listed(database):
        # Materialise the spelling dictionary as (term, frequency) tuples.
        return [(item.term, item.termfreq) for item in database.spellings()]

    writer.add_spelling('hello')
    writer.add_spelling('mell', 2)
    expect(writer.get_spelling_suggestion('hell'), 'mell')
    expect(listed(writer), [('hello', 1), ('mell', 2)])

    # A reader opened before the commit sees nothing.
    reader = xapian.Database(dbpath)
    expect(reader.get_spelling_suggestion('hell'), '')
    expect(listed(reader), [])

    writer.commit()
    reader = xapian.Database(dbpath)
    expect(writer.get_spelling_suggestion('hell'), 'mell')
    expect(reader.get_spelling_suggestion('hell'), 'mell')
    expect(listed(reader), [('hello', 1), ('mell', 2)])

    writer.close()
    reader.close()
    shutil.rmtree(dbpath)
def test_queryparser_custom_vrp():
    """Test QueryParser with a custom (in python) ValueRangeProcessor.

    The processor maps every range to value slot 7 and prefixes the two
    endpoints with "A" and "B", which must show up in the parsed query.

    """
    class MyVRP(xapian.ValueRangeProcessor):
        def __init__(self):
            xapian.ValueRangeProcessor.__init__(self)

        def __call__(self, begin, end):
            slot = 7
            return (slot, "A" + begin, "B" + end)

    parser = xapian.QueryParser()
    processor = MyVRP()

    parser.add_valuerangeprocessor(processor)
    parsed = parser.parse_query('5..8')

    expect(str(parsed),
           'Query(VALUE_RANGE 7 A5 B8)')
def test_queryparser_custom_vrp_deallocation():
    """Test that QueryParser doesn't delete ValueRangeProcessors too soon.

    The processor is created inside a helper, so the only Python name bound
    to it is gone by the time the parser is used.

    """
    class MyVRP(xapian.ValueRangeProcessor):
        def __init__(self):
            xapian.ValueRangeProcessor.__init__(self)

        def __call__(self, begin, end):
            slot = 7
            return (slot, "A" + begin, "B" + end)

    def make_parser():
        # `processor` goes out of scope when this function returns.
        parser = xapian.QueryParser()
        processor = MyVRP()
        parser.add_valuerangeprocessor(processor)
        return parser

    parser = make_parser()
    parsed = parser.parse_query('5..8')

    expect(str(parsed),
           'Query(VALUE_RANGE 7 A5 B8)')
def test_queryparser_custom_rp():
    """Test QueryParser with a custom (in python) RangeProcessor.

    The processor returns an OP_VALUE_RANGE query on slot 7 with the two
    endpoints prefixed by "A" and "B".

    """
    class MyRP(xapian.RangeProcessor):
        def __init__(self):
            xapian.RangeProcessor.__init__(self)

        def __call__(self, begin, end):
            return xapian.Query(xapian.Query.OP_VALUE_RANGE, 7,
                                "A" + begin, "B" + end)

    parser = xapian.QueryParser()
    processor = MyRP()

    parser.add_rangeprocessor(processor)
    parsed = parser.parse_query('5..8')

    expect(str(parsed),
           'Query(VALUE_RANGE 7 A5 B8)')
def test_queryparser_custom_rp_deallocation():
    """Test that QueryParser doesn't delete RangeProcessors too soon.

    The processor is created inside a helper, so the only Python name bound
    to it is gone by the time the parser is used.

    """
    class MyRP(xapian.RangeProcessor):
        def __init__(self):
            xapian.RangeProcessor.__init__(self)

        def __call__(self, begin, end):
            return xapian.Query(xapian.Query.OP_VALUE_RANGE, 7,
                                "A" + begin, "B" + end)

    def make_parser():
        # `processor` goes out of scope when this function returns.
        parser = xapian.QueryParser()
        processor = MyRP()
        parser.add_rangeprocessor(processor)
        return parser

    parser = make_parser()
    parsed = parser.parse_query('5..8')

    expect(str(parsed),
           'Query(VALUE_RANGE 7 A5 B8)')
def test_scale_weight():
    """Test query OP_SCALE_WEIGHT feature.

    For multipliers 0, 1 and 2.5 the scaled query's weights (compared as
    integers after multiplying by 1000000) must equal the plain query's
    weights times the multiplier.  A negative multiplier must be rejected.

    """
    database = setup_database()
    for mult in (0, 1, 2.5):
        context("checking queries with OP_SCALE_WEIGHT with a multiplier of %r" %
                mult)
        plain = xapian.Query("it")
        scaled = xapian.Query(xapian.Query.OP_SCALE_WEIGHT, plain, mult)

        enquire = xapian.Enquire(database)
        enquire.set_query(plain)
        plain_mset = enquire.get_mset(0, 10)
        enquire.set_query(scaled)
        scaled_mset = enquire.get_mset(0, 10)
        if mult > 0:
            expected = [(int(hit.weight * mult * 1000000), hit.docid)
                        for hit in plain_mset]
        else:
            # Every weight is expected to be 0 here, so the expected list
            # is put in docid order.
            expected = sorted([(0, hit.docid) for hit in plain_mset])
        actual = [(int(hit.weight * 1000000), hit.docid) for hit in scaled_mset]
        expect(actual, expected)

    context("checking queries with OP_SCALE_WEIGHT with a multiplier of -1")
    plain = xapian.Query("it")
    expect_exception(xapian.InvalidArgumentError,
                     "OP_SCALE_WEIGHT requires factor >= 0",
                     xapian.Query,
                     xapian.Query.OP_SCALE_WEIGHT, plain, -1)
def test_weight_normalise():
    """Check OP_SCALE_WEIGHT can be used to normalise query weights.

    For each query a search asking for no results is run first, purely to
    read MSet.get_max_possible().  The query is then wrapped in
    OP_SCALE_WEIGHT with a factor of 1 / max_possible and run again: the
    rescaled search must report a maximum possible weight of 1 (allowing for
    rounding) and every returned weight must lie in (0, 1].

    This checks that get_max_possible() is a valid bound (it is not
    guaranteed to be a tight one) and that scaling compensates correctly.

    """
    db = setup_database()
    query_strings = (
        "it",
        "was",
        "it was",
        "it was four",
        "it was four five",
        '"was it warm" four notpresent',
        "notpresent",
    )
    for query in query_strings:
        context("checking query %r using OP_SCALE_WEIGHT to normalise the weights" % query)
        qp = xapian.QueryParser()
        unscaled = qp.parse_query(query)
        enquire = xapian.Enquire(db)
        enquire.set_query(unscaled)
        probe = enquire.get_mset(0, 0)

        # A max_attained of 0 gives some reassurance that the match didn't
        # actually do the work of calculating any results.
        expect(probe.get_max_attained(), 0)

        max_possible = probe.get_max_possible()
        if query == "notpresent":
            # No scaling check is possible here: the factor would be 1 / 0.
            expect(max_possible, 0)
            continue

        factor = 1.0 / max_possible
        normalised = xapian.Query(xapian.Query.OP_SCALE_WEIGHT, unscaled,
                                  factor)

        enquire = xapian.Enquire(db)
        enquire.set_query(normalised)
        results = enquire.get_mset(0, 10)
        # max_possible should be 1 (excluding rounding errors) after scaling.
        expect(int(results.get_max_possible() * 1000000.0 + 0.5), 1000000)
        for item in results:
            expect(item.weight > 0, True)
            expect(item.weight <= 1, True)
def test_valuesetmatchdecider():
    """Exercise ValueSetMatchDecider with both settings of its flag.

    """
    # Flag True: a document is accepted only if its slot 0 value is in the
    # set.
    decider = xapian.ValueSetMatchDecider(0, True)
    document = xapian.Document()
    expect(decider(document), False)

    decider.add_value('foo')
    document.add_value(0, 'foo')
    expect(decider(document), True)

    decider.remove_value('foo')
    expect(decider(document), False)

    # Flag False: a document is rejected if its slot 0 value is in the set.
    decider = xapian.ValueSetMatchDecider(0, False)
    expect(decider(document), True)

    decider.add_value('foo')
    expect(decider(document), False)
def test_postingsource():
    """Simple test of a PostingSource subclassed in Python.

    A source which yields the odd docids with steadily decreasing weights
    is wrapped in a query, and the query is then handed through several
    nested scopes to check that the source isn't released while something
    still needs it.

    The on-disk database is removed in a ``finally`` clause so that a
    failed expectation doesn't leave the directory behind.

    """
    class OddPostingSource(xapian.PostingSource):
        def __init__(self, limit):
            xapian.PostingSource.__init__(self)
            self.limit = limit

        def init(self, db):
            self.current = -1
            self.weight = db.get_doccount() + 1
            self.set_maxweight(self.weight)

        def get_termfreq_min(self):
            return 0

        def get_termfreq_est(self):
            return int(self.limit / 2)

        def get_termfreq_max(self):
            return self.limit

        def next(self, minweight):
            # Step to the next odd docid; each successive document is
            # worth one less than the previous one.
            self.current += 2
            self.weight -= 1.0
            self.set_maxweight(self.weight)

        def at_end(self):
            return self.current > self.limit

        def get_docid(self):
            return self.current

        def get_weight(self):
            return self.weight

    dbpath = 'db_test_postingsource'
    db = xapian.WritableDatabase(dbpath, xapian.DB_CREATE_OR_OVERWRITE)
    try:
        for _ in range(10):
            doc = xapian.Document()
            db.add_document(doc)

        # Do a dance to check that the posting source doesn't get
        # dereferenced too soon in various cases.
        def mkenq(db):
            # First - check that it's kept when the source goes out of
            # scope.
            def mkquery():
                source = OddPostingSource(10)
                # The posting source is inside a list to check that case is
                # correctly handled.
                return xapian.Query(xapian.Query.OP_OR,
                                    ["terM wHich wilL NoT maTch",
                                     xapian.Query(source)])

            # Check that it's kept when the query goes out of scope.
            def submkenq():
                query = mkquery()
                enquire = xapian.Enquire(db)
                enquire.set_query(query)
                return enquire

            # Check it's kept when the query is retrieved from enquire and
            # put into a new enquire.
            def submkenq2():
                enq1 = submkenq()
                enquire = xapian.Enquire(db)
                enquire.set_query(enq1.get_query())
                return enquire

            return submkenq2()

        enquire = mkenq(db)
        mset = enquire.get_mset(0, 10)

        expect([item.docid for item in mset], [1, 3, 5, 7, 9])
        expect(mset[0].weight, db.get_doccount())

        db.close()
        expect(xapian.Database.check(dbpath), 0)
    finally:
        # Closing an already-closed database has no effect, so this is safe
        # on the success path too.
        db.close()
        shutil.rmtree(dbpath)
def test_postingsource2():
    """Test that a Query keeps a built-in PostingSource alive.

    Ten documents get a numeric value in slot 1.  A
    ValueWeightPostingSource over that slot is wrapped in a query, and the
    Python reference to the source is dropped before searching.  The
    expected result lists the docids in order of decreasing slot value.

    The on-disk database is removed in a ``finally`` clause so that a
    failed expectation doesn't leave the directory behind.

    """
    dbpath = 'db_test_postingsource2'
    db = xapian.WritableDatabase(dbpath, xapian.DB_CREATE_OR_OVERWRITE)
    try:
        vals = (6, 9, 4.5, 4.4, 4.6, 2, 1, 4, 3, 0)
        for val in vals:
            doc = xapian.Document()
            doc.add_value(1, xapian.sortable_serialise(val))
            db.add_document(doc)

        source = xapian.ValueWeightPostingSource(1)
        query = xapian.Query(source)
        del source  # Check that query keeps a reference to it.

        enquire = xapian.Enquire(db)
        enquire.set_query(query)
        mset = enquire.get_mset(0, 10)

        expect([item.docid for item in mset], [2, 1, 5, 3, 4, 8, 9, 6, 7, 10])

        db.close()
        expect(xapian.Database.check(dbpath), 0)
    finally:
        # Closing an already-closed database has no effect, so this is safe
        # on the success path too.
        db.close()
        shutil.rmtree(dbpath)
def test_postingsource3():
    """Test that ValuePostingSource can be usefully subclassed.

    The subclass delegates iteration to ValuePostingSource and supplies its
    own weight: the cube of the numeric value stored in the slot.  The
    expected result lists the docids in order of decreasing slot value.

    The on-disk database is removed in a ``finally`` clause so that a
    failed expectation doesn't leave the directory behind.

    """
    dbpath = 'db_test_postingsource3'
    db = xapian.WritableDatabase(dbpath, xapian.DB_CREATE_OR_OVERWRITE)
    try:
        vals = (1, 3, 2, 4)
        for wt in vals:
            doc = xapian.Document()
            doc.add_value(1, xapian.sortable_serialise(wt))
            db.add_document(doc)

        class PyValuePostingSource(xapian.ValuePostingSource):
            def __init__(self, slot):
                xapian.ValuePostingSource.__init__(self, slot)

            def init(self, db):
                xapian.ValuePostingSource.init(self, db)
                self.current = -1
                # The maximum weight is the cube of the slot's upper bound,
                # matching what get_weight() computes per document.
                slot = self.get_slot()
                ub = db.get_value_upper_bound(slot)
                self.set_maxweight(xapian.sortable_unserialise(ub) ** 3)

            def next(self, minweight):
                return xapian.ValuePostingSource.next(self, minweight)

            def get_weight(self):
                value = self.get_value()
                return xapian.sortable_unserialise(value) ** 3

        source = PyValuePostingSource(1)
        query = xapian.Query(source)
        # NOTE: dropping the Python reference is left disabled in this test:
        #del source # Check that query keeps a reference to it.

        enquire = xapian.Enquire(db)
        enquire.set_query(query)
        mset = enquire.get_mset(0, 10)

        expect([item.docid for item in mset], [4, 2, 3, 1])

        db.close()
        expect(xapian.Database.check(dbpath), 0)
    finally:
        # Closing an already-closed database has no effect, so this is safe
        # on the success path too.
        db.close()
        shutil.rmtree(dbpath)
def test_value_stats():
    """Simple test of being able to get value statistics.

    Ten documents get a numeric value in slot 1 only.  The frequency and
    bounds are then checked for slot 1 and for the unused slots 0 and 2.

    The on-disk database is removed in a ``finally`` clause so that a
    failed expectation doesn't leave the directory behind.

    """
    dbpath = 'db_test_value_stats'
    db = xapian.WritableDatabase(dbpath, xapian.DB_CREATE_OR_OVERWRITE)
    try:
        vals = (6, 9, 4.5, 4.4, 4.6, 2, 1, 4, 3, 0)
        for val in vals:
            doc = xapian.Document()
            doc.add_value(1, xapian.sortable_serialise(val))
            db.add_document(doc)

        expect(db.get_value_freq(0), 0)
        expect(db.get_value_lower_bound(0), "")
        expect(db.get_value_upper_bound(0), "")
        expect(db.get_value_freq(1), 10)
        expect(db.get_value_lower_bound(1), xapian.sortable_serialise(0))
        expect(db.get_value_upper_bound(1), xapian.sortable_serialise(9))
        expect(db.get_value_freq(2), 0)
        expect(db.get_value_lower_bound(2), "")
        expect(db.get_value_upper_bound(2), "")

        db.close()
        expect(xapian.Database.check(dbpath), 0)
    finally:
        # Closing an already-closed database has no effect, so this is safe
        # on the success path too.
        db.close()
        shutil.rmtree(dbpath)
def test_get_uuid():
    """Test getting UUIDs from databases.

    Two databases are created and each is also opened read-only.  The
    checks are that the two databases have different UUIDs, that the
    read-only handle reports the same UUID as the writable one, and that a
    Database with a single sub-database added reports that sub-database's
    UUID.

    Every handle opened is closed and both directories are removed in a
    ``finally`` clause so that a failure doesn't leave them behind.

    """
    dbpath = 'db_test_get_uuid'
    paths = (dbpath + "1", dbpath + "2")
    # Handles are recorded in the order they're opened so that cleanup can
    # close exactly those which exist, in that same order.
    handles = []
    try:
        db1 = xapian.WritableDatabase(paths[0], xapian.DB_CREATE_OR_OVERWRITE)
        handles.append(db1)
        db2 = xapian.WritableDatabase(paths[1], xapian.DB_CREATE_OR_OVERWRITE)
        handles.append(db2)
        dbr1 = xapian.Database(paths[0])
        handles.append(dbr1)
        dbr2 = xapian.Database(paths[1])
        handles.append(dbr2)
        expect(db1.get_uuid() != db2.get_uuid(), True)
        expect(db1.get_uuid(), dbr1.get_uuid())
        expect(db2.get_uuid(), dbr2.get_uuid())

        db = xapian.Database()
        handles.append(db)
        db.add_database(db1)
        expect(db1.get_uuid(), db.get_uuid())
    finally:
        for handle in handles:
            handle.close()
        for path in paths:
            # A directory may be missing if creating its database failed.
            if os.path.isdir(path):
                shutil.rmtree(path)
def test_director_exception():
    """Check exceptions raised inside Python directors reach the caller.

    Each decider subclass raises from __call__.  The exception must come
    through unchanged both when the decider is called directly from Python
    and when it is called during get_eset() / get_mset().

    """
    db = setup_database()
    enq = xapian.Enquire(db)
    enq.set_query(xapian.Query('it'))

    class TestException(Exception):
        def __init__(self, a, b):
            # The message is the concatenation of the two arguments.
            Exception.__init__(self, a + b)

    rset = xapian.RSet()
    rset.add_document(1)

    class EDecider(xapian.ExpandDecider):
        def __call__(self, term):
            raise TestException("foo", "bar")

    expand_decider = EDecider()
    expect_exception(TestException, "foobar", expand_decider, "foo")
    expect_exception(TestException, "foobar",
                     enq.get_eset, 10, rset, expand_decider)

    class MDecider(xapian.MatchDecider):
        def __call__(self, doc):
            raise TestException("foo", "bar")

    match_decider = MDecider()
    expect_exception(TestException, "foobar",
                     match_decider, xapian.Document())
    expect_exception(TestException, "foobar",
                     enq.get_mset, 0, 10, None, match_decider)
def check_vals(db, vals):
    """Check that slot 1 of every document in db matches vals.

    vals is indexed by docid, and every docid from 1 up to the database's
    last docid is looked up in it.

    """
    last_docid = db.get_lastdocid()
    for docid in xrange(1, last_docid + 1):
        document = db.get_document(docid)
        stored = document.get_value(1)
        expect(stored, vals[docid], "Expected stored value in doc %d" % docid)
def test_value_mods():
    """Test handling of modifications to values.

    Documents with a value in slot 1 are added, then replaced repeatedly
    (first one fixed document, then many chosen at random, then every
    document which still has a non-empty value).  After each stage the
    stored values are compared against a dict tracking what they should be,
    both before and after committing where the original test did so.

    The random generator is seeded so the sequence of modifications is the
    same on every run.  The on-disk database is removed in a ``finally``
    clause so that a failed expectation doesn't leave the directory behind.

    """
    dbpath = 'db_test_value_mods'
    db = xapian.WritableDatabase(dbpath, xapian.DB_CREATE_OR_OVERWRITE)
    try:
        random.seed(42)
        doccount = 1000
        vals = {}

        # Add a value to each of the initial documents.
        for num in xrange(1, doccount):
            doc = xapian.Document()
            val = 'val%d' % num
            doc.add_value(1, val)
            db.add_document(doc)
            vals[num] = val
        db.commit()
        check_vals(db, vals)

        # Modify one of the values (this is a regression test which failed
        # with the initial implementation of streaming values).
        doc = xapian.Document()
        val = 'newval0'
        doc.add_value(1, val)
        db.replace_document(2, doc)
        vals[2] = val
        db.commit()
        check_vals(db, vals)

        # Do some random modifications.
        for count in xrange(1, doccount * 2):
            docid = random.randint(1, doccount)
            doc = xapian.Document()

            # Every fifth replacement stores no value at all.
            if count % 5 == 0:
                val = ''
            else:
                val = 'newval%d' % count
                doc.add_value(1, val)
            db.replace_document(docid, doc)
            vals[docid] = val

        # Check the values before and after modification.
        check_vals(db, vals)
        db.commit()
        check_vals(db, vals)

        # Delete all the values which are non-empty, in a random order.
        keys = [key for key, val in vals.iteritems() if val != '']
        random.shuffle(keys)
        for key in keys:
            doc = xapian.Document()
            db.replace_document(key, doc)
            vals[key] = ''
        check_vals(db, vals)
        db.commit()
        check_vals(db, vals)

        db.close()
        expect_exception(xapian.DatabaseError, "Database has been closed",
                         check_vals, db, vals)
    finally:
        # Closing an already-closed database has no effect, so this is safe
        # on the success path too.
        db.close()
        shutil.rmtree(dbpath)
def test_serialise_document():
    """Check documents survive a serialise() / unserialise() round trip.

    A freshly built document and a document read back from a database are
    both checked: terms with their wdf, values and data must all match.

    """
    def check_round_trip(original, term_count, data):
        restored = xapian.Document.unserialise(original.serialise())
        expect(len(list(original.termlist())), len(list(restored.termlist())))
        expect(len(list(original.termlist())), term_count)
        expect([(item.term, item.wdf) for item in original.termlist()],
               [(item.term, item.wdf) for item in restored.termlist()])
        expect([(item.num, item.value) for item in original.values()],
               [(item.num, item.value) for item in restored.values()])
        expect(original.get_data(), restored.get_data())
        expect(original.get_data(), data)

    built = xapian.Document()
    built.add_term('foo', 2)
    built.add_value(1, 'bar')
    built.set_data('baz')
    check_round_trip(built, 1, 'baz')

    db = setup_database()
    stored = db.get_document(1)
    check_round_trip(stored, 3, 'is it cold?')
def test_serialise_query():
    """Check queries survive a serialise() / unserialise() round trip.

    An empty query, a single-term query and an OR of two terms are each
    compared by their string description.

    """
    def check_round_trip(original, description):
        restored = xapian.Query.unserialise(original.serialise())
        expect(str(original), str(restored))
        expect(str(original), description)

    check_round_trip(xapian.Query(), 'Query()')
    check_round_trip(xapian.Query('hello'), 'Query(hello)')
    check_round_trip(xapian.Query(xapian.Query.OP_OR, ('hello', 'world')),
                     'Query((hello OR world))')
def test_preserve_query_parser_stopper():
    """Check a stopper set on a QueryParser outlives its Python reference.

    The stopper is created inside a helper and explicitly deleted there
    once it has been handed to the parser.  Parsing must still apply it.

    """
    def make_qp():
        parser = xapian.QueryParser()
        stop = xapian.SimpleStopper()
        for word in ('to', 'not'):
            stop.add(word)
        parser.set_stopper(stop)
        del stop
        return parser

    queryparser = make_qp()
    query = queryparser.parse_query('to be')
    stopped = [term for term in queryparser.stoplist()]
    expect(stopped, ['to'])
def test_preserve_term_generator_stopper():
    """Check a stopper set on a TermGenerator outlives its Python reference.

    The stopper is created inside a helper and explicitly deleted there
    once it has been handed to the term generator.  Indexing must still
    apply it: the expected terms include no stemmed form of 'to'.

    """
    def make_tg():
        generator = xapian.TermGenerator()
        generator.set_stemmer(xapian.Stem('en'))
        stop = xapian.SimpleStopper()
        for word in ('to', 'not'):
            stop.add(word)
        generator.set_stopper(stop)
        del stop
        return generator

    termgen = make_tg()

    termgen.index_text('to be')
    doc = termgen.get_document()
    terms = sorted(entry.term for entry in doc.termlist())
    expect(terms, ['Zbe', 'be', 'to'])
def test_preserve_enquire_sorter():
    """Check a sorter set on an Enquire outlives its Python reference.

    For each of the three Enquire methods which accept a key maker, the
    sorter is created inside a helper and explicitly deleted there once it
    has been handed over.  Running a search afterwards must still work.

    """
    db = xapian.WritableDatabase('', xapian.DB_BACKEND_INMEMORY)
    doc = xapian.Document()
    doc.add_term('foo')
    doc.add_value(1, '1')
    # The same document is deliberately added twice.
    db.add_document(doc)
    db.add_document(doc)

    def make_enq(db, setter_name):
        enq = xapian.Enquire(db)
        sorter = xapian.MultiValueKeyMaker()
        getattr(enq, setter_name)(sorter, False)
        del sorter
        return enq

    setter_names = (
        'set_sort_by_key',
        'set_sort_by_key_then_relevance',
        'set_sort_by_relevance_then_key',
    )
    for setter_name in setter_names:
        enq = make_enq(db, setter_name)
        enq.set_query(xapian.Query('foo'))
        enq.get_mset(0, 10)
def test_matchspy():
    """Check the built-in ValueCountMatchSpy and clear_matchspies().

    """
    db = setup_database()
    query = xapian.Query(xapian.Query.OP_OR, "was", "it")
    enq = xapian.Enquire(db)
    enq.set_query(query)

    def set_matchspy_deref(enq):
        """Set a matchspy, and then drop the reference, to check that it
        doesn't get deleted too soon.
        """
        dropped = xapian.ValueCountMatchSpy(0)
        enq.add_matchspy(dropped)
        del dropped

    set_matchspy_deref(enq)
    mset = enq.get_mset(0, 10)
    expect(len(mset), 5)

    spy = xapian.ValueCountMatchSpy(0)
    enq.add_matchspy(spy)
    # Regression test for clear_matchspies() - used to always raise an
    # exception due to a copy and paste error in its definition.
    enq.clear_matchspies()
    mset = enq.get_mset(0, 10)
    # The spy was removed before the search ran, so it has counted nothing.
    expect([item for item in spy.values()], [])

    enq.add_matchspy(spy)
    mset = enq.get_mset(0, 10)
    expect(spy.get_total(), 5)

    by_value = [(item.term, item.termfreq) for item in spy.values()]
    expect(by_value, [
        (xapian.sortable_serialise(1.5), 1),
        (xapian.sortable_serialise(2), 2),
    ])

    by_frequency = [(item.term, item.termfreq) for item in spy.top_values(10)]
    expect(by_frequency, [
        (xapian.sortable_serialise(2), 2),
        (xapian.sortable_serialise(1.5), 1),
    ])
def test_import_star():
    """Check that "from xapian import *" works.

    Regression test: this failed in the 1.2.0 release.  Star imports aren't
    normally good style, but they should work anyway!

    Importing the helper module is the whole test.

    """
    import test_xapian_star
def test_latlongcoords_iter():
    """Check that LatLongCoords can be iterated over from Python.

    """
    coords = xapian.LatLongCoords()
    # An empty container yields nothing.
    expect([c for c in coords], [])

    for longitude in (0, 1):
        coords.append(xapian.LatLongCoord(0, longitude))

    described = [str(c) for c in coords]
    expect(described, ['Xapian::LatLongCoord(0, 0)',
                       'Xapian::LatLongCoord(0, 1)'])
def test_compactor():
    """Check database compaction, with and without a custom Compactor.

    Two small databases with overlapping terms and one conflicting
    metadata key are merged into a third: first with the default
    behaviour, then with a Compactor subclass which logs status messages
    and joins conflicting metadata values with commas.

    Everything lives in a temporary directory which is removed on exit,
    whether or not the test passes.

    """
    tmpdir = tempfile.mkdtemp()
    db1 = db2 = db3 = None
    try:
        db1path, db2path, db3path = [os.path.join(tmpdir, name)
                                     for name in ('db1', 'db2', 'db3')]
        merged_terms = [('Hello', 2), ('Hello1', 1), ('Hello2', 1)]

        # Set up a couple of sample input databases
        db1 = xapian.WritableDatabase(db1path, xapian.DB_CREATE_OR_OVERWRITE)
        doc1 = xapian.Document()
        for term in ('Hello', 'Hello1'):
            doc1.add_term(term)
        doc1.add_value(0, 'Val1')
        db1.set_metadata('key', '1')
        db1.set_metadata('key1', '1')
        db1.add_document(doc1)
        db1.commit()

        db2 = xapian.WritableDatabase(db2path, xapian.DB_CREATE_OR_OVERWRITE)
        doc2 = xapian.Document()
        for term in ('Hello', 'Hello2'):
            doc2.add_term(term)
        doc2.add_value(0, 'Val2')
        db2.set_metadata('key', '2')
        db2.set_metadata('key2', '2')
        db2.add_document(doc2)
        db2.commit()

        # Compact with the default compactor
        # Metadata conflicts are resolved by picking the first value
        db_to_compact = xapian.Database()
        db_to_compact.add_database(xapian.Database(db1path))
        db_to_compact.add_database(xapian.Database(db2path))
        db_to_compact.compact(db3path)

        db3 = xapian.Database(db3path)
        expect([(item.term, item.termfreq) for item in db3.allterms()],
               merged_terms)
        expect(db3.get_document(1).get_value(0), 'Val1')
        expect(db3.get_document(2).get_value(0), 'Val2')
        expect(db3.get_metadata('key'), '1')
        expect(db3.get_metadata('key1'), '1')
        expect(db3.get_metadata('key2'), '2')

        context("testing a custom compactor which merges duplicate metadata")
        class MyCompactor(xapian.Compactor):
            def __init__(self):
                xapian.Compactor.__init__(self)
                self.log = []

            def set_status(self, table, status):
                # An empty status is logged as the start of work on a table.
                if status:
                    entry = '%s: %s' % (table, status)
                else:
                    entry = 'Starting %s' % table
                self.log.append(entry)

            def resolve_duplicate_metadata(self, key, vals):
                return ','.join(vals)

        c = MyCompactor()
        db_to_compact = xapian.Database()
        db_to_compact.add_database(xapian.Database(db1path))
        db_to_compact.add_database(xapian.Database(db2path))
        db_to_compact.compact(db3path, 0, 0, c)
        log = '\n'.join(c.log)
        # Check we got some messages in the log
        expect('Starting postlist' in log, True)

        db3 = xapian.Database(db3path)
        expect([(item.term, item.termfreq) for item in db3.allterms()],
               merged_terms)
        expect(db3.get_metadata('key'), '1,2')
        expect(db3.get_metadata('key1'), '1')
        expect(db3.get_metadata('key2'), '2')

    finally:
        # Close whichever handles were opened before removing the files.
        for handle in (db1, db2, db3):
            if handle is not None:
                handle.close()

        shutil.rmtree(tmpdir)
def test_custom_matchspy():
    """Check a MatchSpy subclassed in Python gets called during a match.

    """
    class MSpy(xapian.MatchSpy):
        def __init__(self):
            xapian.MatchSpy.__init__(self)
            self.count = 0

        def __call__(self, doc, weight):
            # Just count how many times the spy is called.
            self.count += 1

    mspy = MSpy()

    db = setup_database()
    query = xapian.Query(xapian.Query.OP_OR, "was", "it")

    enquire = xapian.Enquire(db)
    enquire.add_matchspy(mspy)
    enquire.set_query(query)
    mset = enquire.get_mset(0, 1)
    expect(len(mset), 1)
    called_at_least_once = mspy.count >= 1
    expect(called_at_least_once, True)

    expect(db.get_doccount(), 5)
def test_removed_features():
    """Check that a list of names is absent from the API.

    Looking up each listed attribute on the module, or on an instance of
    the relevant class, must raise AttributeError.

    """
    db = xapian.WritableDatabase('', xapian.DB_BACKEND_INMEMORY)
    doc = xapian.Document()
    enq = xapian.Enquire(db)
    eset = xapian.ESet()
    mset = xapian.MSet()
    query = xapian.Query()
    qp = xapian.QueryParser()
    titer = xapian._TermIterator()
    postiter = xapian._PostingIterator()

    # Each entry pairs an object with the attribute names it must not have.
    absent = (
        (xapian, (
            'Stem_get_available_languages',
            'TermIterator',
            'PositionIterator',
            'PostingIterator',
            'ValueIterator',
            'MSetIterator',
            'ESetIterator',
        )),
        (db, (
            'allterms_begin', 'allterms_end',
            'metadata_keys_begin', 'metadata_keys_end',
            'synonym_keys_begin', 'synonym_keys_end',
            'synonyms_begin', 'synonyms_end',
            'spellings_begin', 'spellings_end',
            'positionlist_begin', 'positionlist_end',
            'postlist_begin', 'postlist_end',
            'termlist_begin', 'termlist_end',
        )),
        (doc, (
            'termlist_begin', 'termlist_end',
            'values_begin', 'values_end',
        )),
        (enq, ('get_matching_terms_begin', 'get_matching_terms_end')),
        (eset, ('begin', 'end')),
        (mset, ('begin', 'end')),
        (postiter, ('positionlist_begin', 'positionlist_end')),
        (query, ('get_terms_begin', 'get_terms_end')),
        (qp, (
            'stoplist_begin', 'stoplist_end',
            'unstem_begin', 'unstem_end',
        )),
        (titer, ('positionlist_begin', 'positionlist_end')),
    )

    for obj, attrs in absent:
        for attr in attrs:
            expect_exception(AttributeError, None, getattr, obj, attr)
def test_repr():
    """Check that repr() of wrapped objects doesn't return None.

    Each class is instantiated with the listed arguments immediately
    before its repr() is taken.

    """
    # repr() returned None in 1.4.0.
    cases = (
        (xapian.Query, ('foo',)),
        (xapian.AssertionError, ('foo',)),
        (xapian.InvalidArgumentError, ('foo',)),
        (xapian.InvalidOperationError, ('foo',)),
        (xapian.UnimplementedError, ('foo',)),
        (xapian.DatabaseError, ('foo',)),
        (xapian.DatabaseCorruptError, ('foo',)),
        (xapian.DatabaseCreateError, ('foo',)),
        (xapian.DatabaseLockError, ('foo',)),
        (xapian.DatabaseModifiedError, ('foo',)),
        (xapian.DatabaseOpeningError, ('foo',)),
        (xapian.DatabaseVersionError, ('foo',)),
        (xapian.DocNotFoundError, ('foo',)),
        (xapian.FeatureUnavailableError, ('foo',)),
        (xapian.InternalError, ('foo',)),
        (xapian.NetworkError, ('foo',)),
        (xapian.NetworkTimeoutError, ('foo',)),
        (xapian.QueryParserError, ('foo',)),
        (xapian.SerialisationError, ('foo',)),
        (xapian.RangeError, ('foo',)),
        (xapian.WildcardError, ('foo',)),
        (xapian.Document, ()),
        (xapian.Registry, ()),
        (xapian.Query, ()),
        (xapian.Stem, ('en',)),
        (xapian.TermGenerator, ()),
        (xapian.MSet, ()),
        (xapian.ESet, ()),
        (xapian.RSet, ()),
        (xapian.MultiValueKeyMaker, ()),
        (xapian.SimpleStopper, ()),
        (xapian.RangeProcessor, ()),
        (xapian.DateRangeProcessor, (1,)),
        (xapian.NumberRangeProcessor, (1,)),
        (xapian.StringValueRangeProcessor, (1,)),
        (xapian.DateValueRangeProcessor, (1,)),
        (xapian.NumberValueRangeProcessor, (1,)),
        (xapian.QueryParser, ()),
        (xapian.BoolWeight, ()),
        (xapian.TfIdfWeight, ()),
        (xapian.BM25Weight, ()),
        (xapian.BM25PlusWeight, ()),
        (xapian.TradWeight, ()),
        (xapian.InL2Weight, ()),
        (xapian.IfB2Weight, ()),
        (xapian.IneB2Weight, ()),
        (xapian.BB2Weight, ()),
        (xapian.DLHWeight, ()),
        (xapian.PL2Weight, ()),
        (xapian.PL2PlusWeight, ()),
        (xapian.DPHWeight, ()),
        (xapian.LMWeight, ()),
        (xapian.CoordWeight, ()),
        (xapian.Compactor, ()),
        (xapian.ValuePostingSource, (1,)),
        (xapian.ValueWeightPostingSource, (1,)),
        (xapian.DecreasingValueWeightPostingSource, (1,)),
        (xapian.ValueMapPostingSource, (1,)),
        (xapian.FixedWeightPostingSource, (1,)),
        (xapian.ValueCountMatchSpy, (1,)),
        (xapian.LatLongCoord, ()),
        (xapian.LatLongCoords, ()),
        (xapian.GreatCircleMetric, ()),
        (xapian.Database, ()),
        (xapian.WritableDatabase, ()),
    )
    for cls, args in cases:
        expect(repr(cls(*args)) is None, False)
# Overall outcome; cleared by run() if any invocation of the tests fails.
result = True

# Run all tests (ie, callables with names starting "test_").
def run():
    """Run the tests, recording a failure in the module-level result."""
    global result
    passed = runtests(globals(), sys.argv[1:])
    if not passed:
        result = False

print("Running tests without threads")
run()

if have_threads:
    print("Running tests with threads")

    # This testcase seems to just block when run in a thread, so just remove
    # it before running tests in a thread.
    del test_import_star

    t = threading.Thread(name='test runner', target=run)
    t.start()
    # Block until the thread has completed so the thread gets a chance to exit
    # with error status.
    t.join()

# Exit with a failure status if either run reported a failure.
if not result:
    sys.exit(1)

# vim:syntax=python:set expandtab: