xapian-inspect: Support glass instead of chert
[xapian.git] / xapian-bindings / python / pythontest.py
blob85eacde5549f965d95568329a812c804249ba48f
1 # Tests of Python-specific parts of the xapian bindings.
3 # Copyright (C) 2007 Lemur Consulting Ltd
4 # Copyright (C) 2008,2009,2010,2011,2013,2014,2015,2016 Olly Betts
5 # Copyright (C) 2010,2011 Richard Boulton
7 # This program is free software; you can redistribute it and/or
8 # modify it under the terms of the GNU General Public License as
9 # published by the Free Software Foundation; either version 2 of the
10 # License, or (at your option) any later version.
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with this program; if not, write to the Free Software
19 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
20 # USA
22 import os
23 import random
24 import shutil
25 import sys
26 import tempfile
27 import xapian
29 try:
30 import threading
31 have_threads = True
32 except ImportError:
33 have_threads = False
35 from testsuite import *
def setup_database():
    """Create the inmemory database of 5 documents shared by these tests.

    Documents 2 to 5 are all added from the same Document object, which is
    extended between additions, so each one also carries the terms and
    postings given to the documents added before it.

    """
    database = xapian.WritableDatabase('', xapian.DB_BACKEND_INMEMORY)

    # Document 1 stands alone.
    cold = xapian.Document()
    cold.set_data("is it cold?")
    cold.add_term("is")
    cold.add_posting("it", 1)
    cold.add_posting("cold", 2)
    database.add_document(cold)

    # Document 2 is the starting point for documents 3, 4 and 5.
    warm = xapian.Document()
    warm.set_data("was it warm?")
    warm.add_posting("was", 1)
    warm.add_posting("it", 2)
    warm.add_posting("warm", 3)
    database.add_document(warm)

    # Document 3: adds the term "two" and a value in slot 0.
    warm.set_data("was it warm? two")
    warm.add_term("two", 2)
    warm.add_value(0, xapian.sortable_serialise(2))
    database.add_document(warm)

    # Document 4: adds the term "three" and sets slot 0 again.
    warm.set_data("was it warm? three")
    warm.add_term("three", 3)
    warm.add_value(0, xapian.sortable_serialise(1.5))
    database.add_document(warm)

    # Document 5: more occurrences of "it", plus values in slots 5 and 9.
    warm.set_data("was it warm? four it")
    warm.add_term("four", 4)
    warm.add_term("it", 6)
    warm.add_posting("it", 7)
    warm.add_value(5, 'five')
    warm.add_value(9, 'nine')
    warm.add_value(0, xapian.sortable_serialise(2))
    database.add_document(warm)

    expect(database.get_doccount(), 5)
    return database
def test_exception_base():
    """Check that xapian exceptions have Exception as a base class.

    """
    try:
        raise xapian.InvalidOperationError("Test exception")
    except Exception:
        # Reaching this handler is the whole test: an exception which was not
        # derived from Exception would propagate out of this function.  The
        # exception object itself isn't needed, so it isn't bound to a name.
        pass
def test_mset_iter():
    """Test iterators over MSets.

    The complete result set is checked first; then every sub-mset with a
    start offset and a maximum size between 0 and 5 is compared against it.

    """
    db = setup_database()
    either_term = xapian.Query(xapian.Query.OP_OR, "was", "it")

    enquire = xapian.Enquire(db)
    enquire.set_query(either_term)
    mset = enquire.get_mset(0, 10)
    items = [item for item in mset]
    expect(len(items), 5)
    expect(len(mset), len(items), "Expected number of items to be length of mset")

    context("testing returned item from mset")
    third = items[2]
    expect(third.docid, 4)
    expect(third.rank, 2)
    expect(third.percent, 86)
    expect(third.collapse_key, '')
    expect(third.collapse_count, 0)
    expect(third.document.get_data(), 'was it warm? three')

    # Test coverage for mset.items
    mset_items = mset.items
    expect(len(mset), len(mset_items), "Expected number of items to be length of mset")

    context("testing mset_items[2]")
    expect(mset_items[2][xapian.MSET_DID], 4)
    expect(mset_items[2][xapian.MSET_WT] > 0.0, True)
    expect(mset_items[2][xapian.MSET_RANK], 2)
    expect(mset_items[2][xapian.MSET_PERCENT], 86)
    # MSET_DOCUMENT is documented but not implemented!  FIXME: resolve this -
    # if it has never worked, we may just want to remove the documentation for
    # it.
    #expect(mset_items[2][xapian.MSET_DOCUMENT].get_data(), 'was it warm? three')

    # Check iterators for sub-msets against the whole mset.
    for start in range(6):
        for maxitems in range(6):
            window = (start, maxitems)
            context("checking iterators for sub-mset from %d, maxitems %d" % window)
            submset = enquire.get_mset(start, maxitems)
            for num, item in enumerate(submset):
                labels = (num,) + window
                context("testing hit %d for sub-mset from %d, maxitems %d" % labels)
                expect(item.rank, num + start)

                context("comparing iterator item %d for sub-mset from %d, maxitems %d against hit" % labels)
                hit = submset.get_hit(num)
                expect(hit.docid, item.docid)
                expect(hit.rank, item.rank)
                expect(hit.percent, item.percent)
                expect(hit.document.get_data(), item.document.get_data())
                expect(hit.collapse_key, item.collapse_key)
                expect(hit.collapse_count, item.collapse_count)

                context("comparing iterator item %d for sub-mset from %d, maxitems %d against hit from whole mset" % labels)
                hit = mset.get_hit(num + start)
                expect(hit.docid, item.docid)
                expect(hit.rank, item.rank)
                expect(hit.percent, item.percent)
                expect(hit.document.get_data(), item.document.get_data())
                expect(hit.collapse_key, item.collapse_key)
                expect(hit.collapse_count, item.collapse_count)

                context("comparing iterator item %d for sub-mset from %d, maxitems %d against direct access with []" % labels)
                expect(submset[num].docid, item.docid)
                expect(submset[num].rank, item.rank)
                expect(submset[num].percent, item.percent)
                expect(submset[num].document.get_data(), item.document.get_data())
                expect(submset[num].collapse_key, item.collapse_key)
                expect(submset[num].collapse_count, item.collapse_count)

            context("Checking out of range access to mset, for sub-mset from %d, maxitems %d" % window)
            # Test out-of-range access to mset: far outside in both
            # directions, then the first invalid index on either side.
            for bad_index in (-10, 10, -1 - len(submset), len(submset)):
                expect_exception(IndexError, 'Mset index out of range',
                                 submset.__getitem__, bad_index)

            # Check that the item contents remain valid when the iterator has
            # moved on.
            saved_items = [item for item in submset]
            for num, item in enumerate(saved_items):
                context("comparing iterator item %d for sub-mset mset from %d, maxitems %d against saved item" % ((num,) + window))
                expect(submset[num].docid, item.docid)
                expect(submset[num].rank, item.rank)
                expect(submset[num].percent, item.percent)
                expect(submset[num].document.get_data(), item.document.get_data())
                expect(submset[num].collapse_key, item.collapse_key)
                expect(submset[num].collapse_count, item.collapse_count)

            # Check that the right number of items exist in the mset.
            context("checking length of sub-mset from %d, maxitems %d" % window)
            expected_size = min(maxitems, 5 - start)
            items = [item for item in submset]
            expect(len(items), expected_size)
            expect(len(submset), expected_size)
def test_eset_iter():
    """Test iterators over ESets.

    The expand set for document 3 is fetched once with no query set and once
    with a query set, and the two results are compared.

    """
    db = setup_database()
    query = xapian.Query(xapian.Query.OP_OR, "was", "it")
    relevant = xapian.RSet()
    relevant.add_document(3)

    context("getting eset items without a query")
    enquire = xapian.Enquire(db)
    eset = enquire.get_eset(10, relevant)
    without_query = [item for item in eset]
    expect(len(without_query), 3)
    expect(len(without_query), len(eset))

    context("getting eset items with a query")
    enquire = xapian.Enquire(db)
    enquire.set_query(query)
    eset = enquire.get_eset(10, relevant)
    with_query = [item for item in eset]
    expect(len(with_query), 2)
    expect(len(with_query), len(eset))

    # The two items obtained with a query are expected to match the first
    # and the last of the three obtained without one.
    pairs = ((with_query[0], without_query[0]),
             (with_query[1], without_query[2]))

    context("comparing eset items with a query to those without")
    for got, wanted in pairs:
        expect(got.term, wanted.term)

    context("comparing eset weights with a query to those without")
    for got, wanted in pairs:
        expect(got.weight, wanted.weight)
def test_matchingterms_iter():
    """Test Enquire.matching_terms iterator.

    """
    db = setup_database()
    query = xapian.Query(xapian.Query.OP_OR, ("was", "it", "warm", "two"))

    # Prior to 1.2.4 Enquire.matching_terms() leaked references to its members.

    enquire = xapian.Enquire(db)
    enquire.set_query(query)
    mset = enquire.get_mset(0, 10)

    for hit in mset:
        # matching_terms() takes either a document id or an mset item, and
        # both forms must give the same list of term names.
        by_docid = [name for name in enquire.matching_terms(hit.docid)]
        by_item = [name for name in enquire.matching_terms(hit)]
        expect(by_docid, by_item)

    top_terms = [name for name in enquire.matching_terms(mset.get_hit(0))]
    expect(top_terms, ['it', 'two', 'warm', 'was'])
def test_queryterms_iter():
    """Test Query term iterator.

    """
    # Only the Query object is examined here, so no database is set up.
    query = xapian.Query(xapian.Query.OP_OR, ("was", "it", "warm", "two"))

    # Make a list of the term names
    terms = [term for term in query]
    expect(terms, ['it', 'two', 'warm', 'was'])
def test_queryparser_stoplist_iter():
    """Test QueryParser stoplist iterator.

    The same query string is parsed with a stemmer only, with a stopper
    only, and with both, checking the stoplist before and after each parse.

    """
    text = 'to be or not to be is the questions'
    stopped = ['to', 'not', 'to']
    stemmer = xapian.Stem('en')

    def stoplist_of(parser):
        # Drain the parser's stoplist iterator into a plain list.
        return [term for term in parser.stoplist()]

    # Check behaviour without having set a stoplist.
    queryparser = xapian.QueryParser()
    queryparser.set_stemmer(stemmer)
    queryparser.set_stemming_strategy(queryparser.STEM_SOME)
    expect(stoplist_of(queryparser), [])
    query = queryparser.parse_query(text)
    expect(stoplist_of(queryparser), [])
    expect(str(query),
           'Query((Zto@1 OR Zbe@2 OR Zor@3 OR Znot@4 OR Zto@5 OR Zbe@6 OR '
           'Zis@7 OR Zthe@8 OR Zquestion@9))')

    # Check behaviour with a stoplist, but no stemmer
    queryparser = xapian.QueryParser()
    stopper = xapian.SimpleStopper()
    for word in ('to', 'not', 'question'):
        stopper.add(word)
    queryparser.set_stopper(stopper)
    expect(stoplist_of(queryparser), [])
    query = queryparser.parse_query(text)

    expect(stoplist_of(queryparser), stopped)
    expect(str(query),
           'Query((be@2 OR or@3 OR be@6 OR is@7 OR the@8 OR questions@9))')

    # Check behaviour with a stoplist and a stemmer
    queryparser.set_stemmer(stemmer)
    queryparser.set_stemming_strategy(queryparser.STEM_SOME)
    # Shouldn't have changed since previous query.
    expect(stoplist_of(queryparser), stopped)
    query = queryparser.parse_query(text)

    expect(stoplist_of(queryparser), stopped)
    expect(str(query),
           'Query((Zbe@2 OR Zor@3 OR Zbe@6 OR Zis@7 OR Zthe@8 OR Zquestion@9))')
def test_queryparser_unstem_iter():
    """Test QueryParser unstemlist iterator.

    """
    text = 'to question questions'
    stemmer = xapian.Stem('en')

    def unstemmed(parser, key):
        # Drain the unstemlist iterator for one term into a plain list.
        return [term for term in parser.unstemlist(key)]

    # Without a stemmer: nothing is known before parsing, and afterwards
    # each term maps back to itself.
    queryparser = xapian.QueryParser()
    plain_words = ('to', 'question', 'questions')
    for word in plain_words:
        expect(unstemmed(queryparser, word), [])
    query = queryparser.parse_query(text)

    for word in plain_words:
        expect(unstemmed(queryparser, word), [word])
    expect(str(query),
           'Query((to@1 OR question@2 OR questions@3))')

    # With a stemmer: the stemmed terms carry a 'Z' prefix, and both forms
    # of "question" are recorded under the one stemmed term.
    queryparser = xapian.QueryParser()
    queryparser.set_stemmer(stemmer)
    queryparser.set_stemming_strategy(queryparser.STEM_SOME)
    for key in ('Zto', 'Zquestion', 'Zquestions'):
        expect(unstemmed(queryparser, key), [])
    query = queryparser.parse_query(text)

    after_parse = (
        ('Zto', ['to']),
        ('Zquestion', ['question', 'questions']),
        ('Zquestions', []),
    )
    for key, originals in after_parse:
        expect(unstemmed(queryparser, key), originals)
    expect(str(query),
           'Query((Zto@1 OR Zquestion@2 OR Zquestion@3))')
def test_allterms_iter():
    """Test all-terms iterator on Database.

    """
    db = setup_database()

    context("making a list of the term names and frequencies")
    terms = []
    freqs = []
    for entry in db:
        terms.append(entry.term)
        expect_exception(xapian.InvalidOperationError,
                         'Iterator does not support wdfs',
                         getattr, entry, 'wdf')
        freqs.append(entry.termfreq)
        expect_exception(xapian.InvalidOperationError,
                         'Iterator does not support position lists',
                         getattr, entry, 'positer')

    context("checking that items are no longer valid once the iterator has moved on")
    stale = [entry for entry in db]

    # The term name is still readable on an item the iterator has left...
    expect(len(stale), len(terms))
    for entry, term in zip(stale, terms):
        expect(entry.term, term)

    # ...but the term frequency is not.
    expect(len(stale), len(freqs))
    for entry in stale:
        expect_exception(xapian.InvalidOperationError,
                         'Iterator has moved, and does not support random access',
                         getattr, entry, 'termfreq')

    context("checking that restricting the terms iterated with a prefix works")
    # Work out what the prefix-restricted iteration should give from the
    # lists gathered by the unrestricted iteration above.
    with_prefix = [(term, freq) for term, freq in zip(terms, freqs)
                   if term[0] == 't']
    seen = 0
    for entry in db.allterms('t'):
        want_term, want_freq = with_prefix[seen]
        expect(entry.term, want_term)
        expect(entry.termfreq, want_freq)
        seen += 1
    expect(len(with_prefix), seen)
def test_termlist_iter():
    """Test termlist iterator on Database.

    Covers plain iteration over the termlist of document 3, skip_to() in each
    direction, and which attributes of a yielded item can still be read once
    the iterator has moved on.

    """
    def summarise(termitem):
        # Gather everything a live item exposes into one comparable tuple.
        return (termitem.term, termitem.wdf, termitem.termfreq,
                [pos for pos in termitem.positer])

    db = setup_database()

    # Make lists of the item contents
    terms = []
    wdfs = []
    freqs = []
    positers = []
    for termitem in db.termlist(3):
        terms.append(termitem.term)
        wdfs.append(termitem.wdf)
        freqs.append(termitem.termfreq)
        positers.append([pos for pos in termitem.positer])

    expect(terms, ['it', 'two', 'warm', 'was'])
    expect(wdfs, [1, 2, 1, 1])
    expect(freqs, [5, 3, 4, 4])
    expect(positers, [[2], [], [3], [1]])

    # Test skip_to().
    tliter = db.termlist(3)

    # skip to an item before the first item.
    termitem = tliter.skip_to('a')
    expect(summarise(termitem), ('it', 1, 5, [2]))

    # skip forwards to an item.
    termitem = tliter.skip_to('two')
    expect(summarise(termitem), ('two', 2, 3, []))

    # skip to same place (should return same item)
    termitem = tliter.skip_to('two')
    expect(summarise(termitem), ('two', 2, 3, []))

    # next() after a skip_to(), should return next item.
    termitem = next(tliter)
    expect(summarise(termitem), ('warm', 1, 4, [3]))

    # skip to same place (should return same item)
    termitem = tliter.skip_to('warm')
    expect(summarise(termitem), ('warm', 1, 4, [3]))

    # skip backwards (should return same item).  This is now asserted, as
    # the equivalent step of the postinglist test already is.
    termitem = tliter.skip_to('a')
    expect(summarise(termitem), ('warm', 1, 4, [3]))

    # skip to after end.
    expect_exception(StopIteration, '', tliter.skip_to, 'zoo')
    # skip backwards (should still return StopIteration).
    expect_exception(StopIteration, '', tliter.skip_to, 'a')
    # next should continue to return StopIteration.
    expect_exception(StopIteration, '', next, tliter)

    # Make a list of the terms (so we can test if they're still valid
    # once the iterator has moved on).
    termitems = [termitem for termitem in db.termlist(3)]

    expect(len(termitems), len(terms))
    for termitem, term in zip(termitems, terms):
        expect(termitem.term, term)

    expect(len(termitems), len(wdfs))
    for termitem, wdf in zip(termitems, wdfs):
        expect(termitem.wdf, wdf)

    expect(len(termitems), len(freqs))
    for termitem in termitems:
        expect_exception(xapian.InvalidOperationError,
                         'Iterator has moved, and does not support random access',
                         getattr, termitem, 'termfreq')

    # Compare against the position lists here (this check used to repeat the
    # comparison against freqs).
    expect(len(termitems), len(positers))
    for termitem in termitems:
        expect_exception(xapian.InvalidOperationError,
                         'Iterator has moved, and does not support random access',
                         getattr, termitem, 'positer')
def test_dbdocument_iter():
    """Test document terms iterator for document taken from a database.

    Iterates the terms of document 3 as fetched from the database, then
    checks which attributes of the yielded items can still be read once the
    iterator has moved on.

    """
    db = setup_database()

    doc = db.get_document(3)

    # Make lists of the item contents
    terms = []
    wdfs = []
    freqs = []
    positers = []
    for termitem in doc:
        terms.append(termitem.term)
        wdfs.append(termitem.wdf)
        freqs.append(termitem.termfreq)
        positers.append([pos for pos in termitem.positer])

    expect(terms, ['it', 'two', 'warm', 'was'])
    expect(wdfs, [1, 2, 1, 1])
    expect(freqs, [5, 3, 4, 4])
    expect(positers, [[2], [], [3], [1]])

    # Make a list of the terms (so we can test if they're still valid
    # once the iterator has moved on).
    termitems = [termitem for termitem in doc]

    expect(len(termitems), len(terms))
    for termitem, term in zip(termitems, terms):
        expect(termitem.term, term)

    expect(len(termitems), len(wdfs))
    for termitem, wdf in zip(termitems, wdfs):
        expect(termitem.wdf, wdf)

    expect(len(termitems), len(freqs))
    for termitem in termitems:
        expect_exception(xapian.InvalidOperationError,
                         'Iterator has moved, and does not support random access',
                         getattr, termitem, 'termfreq')

    # Compare against the position lists here (this check used to repeat the
    # comparison against freqs).
    expect(len(termitems), len(positers))
    for termitem in termitems:
        expect_exception(xapian.InvalidOperationError,
                         'Iterator has moved, and does not support random access',
                         getattr, termitem, 'positer')
def test_newdocument_iter():
    """Test document terms iterator for newly created document.

    The document is never added to a database, so asking one of its term
    items for a term frequency is expected to fail.

    """
    doc = xapian.Document()
    doc.set_data("was it warm? two")
    for word, position in (("was", 1), ("it", 2), ("warm", 3)):
        doc.add_posting(word, position)
    doc.add_term("two", 2)

    # Make lists of the item contents
    terms = []
    wdfs = []
    positers = []
    for entry in doc:
        terms.append(entry.term)
        wdfs.append(entry.wdf)
        expect_exception(xapian.InvalidOperationError,
                         "Can't get term frequency from a document termlist "
                         "which is not associated with a database.",
                         getattr, entry, 'termfreq')
        positers.append([pos for pos in entry.positer])

    expect(terms, ['it', 'two', 'warm', 'was'])
    expect(wdfs, [1, 2, 1, 1])
    expect(positers, [[2], [], [3], [1]])

    # Make a list of the terms (so we can test if they're still valid
    # once the iterator has moved on).
    stale = [entry for entry in doc]

    expect(len(stale), len(terms))
    for entry, term in zip(stale, terms):
        expect(entry.term, term)

    expect(len(stale), len(wdfs))
    for entry, wdf in zip(stale, wdfs):
        expect(entry.wdf, wdf)

    for entry in stale:
        expect_exception(xapian.InvalidOperationError,
                         'Iterator has moved, and does not support random access',
                         getattr, entry, 'termfreq')

    expect(len(stale), len(positers))
    for entry in stale:
        expect_exception(xapian.InvalidOperationError,
                         'Iterator has moved, and does not support random access',
                         getattr, entry, 'positer')
def test_postinglist_iter():
    """Test postinglist iterator on Database.

    Covers plain iteration over the postings of the term 'it', skip_to() in
    each direction, and which attributes of a yielded posting can still be
    read once the iterator has moved on.

    """
    def summarise(posting):
        # Gather everything a live posting exposes into one comparable tuple.
        return (posting.docid, posting.doclength, posting.wdf,
                [pos for pos in posting.positer])

    db = setup_database()

    # Make lists of the item contents
    docids = []
    doclengths = []
    wdfs = []
    positers = []
    for posting in db.postlist('it'):
        docids.append(posting.docid)
        doclengths.append(posting.doclength)
        wdfs.append(posting.wdf)
        positers.append([pos for pos in posting.positer])

    expect(docids, [1, 2, 3, 4, 5])
    expect(doclengths, [3, 3, 5, 8, 19])
    expect(wdfs, [1, 1, 1, 1, 8])
    expect(positers, [[1], [2], [2], [2], [2, 7]])

    # Test skip_to().
    pliter = db.postlist('it')

    # skip to an item before the first item.
    posting = pliter.skip_to(0)
    expect(summarise(posting), (1, 3, 1, [1]))

    # skip forwards to an item.
    posting = pliter.skip_to(3)
    expect(summarise(posting), (3, 5, 1, [2]))

    # skip to same place (should return same item)
    posting = pliter.skip_to(3)
    expect(summarise(posting), (3, 5, 1, [2]))

    # next() after a skip_to(), should return next item.
    posting = next(pliter)
    expect(summarise(posting), (4, 8, 1, [2]))

    # skip to same place (should return same item)
    posting = pliter.skip_to(4)
    expect(summarise(posting), (4, 8, 1, [2]))

    # skip backwards (should return same item)
    posting = pliter.skip_to(2)
    expect(summarise(posting), (4, 8, 1, [2]))

    # skip to after end.
    expect_exception(StopIteration, '', pliter.skip_to, 6)
    # skip backwards (should still return StopIteration).  The target has to
    # be an earlier docid for this to be a backwards skip; it used to repeat
    # the skip to 6 from the line above.
    expect_exception(StopIteration, '', pliter.skip_to, 2)
    # next should continue to return StopIteration.
    expect_exception(StopIteration, '', next, pliter)

    # Make a list of the postings (so we can test if they're still valid once
    # the iterator has moved on).
    postings = [posting for posting in db.postlist('it')]

    expect(len(postings), len(docids))
    for posting, docid in zip(postings, docids):
        expect(posting.docid, docid)

    expect(len(postings), len(doclengths))
    for posting, doclength in zip(postings, doclengths):
        expect(posting.doclength, doclength)

    expect(len(postings), len(wdfs))
    for posting, wdf in zip(postings, wdfs):
        expect(posting.wdf, wdf)

    expect(len(postings), len(positers))
    for posting in postings:
        expect_exception(xapian.InvalidOperationError,
                         'Iterator has moved, and does not support random access',
                         getattr, posting, 'positer')
def test_valuestream_iter():
    """Test a valuestream iterator on Database.

    Slot 0 holds a value for documents 3, 4 and 5, slot 1 is empty, and
    slots 5 and 9 each hold one value for document 5.

    Iterators are advanced with the next() builtin, as in the other iterator
    tests in this file, rather than by calling their next method directly.

    """
    def pair(item):
        # The two attributes of a value stream item, as a comparable tuple.
        return (item.docid, item.value)

    db = setup_database()

    # Check basic iteration
    expect([pair(item) for item in db.valuestream(0)],
           [(3, '\xa4'), (4, '\xa2'), (5, '\xa4')])
    expect([pair(item) for item in db.valuestream(1)], [])
    expect([pair(item) for item in db.valuestream(5)],
           [(5, "five")])
    expect([pair(item) for item in db.valuestream(9)],
           [(5, "nine")])

    # Test skip_to() on iterator with no values, and behaviours when called
    # after already returning StopIteration.
    i = db.valuestream(1)
    expect_exception(StopIteration, "", i.skip_to, 1)
    expect_exception(StopIteration, "", i.skip_to, 1)
    i = db.valuestream(1)
    expect_exception(StopIteration, "", i.skip_to, 1)
    expect_exception(StopIteration, "", next, i)
    i = db.valuestream(1)
    expect_exception(StopIteration, "", next, i)
    expect_exception(StopIteration, "", i.skip_to, 1)

    # Test that skipping to a value works, and that skipping doesn't have to
    # advance.
    i = db.valuestream(0)
    item = i.skip_to(4)
    expect(pair(item), (4, '\xa2'))
    item = i.skip_to(4)
    expect(pair(item), (4, '\xa2'))
    item = i.skip_to(1)
    expect(pair(item), (4, '\xa2'))
    item = i.skip_to(5)
    expect(pair(item), (5, '\xa4'))
    expect_exception(StopIteration, "", i.skip_to, 6)

    # Test that alternating skip_to() and next() works.
    i = db.valuestream(0)
    item = next(i)
    expect(pair(item), (3, '\xa4'))
    item = i.skip_to(4)
    expect(pair(item), (4, '\xa2'))
    item = next(i)
    expect(pair(item), (5, '\xa4'))
    expect_exception(StopIteration, "", i.skip_to, 6)

    # Test that next works correctly after skip_to() called with an earlier
    # item.
    i = db.valuestream(0)
    item = i.skip_to(4)
    expect(pair(item), (4, '\xa2'))
    item = i.skip_to(1)
    expect(pair(item), (4, '\xa2'))
    item = next(i)
    expect(pair(item), (5, '\xa4'))

    # Test that next works correctly after skipping to last item
    i = db.valuestream(0)
    item = i.skip_to(5)
    expect(pair(item), (5, '\xa4'))
    expect_exception(StopIteration, "", next, i)
def test_position_iter():
    """Test position iterator for a document in a database.

    """
    db = setup_database()

    # The position list is requested from the database by document id and
    # term, so the Document object itself doesn't need to be fetched.
    positions = [position for position in db.positionlist(5, 'it')]

    expect(positions, [2, 7])
def test_value_iter():
    """Test iterators over list of values in a document.

    """
    db = setup_database()
    doc = db.get_document(5)

    # Document 5 has values in slots 0, 5 and 9, expected in slot order.
    wanted = (
        (0, xapian.sortable_serialise(2)),
        (5, 'five'),
        (9, 'nine'),
    )

    items = list(doc.values())
    expect(len(items), 3)
    for item, (slot, value) in zip(items, wanted):
        expect(item.num, slot)
        expect(item.value, value)
def test_synonyms_iter():
    """Test iterators over list of synonyms in a database.

    Synonyms are added through a writable database; a separate reader is
    checked before and after the writer commits.

    """
    dbpath = 'db_test_synonyms_iter'
    hello_synonyms = ['hi', 'howdy']
    hello_key = ['hello']

    def check(database, synonyms, keys):
        # Run the same six lookups against `database`; `synonyms` is the
        # expected synonym list for 'hello' and `keys` the expected key list.
        expect([item for item in database.synonyms('foo')], [])
        expect([item for item in database.synonyms('hello')], synonyms)
        expect([item for item in database.synonym_keys()], keys)
        expect([item for item in database.synonym_keys('foo')], [])
        expect([item for item in database.synonym_keys('he')], keys)
        expect([item for item in database.synonym_keys('hello')], keys)

    db = xapian.WritableDatabase(dbpath, xapian.DB_CREATE_OR_OVERWRITE)

    db.add_synonym('hello', 'hi')
    db.add_synonym('hello', 'howdy')

    # The writer sees its own uncommitted synonyms...
    check(db, hello_synonyms, hello_key)

    # ...but a reader opened before the commit sees none of them.
    dbr = xapian.Database(dbpath)
    check(dbr, [], [])

    db.commit()

    check(db, hello_synonyms, hello_key)

    # A reader opened after the commit sees them too.
    dbr = xapian.Database(dbpath)
    check(dbr, hello_synonyms, hello_key)

    db.close()
    expect(xapian.Database.check(dbpath), 0)
    dbr.close()
    shutil.rmtree(dbpath)
def test_metadata_keys_iter():
    """Test iterators over list of metadata keys in a database.

    Metadata is set through a writable database; a separate reader is
    checked before and after the writer commits.

    """
    dbpath = 'db_test_metadata_iter'
    every_key = ['author', 'item1', 'item2', 'type']
    item_keys = ['item1', 'item2']
    type_key = ['type']

    def check(database, all_keys, items, types):
        # Run the same five prefix lookups against `database`.
        expect([item for item in database.metadata_keys()], all_keys)
        expect([item for item in database.metadata_keys('foo')], [])
        expect([item for item in database.metadata_keys('item')], items)
        expect([item for item in database.metadata_keys('it')], items)
        expect([item for item in database.metadata_keys('type')], types)

    db = xapian.WritableDatabase(dbpath, xapian.DB_CREATE_OR_OVERWRITE)

    # 'item3' and 'item4' end up with an empty value, and the checks below
    # expect neither of them to be listed as a key.
    assignments = (
        ('author', 'richard'),
        ('item1', 'hello'),
        ('item1', 'hi'),
        ('item2', 'howdy'),
        ('item3', ''),
        ('item4', 'goodbye'),
        ('item4', ''),
        ('type', 'greeting'),
    )
    for key, value in assignments:
        db.set_metadata(key, value)

    # The writer sees its own uncommitted metadata...
    check(db, every_key, item_keys, type_key)

    # ...but a reader opened before the commit sees none of it.
    dbr = xapian.Database(dbpath)
    check(dbr, [], [], [])

    db.commit()
    check(db, every_key, item_keys, type_key)

    # A reader opened after the commit sees it too.
    dbr = xapian.Database(dbpath)
    check(dbr, every_key, item_keys, type_key)

    db.close()
    expect(xapian.Database.check(dbpath), 0)
    dbr.close()
    shutil.rmtree(dbpath)
def test_spell():
    """Test basic spelling correction features.

    Spellings are added through a writable database; a separate reader is
    checked before and after the writer commits.

    """
    dbpath = 'db_test_spell'
    known = [('hello', 1), ('mell', 2)]

    def spellings_of(database):
        # Flatten the spellings iterator into (term, frequency) pairs.
        return [(item.term, item.termfreq) for item in database.spellings()]

    db = xapian.WritableDatabase(dbpath, xapian.DB_CREATE_OR_OVERWRITE)

    db.add_spelling('hello')
    db.add_spelling('mell', 2)
    expect(db.get_spelling_suggestion('hell'), 'mell')
    expect(spellings_of(db), known)

    # A reader opened before the commit has no spelling data yet.
    dbr = xapian.Database(dbpath)
    expect(dbr.get_spelling_suggestion('hell'), '')
    expect(spellings_of(dbr), [])

    db.commit()

    # A reader opened after the commit agrees with the writer.
    dbr = xapian.Database(dbpath)
    expect(db.get_spelling_suggestion('hell'), 'mell')
    expect(dbr.get_spelling_suggestion('hell'), 'mell')
    expect(spellings_of(dbr), known)

    db.close()
    dbr.close()
    shutil.rmtree(dbpath)
def test_queryparser_custom_vrp():
    """Test QueryParser with a custom (in python) ValueRangeProcessor.

    """
    class PrefixingVRP(xapian.ValueRangeProcessor):
        """Claim every range for slot 7, tagging each end with a letter."""

        def __init__(self):
            xapian.ValueRangeProcessor.__init__(self)

        def __call__(self, begin, end):
            tagged_begin = "A" + begin
            tagged_end = "B" + end
            return (7, tagged_begin, tagged_end)

    processor = PrefixingVRP()
    queryparser = xapian.QueryParser()
    queryparser.add_valuerangeprocessor(processor)

    query = queryparser.parse_query('5..8')

    expect(str(query),
           'Query(0 * VALUE_RANGE 7 A5 B8)')
def test_queryparser_custom_vrp_deallocation():
    """Test that QueryParser doesn't delete ValueRangeProcessors too soon.

    """
    class PrefixingVRP(xapian.ValueRangeProcessor):
        """Claim every range for slot 7, tagging each end with a letter."""

        def __init__(self):
            xapian.ValueRangeProcessor.__init__(self)

        def __call__(self, begin, end):
            tagged_begin = "A" + begin
            tagged_end = "B" + end
            return (7, tagged_begin, tagged_end)

    def make_parser():
        # The processor is held only by a local name, so once this function
        # returns the test code has no reference to it left.
        parser = xapian.QueryParser()
        processor = PrefixingVRP()
        parser.add_valuerangeprocessor(processor)
        return parser

    queryparser = make_parser()
    query = queryparser.parse_query('5..8')

    expect(str(query),
           'Query(0 * VALUE_RANGE 7 A5 B8)')
def test_queryparser_custom_rp():
    """Test QueryParser with a custom (in python) RangeProcessor.

    """
    class PrefixingRP(xapian.RangeProcessor):
        """Turn every range into a slot 7 value range with tagged ends."""

        def __init__(self):
            xapian.RangeProcessor.__init__(self)

        def __call__(self, begin, end):
            return xapian.Query(xapian.Query.OP_VALUE_RANGE, 7,
                                "A" + begin, "B" + end)

    processor = PrefixingRP()
    queryparser = xapian.QueryParser()
    queryparser.add_rangeprocessor(processor)

    query = queryparser.parse_query('5..8')

    expect(str(query),
           'Query(0 * VALUE_RANGE 7 A5 B8)')
def test_queryparser_custom_rp_deallocation():
    """Test that QueryParser doesn't delete RangeProcessors too soon.

    """
    class PrefixingRP(xapian.RangeProcessor):
        """Turn every range into a slot 7 value range with tagged ends."""

        def __init__(self):
            xapian.RangeProcessor.__init__(self)

        def __call__(self, begin, end):
            return xapian.Query(xapian.Query.OP_VALUE_RANGE, 7,
                                "A" + begin, "B" + end)

    def make_parser():
        # The processor is held only by a local name, so once this function
        # returns the test code has no reference to it left.
        parser = xapian.QueryParser()
        processor = PrefixingRP()
        parser.add_rangeprocessor(processor)
        return parser

    queryparser = make_parser()
    query = queryparser.parse_query('5..8')

    expect(str(query),
           'Query(0 * VALUE_RANGE 7 A5 B8)')
def test_scale_weight():
    """Check the OP_SCALE_WEIGHT query operator.

    For multipliers 0, 1 and 2.5 the scaled query must return the same
    documents as the plain query with weights multiplied accordingly
    (compared after truncation to an integer number of millionths).  A
    negative multiplier must be rejected with InvalidArgumentError.

    """
    db = setup_database()
    for mult in (0, 1, 2.5):
        context("checking queries with OP_SCALE_WEIGHT with a multiplier of %r" %
                mult)
        plain = xapian.Query("it")
        scaled = xapian.Query(xapian.Query.OP_SCALE_WEIGHT, plain, mult)

        enquire = xapian.Enquire(db)
        enquire.set_query(plain)
        plain_mset = enquire.get_mset(0, 10)
        enquire.set_query(scaled)
        scaled_mset = enquire.get_mset(0, 10)

        if mult > 0:
            expected = [(int(hit.weight * mult * 1000000), hit.docid)
                        for hit in plain_mset]
        else:
            # Every weight is zero, so only the docids distinguish the hits.
            expected = sorted([(0, hit.docid) for hit in plain_mset])
        actual = [(int(hit.weight * 1000000), hit.docid)
                  for hit in scaled_mset]
        expect(actual, expected)

    context("checking queries with OP_SCALE_WEIGHT with a multiplier of -1")
    plain = xapian.Query("it")
    expect_exception(xapian.InvalidArgumentError,
                     "OP_SCALE_WEIGHT requires factor >= 0",
                     xapian.Query,
                     xapian.Query.OP_SCALE_WEIGHT, plain, -1)
def test_weight_normalise():
    """Check normalising of query weights via OP_SCALE_WEIGHT.

    For each query a search asking for no results is run first to obtain
    MSet.get_max_possible().  The query is then scaled by the reciprocal of
    that value and run again: the new maximum possible weight must be 1
    (allowing for rounding) and every returned weight must lie in (0, 1].

    This checks that get_max_possible() is a valid upper bound (it isn't
    guaranteed to be a tight one), and that OP_SCALE_WEIGHT compensates
    correctly.

    """
    db = setup_database()
    query_strings = (
        "it",
        "was",
        "it was",
        "it was four",
        "it was four five",
        '"was it warm" four notpresent',
        "notpresent",
    )
    for query in query_strings:
        context("checking query %r using OP_SCALE_WEIGHT to normalise the weights" % query)
        parser = xapian.QueryParser()
        unscaled = parser.parse_query(query)
        enquire = xapian.Enquire(db)
        enquire.set_query(unscaled)
        empty_mset = enquire.get_mset(0, 0)

        # A max_attained value of 0 gives some reassurance that the match
        # didn't actually do the work of calculating any results.
        expect(empty_mset.get_max_attained(), 0)

        max_possible = empty_mset.get_max_possible()
        if query == "notpresent":
            # Nothing can match, so there is no weight to normalise.
            expect(max_possible, 0)
            continue

        factor = 1.0 / max_possible
        normalised = xapian.Query(xapian.Query.OP_SCALE_WEIGHT, unscaled,
                                  factor)

        enquire = xapian.Enquire(db)
        enquire.set_query(normalised)
        scaled_mset = enquire.get_mset(0, 10)
        # max_possible should be 1 (excluding rounding errors) after scaling.
        expect(int(scaled_mset.get_max_possible() * 1000000.0 + 0.5), 1000000)
        for hit in scaled_mset:
            expect(hit.weight > 0, True)
            expect(hit.weight <= 1, True)
def test_valuesetmatchdecider():
    """Simple checks of the ValueSetMatchDecider class.

    A decider built with True as its second argument is expected to accept a
    document only while the document's slot 0 value is in the decider's set;
    one built with False is expected to behave the opposite way.

    """
    accepting = xapian.ValueSetMatchDecider(0, True)
    doc = xapian.Document()
    # Empty set, document without a value: rejected.
    expect(accepting(doc), False)

    accepting.add_value('foo')
    doc.add_value(0, 'foo')
    expect(accepting(doc), True)

    accepting.remove_value('foo')
    expect(accepting(doc), False)

    rejecting = xapian.ValueSetMatchDecider(0, False)
    expect(rejecting(doc), True)

    rejecting.add_value('foo')
    expect(rejecting(doc), False)
def test_postingsource():
    """Simple test of the PostingSource class.

    A PostingSource subclassed in Python yields the odd docids up to a limit,
    with a weight that starts at the document count and drops by one per
    document.  The source is wrapped in queries and Enquire objects created
    in nested helper functions, to check it isn't released too early.

    The on-disk database is closed and its directory removed even if one of
    the checks fails, so a failing run doesn't leave files behind.

    """
    class OddPostingSource(xapian.PostingSource):
        def __init__(self, limit):
            xapian.PostingSource.__init__(self)
            self.max = limit

        def init(self, db):
            self.current = -1
            self.weight = db.get_doccount() + 1
            self.set_maxweight(self.weight)

        def get_termfreq_min(self):
            return 0

        def get_termfreq_est(self):
            return int(self.max / 2)

        def get_termfreq_max(self):
            return self.max

        def next(self, minweight):
            self.current += 2
            self.weight -= 1.0
            self.set_maxweight(self.weight)

        def at_end(self):
            return self.current > self.max

        def get_docid(self):
            return self.current

        def get_weight(self):
            return self.weight

    dbpath = 'db_test_postingsource'
    db = xapian.WritableDatabase(dbpath, xapian.DB_CREATE_OR_OVERWRITE)
    try:
        try:
            for _ in range(10):
                db.add_document(xapian.Document())

            # Do a dance to check that the posting source doesn't get
            # dereferenced too soon in various cases.
            def mkenq(db):
                # First - check that it's kept when the source goes out of
                # scope.
                def mkquery():
                    source = OddPostingSource(10)
                    # The posting source is inside a list to check that case
                    # is correctly handled.
                    return xapian.Query(xapian.Query.OP_OR,
                                        ["terM wHich wilL NoT maTch",
                                         xapian.Query(source)])

                # Check that it's kept when the query goes out of scope.
                def submkenq():
                    query = mkquery()
                    enquire = xapian.Enquire(db)
                    enquire.set_query(query)
                    return enquire

                # Check it's kept when the query is retrieved from enquire
                # and put into a new enquire.
                def submkenq2():
                    enq1 = submkenq()
                    enquire = xapian.Enquire(db)
                    enquire.set_query(enq1.get_query())
                    return enquire

                return submkenq2()

            enquire = mkenq(db)
            mset = enquire.get_mset(0, 10)

            expect([item.docid for item in mset], [1, 3, 5, 7, 9])
            expect(mset[0].weight, db.get_doccount())
        finally:
            db.close()

        # The database has to be closed before it is checked.
        expect(xapian.Database.check(dbpath), 0)
    finally:
        shutil.rmtree(dbpath)
def test_postingsource2():
    """Simple test of the ValueWeightPostingSource class.

    Ten documents get a sortable-serialised number in value slot 1, and a
    query built from ValueWeightPostingSource(1) must return them in
    decreasing order of that number.  The Python reference to the source is
    deleted before the query is run, to check the query keeps it alive.

    The on-disk database is closed and its directory removed even if one of
    the checks fails.

    """
    dbpath = 'db_test_postingsource2'
    db = xapian.WritableDatabase(dbpath, xapian.DB_CREATE_OR_OVERWRITE)
    try:
        try:
            vals = (6, 9, 4.5, 4.4, 4.6, 2, 1, 4, 3, 0)
            for val in vals:
                doc = xapian.Document()
                doc.add_value(1, xapian.sortable_serialise(val))
                db.add_document(doc)

            source = xapian.ValueWeightPostingSource(1)
            query = xapian.Query(source)
            del source  # Check that query keeps a reference to it.

            enquire = xapian.Enquire(db)
            enquire.set_query(query)
            mset = enquire.get_mset(0, 10)

            expect([item.docid for item in mset],
                   [2, 1, 5, 3, 4, 8, 9, 6, 7, 10])
        finally:
            db.close()

        # The database has to be closed before it is checked.
        expect(xapian.Database.check(dbpath), 0)
    finally:
        shutil.rmtree(dbpath)
def test_postingsource3():
    """Test that ValuePostingSource can be usefully subclassed.

    The subclass weights each document by the cube of the number stored in
    value slot 1, so the documents must come back in decreasing order of
    that number.

    The on-disk database is closed and its directory removed even if one of
    the checks fails.

    """
    class PyValuePostingSource(xapian.ValuePostingSource):
        def __init__(self, slot):
            xapian.ValuePostingSource.__init__(self, slot)

        def init(self, db):
            xapian.ValuePostingSource.init(self, db)
            self.current = -1
            slot = self.get_slot()
            ub = db.get_value_upper_bound(slot)
            # The largest stored value gives the largest possible weight.
            self.set_maxweight(xapian.sortable_unserialise(ub) ** 3)

        def next(self, minweight):
            return xapian.ValuePostingSource.next(self, minweight)

        def get_weight(self):
            value = self.get_value()
            return xapian.sortable_unserialise(value) ** 3

    dbpath = 'db_test_postingsource3'
    db = xapian.WritableDatabase(dbpath, xapian.DB_CREATE_OR_OVERWRITE)
    try:
        try:
            vals = (1, 3, 2, 4)
            for wt in vals:
                doc = xapian.Document()
                doc.add_value(1, xapian.sortable_serialise(wt))
                db.add_document(doc)

            source = PyValuePostingSource(1)
            query = xapian.Query(source)
            # NOTE(review): the check below was already disabled in this
            # test; the reason isn't recorded here, so it is left disabled.
            #del source # Check that query keeps a reference to it.

            enquire = xapian.Enquire(db)
            enquire.set_query(query)
            mset = enquire.get_mset(0, 10)

            expect([item.docid for item in mset], [4, 2, 3, 1])
        finally:
            db.close()

        # The database has to be closed before it is checked.
        expect(xapian.Database.check(dbpath), 0)
    finally:
        shutil.rmtree(dbpath)
def test_value_stats():
    """Simple test of being able to get value statistics.

    Ten documents get a sortable-serialised number in value slot 1.  The
    frequency and bounds reported for slot 1 must reflect them, while the
    unused slots 0 and 2 must report a frequency of 0 and empty bounds.

    The on-disk database is closed and its directory removed even if one of
    the checks fails.

    """
    dbpath = 'db_test_value_stats'
    db = xapian.WritableDatabase(dbpath, xapian.DB_CREATE_OR_OVERWRITE)
    try:
        try:
            vals = (6, 9, 4.5, 4.4, 4.6, 2, 1, 4, 3, 0)
            for val in vals:
                doc = xapian.Document()
                doc.add_value(1, xapian.sortable_serialise(val))
                db.add_document(doc)

            expect(db.get_value_freq(0), 0)
            expect(db.get_value_lower_bound(0), "")
            expect(db.get_value_upper_bound(0), "")
            expect(db.get_value_freq(1), 10)
            expect(db.get_value_lower_bound(1), xapian.sortable_serialise(0))
            expect(db.get_value_upper_bound(1), xapian.sortable_serialise(9))
            expect(db.get_value_freq(2), 0)
            expect(db.get_value_lower_bound(2), "")
            expect(db.get_value_upper_bound(2), "")
        finally:
            db.close()

        # The database has to be closed before it is checked.
        expect(xapian.Database.check(dbpath), 0)
    finally:
        shutil.rmtree(dbpath)
def test_get_uuid():
    """Test getting UUIDs from databases.

    Two freshly created databases must have different UUIDs, a read-only
    handle on a database must report the same UUID as the writable handle,
    and a Database with a single sub-database added must report that
    sub-database's UUID.

    Every handle opened is closed and both database directories are removed
    even if one of the checks fails.

    """
    dbpath = 'db_test_get_uuid'
    paths = (dbpath + "1", dbpath + "2")
    # Handles are recorded in the order they are opened, and closed in that
    # same order during cleanup.
    handles = []
    try:
        db1 = xapian.WritableDatabase(paths[0], xapian.DB_CREATE_OR_OVERWRITE)
        handles.append(db1)
        db2 = xapian.WritableDatabase(paths[1], xapian.DB_CREATE_OR_OVERWRITE)
        handles.append(db2)
        dbr1 = xapian.Database(paths[0])
        handles.append(dbr1)
        dbr2 = xapian.Database(paths[1])
        handles.append(dbr2)
        expect(db1.get_uuid() != db2.get_uuid(), True)
        expect(db1.get_uuid(), dbr1.get_uuid())
        expect(db2.get_uuid(), dbr2.get_uuid())

        db = xapian.Database()
        handles.append(db)
        db.add_database(db1)
        expect(db1.get_uuid(), db.get_uuid())
    finally:
        for handle in handles:
            handle.close()
        for path in paths:
            # A directory may not exist if opening its database failed;
            # don't let that mask the original error.
            if os.path.exists(path):
                shutil.rmtree(path)
def test_director_exception():
    """Check handling of an exception raised in a director.

    An ExpandDecider and a MatchDecider subclassed in Python both raise a
    custom exception from __call__.  The exception must come through
    unchanged both when the decider is called directly and when it is
    invoked via Enquire.get_eset() / Enquire.get_mset().

    """
    class TestException(Exception):
        def __init__(self, a, b):
            Exception.__init__(self, a + b)

    class EDecider(xapian.ExpandDecider):
        def __call__(self, term):
            raise TestException("foo", "bar")

    class MDecider(xapian.MatchDecider):
        def __call__(self, doc):
            raise TestException("foo", "bar")

    db = setup_database()
    enq = xapian.Enquire(db)
    enq.set_query(xapian.Query('it'))

    relevant = xapian.RSet()
    relevant.add_document(1)

    expand_decider = EDecider()
    expect_exception(TestException, "foobar", expand_decider, "foo")
    expect_exception(TestException, "foobar",
                     enq.get_eset, 10, relevant, expand_decider)

    match_decider = MDecider()
    expect_exception(TestException, "foobar", match_decider, xapian.Document())
    expect_exception(TestException, "foobar",
                     enq.get_mset, 0, 10, None, match_decider)
def check_vals(db, vals):
    """Check that slot 1 of every document holds the value given in vals.

    db is the database to read from; vals maps each docid from 1 up to the
    database's last docid onto the value expected in slot 1.

    """
    last_docid = db.get_lastdocid()
    for docid in xrange(1, last_docid + 1):
        stored = db.get_document(docid).get_value(1)
        message = "Expected stored value in doc %d" % docid
        expect(stored, vals[docid], message)
def test_value_mods():
    """Test handling of modifications to values.

    Documents with a value in slot 1 are added, then modified and emptied in
    a pseudo-random (but fixed, seeded) order.  The stored values are
    compared against a shadow dict both before and after each commit.
    Finally, reading from the closed database must raise DatabaseError.

    The on-disk database is closed and its directory removed even if one of
    the checks fails.

    """
    dbpath = 'db_test_value_mods'
    db = xapian.WritableDatabase(dbpath, xapian.DB_CREATE_OR_OVERWRITE)
    # Expected slot 1 value for each docid.
    vals = {}
    try:
        try:
            random.seed(42)
            doccount = 1000

            # Add a value to all the documents
            for num in xrange(1, doccount):
                doc = xapian.Document()
                val = 'val%d' % num
                doc.add_value(1, val)
                db.add_document(doc)
                vals[num] = val
            db.commit()
            check_vals(db, vals)

            # Modify one of the values (this is a regression test which
            # failed with the initial implementation of streaming values).
            doc = xapian.Document()
            val = 'newval0'
            doc.add_value(1, val)
            db.replace_document(2, doc)
            vals[2] = val
            db.commit()
            check_vals(db, vals)

            # Do some random modifications.
            for count in xrange(1, doccount * 2):
                docid = random.randint(1, doccount)
                doc = xapian.Document()

                # Every fifth modification leaves the document without a
                # value.
                if count % 5 == 0:
                    val = ''
                else:
                    val = 'newval%d' % count
                    doc.add_value(1, val)
                db.replace_document(docid, doc)
                vals[docid] = val

            # Check the values before and after modification.
            check_vals(db, vals)
            db.commit()
            check_vals(db, vals)

            # Delete all the values which are non-empty, in a random order.
            keys = [key for key, val in vals.iteritems() if val != '']
            random.shuffle(keys)
            for key in keys:
                doc = xapian.Document()
                db.replace_document(key, doc)
                vals[key] = ''
            check_vals(db, vals)
            db.commit()
            check_vals(db, vals)
        finally:
            db.close()

        # Once closed, reading from the database must fail cleanly.
        expect_exception(xapian.DatabaseError, "Database has been closed",
                         check_vals, db, vals)
    finally:
        shutil.rmtree(dbpath)
def test_serialise_document():
    """Check that documents survive a serialise/unserialise round trip.

    Both a hand-built document and one fetched from the sample database are
    serialised and unserialised; terms (with wdf), values and data of the
    copy must equal those of the original.

    """
    def check_round_trip(doc, term_count, data):
        # Unserialise a serialised copy and compare it with the original.
        copy = xapian.Document.unserialise(doc.serialise())
        expect(len(list(doc.termlist())), len(list(copy.termlist())))
        expect(len(list(doc.termlist())), term_count)
        expect([(item.term, item.wdf) for item in doc.termlist()],
               [(item.term, item.wdf) for item in copy.termlist()])
        expect([(item.num, item.value) for item in doc.values()],
               [(item.num, item.value) for item in copy.values()])
        expect(doc.get_data(), copy.get_data())
        expect(doc.get_data(), data)

    built = xapian.Document()
    built.add_term('foo', 2)
    built.add_value(1, 'bar')
    built.set_data('baz')
    check_round_trip(built, 1, 'baz')

    db = setup_database()
    check_round_trip(db.get_document(1), 3, 'is it cold?')
def test_serialise_query():
    """Check that queries survive a serialise/unserialise round trip.

    An empty query, a single-term query and an OR of two terms are each
    serialised and unserialised; the description of the copy must equal that
    of the original, which in turn must be the expected text.

    """
    def check_round_trip(query, description):
        copy = xapian.Query.unserialise(query.serialise())
        expect(str(query), str(copy))
        expect(str(query), description)

    check_round_trip(xapian.Query(), 'Query()')
    check_round_trip(xapian.Query('hello'), 'Query(hello)')
    check_round_trip(xapian.Query(xapian.Query.OP_OR, ('hello', 'world')),
                     'Query((hello OR world))')
def test_preserve_query_parser_stopper():
    """Check preservation of a stopper set on a query parser.

    The stopper is created and dropped inside a helper function; the parser
    must still report 'to' as a stopped word after parsing 'to be'.

    """
    def build_parser():
        parser = xapian.QueryParser()
        stopper = xapian.SimpleStopper()
        for word in ('to', 'not'):
            stopper.add(word)
        parser.set_stopper(stopper)
        del stopper
        return parser

    parser = build_parser()
    # The resulting query isn't inspected; parsing is what fills the stoplist.
    parser.parse_query('to be')
    stopped = [term for term in parser.stoplist()]
    expect(stopped, ['to'])
def test_preserve_term_generator_stopper():
    """Check preservation of a stopper set on a term generator.

    The stopper is created and dropped inside a helper function; indexing
    'to be' with the returned generator must still give the expected terms.

    """
    def build_termgen():
        generator = xapian.TermGenerator()
        generator.set_stemmer(xapian.Stem('en'))
        stopper = xapian.SimpleStopper()
        for word in ('to', 'not'):
            stopper.add(word)
        generator.set_stopper(stopper)
        del stopper
        return generator

    generator = build_termgen()
    generator.index_text('to be')

    indexed = generator.get_document()
    terms = sorted([item.term for item in indexed.termlist()])
    expect(terms, ['Zbe', 'be', 'to'])
def test_preserve_enquire_sorter():
    """Check preservation of a sorter set on an Enquire.

    For each of the three key-based sort setters, a MultiValueKeyMaker is
    created, handed to the Enquire and dropped inside a helper function.
    Running a match afterwards must still work.

    """
    db = xapian.WritableDatabase('', xapian.DB_BACKEND_INMEMORY)
    doc = xapian.Document()
    doc.add_term('foo')
    doc.add_value(1, '1')
    # The same document is added twice so there is something to order.
    db.add_document(doc)
    db.add_document(doc)

    def build_enquire(db, setter_name):
        enq = xapian.Enquire(db)
        sorter = xapian.MultiValueKeyMaker()
        getattr(enq, setter_name)(sorter, False)
        del sorter
        return enq

    setter_names = (
        'set_sort_by_key',
        'set_sort_by_key_then_relevance',
        'set_sort_by_relevance_then_key',
    )
    for setter_name in setter_names:
        enq = build_enquire(db, setter_name)
        enq.set_query(xapian.Query('foo'))
        enq.get_mset(0, 10)
def test_matchspy():
    """Check use of matchspies.

    Covers a spy whose Python reference is dropped straight after it is
    added, clear_matchspies(), and the counts gathered by a
    ValueCountMatchSpy on slot 0.

    """
    db = setup_database()
    enq = xapian.Enquire(db)
    enq.set_query(xapian.Query(xapian.Query.OP_OR, "was", "it"))

    def add_unreferenced_spy(enq):
        """Add a matchspy and drop the reference to it straight away, to
        check that it doesn't get deleted too soon.
        """
        spy = xapian.ValueCountMatchSpy(0)
        enq.add_matchspy(spy)
        del spy

    add_unreferenced_spy(enq)
    mset = enq.get_mset(0, 10)
    expect(len(mset), 5)

    spy = xapian.ValueCountMatchSpy(0)
    enq.add_matchspy(spy)
    # Regression test for clear_matchspies() - used to always raise an
    # exception due to a copy and paste error in its definition.
    enq.clear_matchspies()
    mset = enq.get_mset(0, 10)
    # The spy was removed before the match ran, so it has seen nothing.
    expect([item for item in spy.values()], [])

    enq.add_matchspy(spy)
    mset = enq.get_mset(0, 10)
    expect(spy.get_total(), 5)

    by_value = [(item.term, item.termfreq) for item in spy.values()]
    expect(by_value, [
        (xapian.sortable_serialise(1.5), 1),
        (xapian.sortable_serialise(2), 2),
    ])

    by_frequency = [(item.term, item.termfreq) for item in spy.top_values(10)]
    expect(by_frequency, [
        (xapian.sortable_serialise(2), 2),
        (xapian.sortable_serialise(1.5), 1),
    ])
def test_import_star():
    """Check that "from xapian import *" works.

    Regression test: this failed in the 1.2.0 release.  Using a star import
    isn't normally good style, but it should work anyway!

    The check is done simply by importing the test_xapian_star module.

    """
    import test_xapian_star
def test_latlongcoords_iter():
    """Check the wrapping of LatLongCoordsIterator.

    Iterating an empty LatLongCoords must yield nothing; after two
    coordinates are appended, iteration must yield them in order.

    """
    points = xapian.LatLongCoords()
    expect([point for point in points], [])

    for longitude in (0, 1):
        points.append(xapian.LatLongCoord(0, longitude))

    described = [str(point) for point in points]
    expect(described, ['Xapian::LatLongCoord(0, 0)',
                       'Xapian::LatLongCoord(0, 1)'])
def test_compactor():
    """Check that xapian.Compactor works.

    Two small databases with overlapping terms and metadata keys are merged
    into a third, first with a plain Compactor and then with a Python
    subclass which logs status messages and joins duplicate metadata values
    with a comma.

    """
    tmpdir = tempfile.mkdtemp()
    db1 = db2 = db3 = None
    try:
        db1path, db2path, db3path = [os.path.join(tmpdir, name)
                                     for name in ('db1', 'db2', 'db3')]

        def populate(db, tag):
            # One document per database; the term, value and metadata which
            # carry the tag are unique to that database, the others clash.
            doc = xapian.Document()
            doc.add_term('Hello')
            doc.add_term('Hello' + tag)
            doc.add_value(0, 'Val' + tag)
            db.set_metadata('key', tag)
            db.set_metadata('key' + tag, tag)
            db.add_document(doc)
            db.commit()

        # Set up a couple of sample input databases
        db1 = xapian.WritableDatabase(db1path, xapian.DB_CREATE_OR_OVERWRITE)
        populate(db1, '1')

        db2 = xapian.WritableDatabase(db2path, xapian.DB_CREATE_OR_OVERWRITE)
        populate(db2, '2')

        # Compact with the default compactor
        # Metadata conflicts are resolved by picking the first value
        plain = xapian.Compactor()
        plain.add_source(db1path)
        plain.add_source(db2path)
        plain.set_destdir(db3path)
        plain.compact()

        db3 = xapian.Database(db3path)
        expect([(item.term, item.termfreq) for item in db3.allterms()],
               [('Hello', 2), ('Hello1', 1), ('Hello2', 1)])
        expect(db3.get_document(1).get_value(0), 'Val1')
        expect(db3.get_document(2).get_value(0), 'Val2')
        expect(db3.get_metadata('key'), '1')
        expect(db3.get_metadata('key1'), '1')
        expect(db3.get_metadata('key2'), '2')

        context("testing a custom compactor which merges duplicate metadata")

        class MyCompactor(xapian.Compactor):
            def __init__(self):
                xapian.Compactor.__init__(self)
                self.log = []

            def set_status(self, table, status):
                # An empty status is logged as the start of work on a table.
                if status:
                    self.log.append('%s: %s' % (table, status))
                else:
                    self.log.append('Starting %s' % table)

            def resolve_duplicate_metadata(self, key, vals):
                return ','.join(vals)

        custom = MyCompactor()
        custom.add_source(db1path)
        custom.add_source(db2path)
        custom.set_destdir(db3path)
        custom.compact()
        log = '\n'.join(custom.log)
        # Check we got some messages in the log
        expect('Starting postlist' in log, True)

        db3 = xapian.Database(db3path)
        expect([(item.term, item.termfreq) for item in db3.allterms()],
               [('Hello', 2), ('Hello1', 1), ('Hello2', 1)])
        expect(db3.get_metadata('key'), '1,2')
        expect(db3.get_metadata('key1'), '1')
        expect(db3.get_metadata('key2'), '2')

    finally:
        # Close whichever databases were opened, then remove everything.
        if db1 is not None:
            db1.close()
        if db2 is not None:
            db2.close()
        if db3 is not None:
            db3.close()

        shutil.rmtree(tmpdir)
def test_leak_mset_items():
    """Check that the items property of MSet doesn't leak.

    The test only reads the property; nothing is asserted about the value.

    """
    db = xapian.WritableDatabase('', xapian.DB_BACKEND_INMEMORY)
    drip = xapian.Document()
    drip.add_term('drip')
    db.add_document(drip)

    enquire = xapian.Enquire(db)
    enquire.set_query(xapian.Query('drip'))
    mset = enquire.get_mset(0, 10)

    # Prior to 1.2.4 this next line leaked an object.
    mset.items
def test_custom_matchspy():
    """Check that a MatchSpy subclassed in Python gets called.

    The spy counts its invocations.  After a match asking for a single
    result it must have been called at least once, and the sample database
    must still hold its 5 documents.

    """
    class MSpy(xapian.MatchSpy):
        def __init__(self):
            xapian.MatchSpy.__init__(self)
            self.count = 0

        def __call__(self, doc, weight):
            self.count += 1

    counting_spy = MSpy()

    db = setup_database()
    either = xapian.Query(xapian.Query.OP_OR, "was", "it")

    enquire = xapian.Enquire(db)
    enquire.add_matchspy(counting_spy)
    enquire.set_query(either)
    mset = enquire.get_mset(0, 1)
    expect(len(mset), 1)
    expect(counting_spy.count >= 1, True)

    expect(db.get_doccount(), 5)
def test_removed_features():
    """Check that removed names are no longer available.

    For each listed object, looking up each listed attribute name must raise
    AttributeError.  The objects and names are held in a table which is
    walked in order.

    """
    db = xapian.WritableDatabase('', xapian.DB_BACKEND_INMEMORY)
    doc = xapian.Document()
    enq = xapian.Enquire(db)
    eset = xapian.ESet()
    mset = xapian.MSet()
    query = xapian.Query()
    qp = xapian.QueryParser()
    titer = xapian._TermIterator()
    postiter = xapian._PostingIterator()

    def check_missing(obj, attr):
        expect_exception(AttributeError, None, getattr, obj, attr)

    removed = (
        (xapian, (
            'Stem_get_available_languages',
            'TermIterator',
            'PositionIterator',
            'PostingIterator',
            'ValueIterator',
            'MSetIterator',
            'ESetIterator',
        )),
        (db, (
            'allterms_begin',
            'allterms_end',
            'metadata_keys_begin',
            'metadata_keys_end',
            'synonym_keys_begin',
            'synonym_keys_end',
            'synonyms_begin',
            'synonyms_end',
            'spellings_begin',
            'spellings_end',
            'positionlist_begin',
            'positionlist_end',
            'postlist_begin',
            'postlist_end',
            'termlist_begin',
            'termlist_end',
        )),
        (doc, (
            'termlist_begin',
            'termlist_end',
            'values_begin',
            'values_end',
        )),
        (enq, (
            'get_matching_terms_begin',
            'get_matching_terms_end',
        )),
        (eset, ('begin', 'end')),
        (mset, ('begin', 'end')),
        (postiter, ('positionlist_begin', 'positionlist_end')),
        (query, ('get_terms_begin', 'get_terms_end')),
        (qp, (
            'stoplist_begin',
            'stoplist_end',
            'unstem_begin',
            'unstem_end',
        )),
        (titer, ('positionlist_begin', 'positionlist_end')),
    )
    for obj, attrs in removed:
        for attr in attrs:
            check_missing(obj, attr)
def test_repr():
    """Check that repr() of wrapped objects doesn't return None.

    repr() returned None in 1.4.0.  Each class is looked up by name on the
    xapian module and instantiated immediately before its repr() is checked.

    """
    def check(name, *args):
        # Instantiate xapian.<name>(*args) and check its repr().
        expect(repr(getattr(xapian, name)(*args)) is None, False)

    check('Query', 'foo')

    exception_names = (
        'AssertionError',
        'InvalidArgumentError',
        'InvalidOperationError',
        'UnimplementedError',
        'DatabaseError',
        'DatabaseCorruptError',
        'DatabaseCreateError',
        'DatabaseLockError',
        'DatabaseModifiedError',
        'DatabaseOpeningError',
        'DatabaseVersionError',
        'DocNotFoundError',
        'FeatureUnavailableError',
        'InternalError',
        'NetworkError',
        'NetworkTimeoutError',
        'QueryParserError',
        'SerialisationError',
        'RangeError',
        'WildcardError',
    )
    for name in exception_names:
        check(name, 'foo')

    # Class name followed by the constructor arguments to use.
    others = (
        ('Document',),
        ('Registry',),
        ('Query',),
        ('Stem', 'en'),
        ('TermGenerator',),
        ('MSet',),
        ('ESet',),
        ('RSet',),
        ('MultiValueKeyMaker',),
        ('SimpleStopper',),
        ('RangeProcessor',),
        ('DateRangeProcessor', 1),
        ('NumberRangeProcessor', 1),
        ('StringValueRangeProcessor', 1),
        ('DateValueRangeProcessor', 1),
        ('NumberValueRangeProcessor', 1),
        ('QueryParser',),
        ('BoolWeight',),
        ('TfIdfWeight',),
        ('BM25Weight',),
        ('BM25PlusWeight',),
        ('TradWeight',),
        ('InL2Weight',),
        ('IfB2Weight',),
        ('IneB2Weight',),
        ('BB2Weight',),
        ('DLHWeight',),
        ('PL2Weight',),
        ('PL2PlusWeight',),
        ('DPHWeight',),
        ('LMWeight',),
        ('CoordWeight',),
        ('Compactor',),
        ('ValuePostingSource', 1),
        ('ValueWeightPostingSource', 1),
        ('DecreasingValueWeightPostingSource', 1),
        ('ValueMapPostingSource', 1),
        ('FixedWeightPostingSource', 1),
        ('ValueCountMatchSpy', 1),
        ('LatLongCoord',),
        ('LatLongCoords',),
        ('GreatCircleMetric',),
        ('Database',),
        ('WritableDatabase',),
    )
    for spec in others:
        check(*spec)
# Overall outcome; cleared by run() as soon as any test run fails.
result = True

def run():
    """Run all tests (ie, callables with names starting "test_").

    Any command line arguments are passed on to runtests().  The outcome is
    recorded in the module-level `result` rather than returned, so that it
    can also be picked up when this function is the target of a thread.

    """
    global result
    passed = runtests(globals(), sys.argv[1:])
    if not passed:
        result = False

print("Running tests without threads")
run()

if have_threads:
    print("Running tests with threads")

    # This testcase seems to just block when run in a thread, so just remove
    # it before running tests in a thread.
    del test_import_star

    t = threading.Thread(name='test runner', target=run)
    t.start()
    # Block until the thread has completed so the thread gets a chance to exit
    # with error status.
    t.join()

if not result:
    sys.exit(1)
1809 # vim:syntax=python:set expandtab: