Cache compiled regexps used in $transform
[xapian.git] / xapian-bindings / python / pythontest.py
blobb2ba0d9f53fb07ddd77df453300db9e6de62869d
1 # Tests of Python-specific parts of the xapian bindings.
3 # Copyright (C) 2007 Lemur Consulting Ltd
4 # Copyright (C) 2008,2009,2010,2011,2013,2014 Olly Betts
5 # Copyright (C) 2010,2011 Richard Boulton
7 # This program is free software; you can redistribute it and/or
8 # modify it under the terms of the GNU General Public License as
9 # published by the Free Software Foundation; either version 2 of the
10 # License, or (at your option) any later version.
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with this program; if not, write to the Free Software
19 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
20 # USA
22 import os
23 import random
24 import shutil
25 import sys
26 import tempfile
27 import xapian
29 try:
30 import threading
31 have_threads = True
32 except ImportError:
33 have_threads = False
35 from testsuite import *
def setup_database():
    """Create the in-memory database used as a fixture by these tests.

    Five documents are added.  The first is built on its own; the other four
    all come from one Document object which is extended and added again each
    time, so each of them keeps the terms given to the ones before it.

    """
    database = xapian.inmemory_open()

    # First document: stands alone.
    cold = xapian.Document()
    cold.set_data("is it cold?")
    cold.add_term("is")
    cold.add_posting("it", 1)
    cold.add_posting("cold", 2)
    database.add_document(cold)

    # Second document: the base which the remaining documents build on.
    warm = xapian.Document()
    warm.set_data("was it warm?")
    warm.add_posting("was", 1)
    warm.add_posting("it", 2)
    warm.add_posting("warm", 3)
    database.add_document(warm)

    # Third document: same object again, with a new term and a value.
    warm.set_data("was it warm? two")
    warm.add_term("two", 2)
    warm.add_value(0, xapian.sortable_serialise(2))
    database.add_document(warm)

    # Fourth document.
    warm.set_data("was it warm? three")
    warm.add_term("three", 3)
    warm.add_value(0, xapian.sortable_serialise(1.5))
    database.add_document(warm)

    # Fifth document: also gains extra occurrences of "it" and two more
    # value slots.
    warm.set_data("was it warm? four it")
    warm.add_term("four", 4)
    warm.add_term("it", 6)
    warm.add_posting("it", 7)
    warm.add_value(5, 'five')
    warm.add_value(9, 'nine')
    warm.add_value(0, xapian.sortable_serialise(2))
    database.add_document(warm)

    expect(database.get_doccount(), 5)
    return database
def test_exception_base():
    """Check that xapian exceptions have Exception as a base class.

    A xapian exception is raised and caught using `except Exception`.  If
    the xapian class did not derive from Exception, the handler would not
    match and the exception would propagate out of this test instead.

    """
    try:
        raise xapian.InvalidOperationError("Test exception")
    except Exception:
        # Deliberately empty: reaching this handler is the whole check.
        # The exception object itself isn't needed, so it isn't bound (which
        # also avoids the Python-2-only "except Exception, e" spelling).
        pass
def test_mset_iter():
    """Exercise iteration over MSets and the other ways of reading hits.

    Items obtained by iterating are compared against get_hit(), against
    indexing with [], and against the items attribute, for the full mset and
    for every sub-mset window with start and maxitems each in 0..5.

    """
    db = setup_database()
    either = xapian.Query(xapian.Query.OP_OR, "was", "it")

    enquire = xapian.Enquire(db)
    enquire.set_query(either)
    mset = enquire.get_mset(0, 10)
    items = list(mset)
    expect(len(items), 5)
    expect(len(mset), len(items), "Expected number of items to be length of mset")

    context("testing returned item from mset")
    third = items[2]
    expect(third.docid, 4)
    expect(third.rank, 2)
    expect(third.percent, 86)
    expect(third.collapse_key, '')
    expect(third.collapse_count, 0)
    expect(third.document.get_data(), 'was it warm? three')

    # Test coverage for mset.items
    mset_items = mset.items
    expect(len(mset), len(mset_items), "Expected number of items to be length of mset")

    context("testing mset_items[2]")
    third_entry = mset_items[2]
    expect(third_entry[xapian.MSET_DID], 4)
    expect(third_entry[xapian.MSET_WT] > 0.0, True)
    expect(third_entry[xapian.MSET_RANK], 2)
    expect(third_entry[xapian.MSET_PERCENT], 86)
    # MSET_DOCUMENT is documented but not implemented!  FIXME: resolve this -
    # if it has never worked, we may just want to remove the documentation for
    # it.
    #expect(mset_items[2][xapian.MSET_DOCUMENT].get_data(), 'was it warm? three')

    # Check iterators for sub-msets against the whole mset.
    for start in range(6):
        for maxitems in range(6):
            context("checking iterators for sub-mset from %d, maxitems %d" % (start, maxitems))
            submset = enquire.get_mset(start, maxitems)
            for num, item in enumerate(submset):
                context("testing hit %d for sub-mset from %d, maxitems %d" % (num, start, maxitems))
                expect(item.rank, num + start)

                # The same hit fetched from the sub-mset by position.
                context("comparing iterator item %d for sub-mset from %d, maxitems %d against hit" % (num, start, maxitems))
                hit = submset.get_hit(num)
                expect(hit.docid, item.docid)
                expect(hit.rank, item.rank)
                expect(hit.percent, item.percent)
                expect(hit.document.get_data(), item.document.get_data())
                expect(hit.collapse_key, item.collapse_key)
                expect(hit.collapse_count, item.collapse_count)

                # The corresponding hit in the full mset, offset by start.
                context("comparing iterator item %d for sub-mset from %d, maxitems %d against hit from whole mset" % (num, start, maxitems))
                hit = mset.get_hit(num + start)
                expect(hit.docid, item.docid)
                expect(hit.rank, item.rank)
                expect(hit.percent, item.percent)
                expect(hit.document.get_data(), item.document.get_data())
                expect(hit.collapse_key, item.collapse_key)
                expect(hit.collapse_count, item.collapse_count)

                # Indexing the sub-mset directly, once per attribute.
                context("comparing iterator item %d for sub-mset from %d, maxitems %d against direct access with []" % (num, start, maxitems))
                expect(submset[num].docid, item.docid)
                expect(submset[num].rank, item.rank)
                expect(submset[num].percent, item.percent)
                expect(submset[num].document.get_data(), item.document.get_data())
                expect(submset[num].collapse_key, item.collapse_key)
                expect(submset[num].collapse_count, item.collapse_count)

            context("Checking out of range access to mset, for sub-mset from %d, maxitems %d" % (start, maxitems))
            # Indices well outside the mset, and the two just beyond each end,
            # must all be rejected.
            for bad_index in (-10, 10, -1 - len(submset), len(submset)):
                expect_exception(IndexError, 'Mset index out of range',
                                 submset.__getitem__, bad_index)

            # Items collected during iteration must still be readable after
            # the iterator has moved on.
            saved_items = list(submset)
            for num, item in enumerate(saved_items):
                context("comparing iterator item %d for sub-mset mset from %d, maxitems %d against saved item" % (num, start, maxitems))
                expect(submset[num].docid, item.docid)
                expect(submset[num].rank, item.rank)
                expect(submset[num].percent, item.percent)
                expect(submset[num].document.get_data(), item.document.get_data())
                expect(submset[num].collapse_key, item.collapse_key)
                expect(submset[num].collapse_count, item.collapse_count)

            # The window can hold at most maxitems hits, and no more than are
            # left of the 5 matches after skipping start of them.
            context("checking length of sub-mset from %d, maxitems %d" % (start, maxitems))
            items = list(submset)
            wanted = min(maxitems, 5 - start)
            expect(len(items), wanted)
            expect(len(submset), wanted)
def test_eset_iter():
    """Exercise iteration over ESets, with and without a query being set.

    """
    db = setup_database()
    query = xapian.Query(xapian.Query.OP_OR, "was", "it")
    relevant = xapian.RSet()
    relevant.add_document(3)

    context("getting eset items without a query")
    enquire = xapian.Enquire(db)
    eset = enquire.get_eset(10, relevant)
    items = list(eset)
    expect(len(items), 3)
    expect(len(items), len(eset))

    context("getting eset items with a query")
    enquire = xapian.Enquire(db)
    enquire.set_query(query)
    eset = enquire.get_eset(10, relevant)
    items2 = list(eset)
    expect(len(items2), 2)
    expect(len(items2), len(eset))

    # Each item from the with-query eset, paired with the item from the
    # without-query eset it is expected to agree with.
    pairs = ((items2[0], items[0]), (items2[1], items[2]))

    context("comparing eset items with a query to those without")
    for with_query, without_query in pairs:
        expect(with_query.term, without_query.term)

    context("comparing eset weights with a query to those without")
    for with_query, without_query in pairs:
        expect(with_query.weight, without_query.weight)
def test_matchingterms_iter():
    """Exercise the iterator returned by Enquire.matching_terms().

    """
    db = setup_database()
    query = xapian.Query(xapian.Query.OP_OR, ("was", "it", "warm", "two"))

    # Prior to 1.2.4 Enquire.matching_terms() leaked references to its members.

    enquire = xapian.Enquire(db)
    enquire.set_query(query)
    mset = enquire.get_mset(0, 10)

    for item in mset:
        # Asking by docid and asking by mset item must give the same terms.
        by_docid = list(enquire.matching_terms(item.docid))
        by_item = list(enquire.matching_terms(item))
        expect(by_docid, by_item)

    top_hit_terms = list(enquire.matching_terms(mset.get_hit(0)))
    expect(top_hit_terms, ['it', 'two', 'warm', 'was'])
def test_queryterms_iter():
    """Exercise iteration over the terms of a Query.

    """
    db = setup_database()
    query = xapian.Query(xapian.Query.OP_OR, ("was", "it", "warm", "two"))

    # Iterating the query yields its term names.
    expect(list(query), ['it', 'two', 'warm', 'was'])
def test_queryparser_stoplist_iter():
    """Exercise the iterator returned by QueryParser.stoplist().

    The same text is parsed three times: with a stemmer only, with a stopper
    only, and with both.

    """
    text = 'to be or not to be is the questions'
    stemmer = xapian.Stem('en')

    # Stemmer but no stopper: the stoplist stays empty.
    queryparser = xapian.QueryParser()
    queryparser.set_stemmer(stemmer)
    queryparser.set_stemming_strategy(queryparser.STEM_SOME)
    expect(list(queryparser.stoplist()), [])
    query = queryparser.parse_query(text)
    expect(list(queryparser.stoplist()), [])
    expect(str(query),
           'Query((Zto@1 OR Zbe@2 OR Zor@3 OR Znot@4 OR Zto@5 OR Zbe@6 OR '
           'Zis@7 OR Zthe@8 OR Zquestion@9))')

    # Stopper but no stemmer: the stoplist is filled in by parsing.
    queryparser = xapian.QueryParser()
    stopper = xapian.SimpleStopper()
    for word in ('to', 'not', 'question'):
        stopper.add(word)
    queryparser.set_stopper(stopper)
    expect(list(queryparser.stoplist()), [])
    query = queryparser.parse_query(text)

    expect(list(queryparser.stoplist()), ['to', 'not', 'to'])
    expect(str(query),
           'Query((be@2 OR or@3 OR be@6 OR is@7 OR the@8 OR questions@9))')

    # Stopper and stemmer together.
    queryparser.set_stemmer(stemmer)
    queryparser.set_stemming_strategy(queryparser.STEM_SOME)
    # Shouldn't have changed since previous query.
    expect(list(queryparser.stoplist()), ['to', 'not', 'to'])
    query = queryparser.parse_query(text)

    expect(list(queryparser.stoplist()), ['to', 'not', 'to'])
    expect(str(query),
           'Query((Zbe@2 OR Zor@3 OR Zbe@6 OR Zis@7 OR Zthe@8 OR Zquestion@9))')
def test_queryparser_unstem_iter():
    """Exercise the iterator returned by QueryParser.unstemlist().

    """
    text = 'to question questions'
    stemmer = xapian.Stem('en')

    # Without a stemmer each term is its own unstemmed form.
    queryparser = xapian.QueryParser()
    for term in ('to', 'question', 'questions'):
        expect(list(queryparser.unstemlist(term)), [])
    query = queryparser.parse_query(text)

    for term in ('to', 'question', 'questions'):
        expect(list(queryparser.unstemlist(term)), [term])
    expect(str(query),
           'Query((to@1 OR question@2 OR questions@3))')

    # With a stemmer, both "question" and "questions" are listed under the
    # single stemmed term.
    queryparser = xapian.QueryParser()
    queryparser.set_stemmer(stemmer)
    queryparser.set_stemming_strategy(queryparser.STEM_SOME)
    for term in ('Zto', 'Zquestion', 'Zquestions'):
        expect(list(queryparser.unstemlist(term)), [])
    query = queryparser.parse_query(text)

    expect(list(queryparser.unstemlist('Zto')), ['to'])
    expect(list(queryparser.unstemlist('Zquestion')), ['question', 'questions'])
    expect(list(queryparser.unstemlist('Zquestions')), [])
    expect(str(query),
           'Query((Zto@1 OR Zquestion@2 OR Zquestion@3))')
def test_allterms_iter():
    """Exercise the all-terms iterator of a Database.

    """
    db = setup_database()

    context("making a list of the term names and frequencies")
    terms = []
    freqs = []
    for termitem in db:
        terms.append(termitem.term)
        # wdf and positions are not available from this iterator.
        expect_exception(xapian.InvalidOperationError, 'Iterator does not support wdfs', getattr, termitem, 'wdf')
        freqs.append(termitem.termfreq)
        expect_exception(xapian.InvalidOperationError, 'Iterator does not support position lists', getattr, termitem, 'positer')

    context("checking that items are no longer valid once the iterator has moved on")
    termitems = list(db)

    # The term name of a saved item can still be read...
    expect(len(termitems), len(terms))
    for saved, term in zip(termitems, terms):
        expect(saved.term, term)

    # ...but its term frequency cannot.
    expect(len(termitems), len(freqs))
    for saved in termitems:
        expect_exception(xapian.InvalidOperationError, 'Iterator has moved, and does not support random access', getattr, saved, 'termfreq')

    context("checking that restricting the terms iterated with a prefix works")
    # Work out what is expected from the unrestricted listing made above.
    prefixed = [(term, freq) for term, freq in zip(terms, freqs)
                if term[0] == 't']
    seen = 0
    for termitem in db.allterms('t'):
        wanted_term, wanted_freq = prefixed[seen]
        expect(termitem.term, wanted_term)
        expect(termitem.termfreq, wanted_freq)
        seen += 1
    expect(len(prefixed), seen)
def test_termlist_iter():
    """Test termlist iterator on Database.

    Covers plain iteration over the termlist of document 3, the behaviour of
    skip_to() and next() on the same iterator, and which attributes of an item
    can still be read once the iterator has moved past it.

    """
    db = setup_database()

    # Make lists of the item contents
    terms = []
    wdfs = []
    freqs = []
    positers = []
    for termitem in db.termlist(3):
        terms.append(termitem.term)
        wdfs.append(termitem.wdf)
        freqs.append(termitem.termfreq)
        positers.append([pos for pos in termitem.positer])

    expect(terms, ['it', 'two', 'warm', 'was'])
    expect(wdfs, [1, 2, 1, 1])
    expect(freqs, [5, 3, 4, 4])
    expect(positers, [[2], [], [3], [1]])

    # Test skip_to().
    tliter = db.termlist(3)

    # skip to an item before the first item.
    termitem = tliter.skip_to('a')
    expect((termitem.term, termitem.wdf, termitem.termfreq,
            [pos for pos in termitem.positer]), ('it', 1, 5, [2]))

    # skip forwards to an item.
    termitem = tliter.skip_to('two')
    expect((termitem.term, termitem.wdf, termitem.termfreq,
            [pos for pos in termitem.positer]), ('two', 2, 3, []))

    # skip to same place (should return same item)
    termitem = tliter.skip_to('two')
    expect((termitem.term, termitem.wdf, termitem.termfreq,
            [pos for pos in termitem.positer]), ('two', 2, 3, []))

    # next() after a skip_to(), should return next item.
    termitem = next(tliter)
    expect((termitem.term, termitem.wdf, termitem.termfreq,
            [pos for pos in termitem.positer]), ('warm', 1, 4, [3]))

    # skip to same place (should return same item)
    termitem = tliter.skip_to('warm')
    expect((termitem.term, termitem.wdf, termitem.termfreq,
            [pos for pos in termitem.positer]), ('warm', 1, 4, [3]))

    # skip backwards (should return same item)
    # NOTE(review): the returned item is not checked here, unlike the
    # equivalent step in test_postinglist_iter - confirm whether that is
    # intentional before adding an assertion.
    termitem = tliter.skip_to('a')

    # skip to after end.
    expect_exception(StopIteration, '', tliter.skip_to, 'zoo')
    # skip backwards (should still return StopIteration).
    expect_exception(StopIteration, '', tliter.skip_to, 'a')
    # next should continue to return StopIteration.
    expect_exception(StopIteration, '', next, tliter)

    # Make a list of the terms (so we can test if they're still valid
    # once the iterator has moved on).
    termitems = [termitem for termitem in db.termlist(3)]

    # term and wdf remain readable on saved items.
    expect(len(termitems), len(terms))
    for i in range(len(termitems)):
        expect(termitems[i].term, terms[i])

    expect(len(termitems), len(wdfs))
    for i in range(len(termitems)):
        expect(termitems[i].wdf, wdfs[i])

    # termfreq and positer do not.
    expect(len(termitems), len(freqs))
    for termitem in termitems:
        expect_exception(xapian.InvalidOperationError,
                         'Iterator has moved, and does not support random access',
                         getattr, termitem, 'termfreq')

    # Compare against positers here (this previously re-checked freqs, so the
    # positers list was never length-checked).
    expect(len(termitems), len(positers))
    for termitem in termitems:
        expect_exception(xapian.InvalidOperationError,
                         'Iterator has moved, and does not support random access',
                         getattr, termitem, 'positer')
def test_dbdocument_iter():
    """Test document terms iterator for document taken from a database.

    Document 3 is fetched from the fixture database and its terms are
    iterated, checking the contents of each item and which attributes can
    still be read once the iterator has moved past it.

    """
    db = setup_database()

    doc = db.get_document(3)

    # Make lists of the item contents
    terms = []
    wdfs = []
    freqs = []
    positers = []
    for termitem in doc:
        terms.append(termitem.term)
        wdfs.append(termitem.wdf)
        freqs.append(termitem.termfreq)
        positers.append([pos for pos in termitem.positer])

    expect(terms, ['it', 'two', 'warm', 'was'])
    expect(wdfs, [1, 2, 1, 1])
    expect(freqs, [5, 3, 4, 4])
    expect(positers, [[2], [], [3], [1]])

    # Make a list of the terms (so we can test if they're still valid
    # once the iterator has moved on).
    termitems = [termitem for termitem in doc]

    # term and wdf remain readable on saved items.
    expect(len(termitems), len(terms))
    for i in range(len(termitems)):
        expect(termitems[i].term, terms[i])

    expect(len(termitems), len(wdfs))
    for i in range(len(termitems)):
        expect(termitems[i].wdf, wdfs[i])

    # termfreq and positer do not.
    expect(len(termitems), len(freqs))
    for termitem in termitems:
        expect_exception(xapian.InvalidOperationError,
                         'Iterator has moved, and does not support random access',
                         getattr, termitem, 'termfreq')

    # Compare against positers here (this previously re-checked freqs, so the
    # positers list was never length-checked).
    expect(len(termitems), len(positers))
    for termitem in termitems:
        expect_exception(xapian.InvalidOperationError,
                         'Iterator has moved, and does not support random access',
                         getattr, termitem, 'positer')
def test_newdocument_iter():
    """Exercise the terms iterator of a Document which was built locally.

    The document is never added to a database, so asking an item for its
    term frequency is expected to fail.

    """
    doc = xapian.Document()
    doc.set_data("was it warm? two")
    doc.add_posting("was", 1)
    doc.add_posting("it", 2)
    doc.add_posting("warm", 3)
    doc.add_term("two", 2)

    # Record what each item reports while the iterator is positioned on it.
    terms = []
    wdfs = []
    positers = []
    for termitem in doc:
        terms.append(termitem.term)
        wdfs.append(termitem.wdf)
        expect_exception(xapian.InvalidOperationError,
                         "Can't get term frequency from a document termlist "
                         "which is not associated with a database.",
                         getattr, termitem, 'termfreq')
        positers.append(list(termitem.positer))

    expect(terms, ['it', 'two', 'warm', 'was'])
    expect(wdfs, [1, 2, 1, 1])
    expect(positers, [[2], [], [3], [1]])

    # Keep every item, so that they can be examined after the iterator has
    # moved on from them.
    termitems = list(doc)

    # term and wdf remain readable on saved items.
    expect(len(termitems), len(terms))
    for saved, term in zip(termitems, terms):
        expect(saved.term, term)

    expect(len(termitems), len(wdfs))
    for saved, wdf in zip(termitems, wdfs):
        expect(saved.wdf, wdf)

    # termfreq and positer do not.
    for saved in termitems:
        expect_exception(xapian.InvalidOperationError,
                         'Iterator has moved, and does not support random access',
                         getattr, saved, 'termfreq')

    expect(len(termitems), len(positers))
    for saved in termitems:
        expect_exception(xapian.InvalidOperationError,
                         'Iterator has moved, and does not support random access',
                         getattr, saved, 'positer')
def test_postinglist_iter():
    """Exercise the posting list iterator of a Database.

    Covers plain iteration over the postings of "it", the behaviour of
    skip_to() and next() on the same iterator, and which attributes of a
    posting can still be read once the iterator has moved past it.

    """
    db = setup_database()

    def summary(posting):
        # Everything a posting exposes, in a form expect() can compare.
        return (posting.docid, posting.doclength, posting.wdf,
                list(posting.positer))

    # Record what each posting reports while the iterator is positioned on it.
    docids = []
    doclengths = []
    wdfs = []
    positers = []
    for posting in db.postlist('it'):
        docids.append(posting.docid)
        doclengths.append(posting.doclength)
        wdfs.append(posting.wdf)
        positers.append(list(posting.positer))

    expect(docids, [1, 2, 3, 4, 5])
    expect(doclengths, [3, 3, 5, 8, 19])
    expect(wdfs, [1, 1, 1, 1, 8])
    expect(positers, [[1], [2], [2], [2], [2, 7]])

    # Test skip_to().
    pliter = db.postlist('it')

    # Skipping to before the first posting lands on the first posting.
    expect(summary(pliter.skip_to(0)), (1, 3, 1, [1]))

    # Skipping forwards to a posting which exists.
    expect(summary(pliter.skip_to(3)), (3, 5, 1, [2]))

    # Skipping to the current position gives the same posting again.
    expect(summary(pliter.skip_to(3)), (3, 5, 1, [2]))

    # next() after a skip_to() gives the following posting.
    expect(summary(next(pliter)), (4, 8, 1, [2]))

    # Skipping to the current position gives the same posting again.
    expect(summary(pliter.skip_to(4)), (4, 8, 1, [2]))

    # Skipping backwards doesn't move the iterator.
    expect(summary(pliter.skip_to(2)), (4, 8, 1, [2]))

    # Skipping past the last posting ends the iteration.
    expect_exception(StopIteration, '', pliter.skip_to, 6)
    # Repeating that skip still raises StopIteration.
    expect_exception(StopIteration, '', pliter.skip_to, 6)
    # So does next().
    expect_exception(StopIteration, '', next, pliter)

    # Keep every posting, so that they can be examined after the iterator
    # has moved on from them.
    postings = list(db.postlist('it'))

    # docid, doclength and wdf remain readable on saved postings.
    expect(len(postings), len(docids))
    for saved, docid in zip(postings, docids):
        expect(saved.docid, docid)

    expect(len(postings), len(doclengths))
    for saved, doclength in zip(postings, doclengths):
        expect(saved.doclength, doclength)

    expect(len(postings), len(wdfs))
    for saved, wdf in zip(postings, wdfs):
        expect(saved.wdf, wdf)

    # positer does not.
    expect(len(postings), len(positers))
    for saved in postings:
        expect_exception(xapian.InvalidOperationError,
                         'Iterator has moved, and does not support random access',
                         getattr, saved, 'positer')
def test_valuestream_iter():
    """Test a valuestream iterator on Database.

    Covers plain iteration over several value slots, and the interaction of
    skip_to() and next() on the same iterator, including after the iterator
    has been exhausted.

    The iterator is advanced with the next() builtin rather than by calling
    its .next() method directly, matching the other iterator tests in this
    file.

    """
    db = setup_database()

    # Check basic iteration
    expect([(item.docid, item.value) for item in db.valuestream(0)],
           [(3, '\xa4'), (4, '\xa2'), (5, '\xa4')])
    expect([(item.docid, item.value) for item in db.valuestream(1)], [])
    expect([(item.docid, item.value) for item in db.valuestream(5)],
           [(5, "five")])
    expect([(item.docid, item.value) for item in db.valuestream(9)],
           [(5, "nine")])

    # Test skip_to() on iterator with no values, and behaviours when called
    # after already returning StopIteration.
    i = db.valuestream(1)
    expect_exception(StopIteration, "", i.skip_to, 1)
    expect_exception(StopIteration, "", i.skip_to, 1)
    i = db.valuestream(1)
    expect_exception(StopIteration, "", i.skip_to, 1)
    expect_exception(StopIteration, "", next, i)
    i = db.valuestream(1)
    expect_exception(StopIteration, "", next, i)
    expect_exception(StopIteration, "", i.skip_to, 1)

    # Test that skipping to a value works, and that skipping doesn't have to
    # advance.
    i = db.valuestream(0)
    item = i.skip_to(4)
    expect((item.docid, item.value), (4, '\xa2'))
    item = i.skip_to(4)
    expect((item.docid, item.value), (4, '\xa2'))
    item = i.skip_to(1)
    expect((item.docid, item.value), (4, '\xa2'))
    item = i.skip_to(5)
    expect((item.docid, item.value), (5, '\xa4'))
    expect_exception(StopIteration, "", i.skip_to, 6)

    # Test that alternating skip_to() and next() works.
    i = db.valuestream(0)
    item = next(i)
    expect((item.docid, item.value), (3, '\xa4'))
    item = i.skip_to(4)
    expect((item.docid, item.value), (4, '\xa2'))
    item = next(i)
    expect((item.docid, item.value), (5, '\xa4'))
    expect_exception(StopIteration, "", i.skip_to, 6)

    # Test that next works correctly after skip_to() called with an earlier
    # item.
    i = db.valuestream(0)
    item = i.skip_to(4)
    expect((item.docid, item.value), (4, '\xa2'))
    item = i.skip_to(1)
    expect((item.docid, item.value), (4, '\xa2'))
    item = next(i)
    expect((item.docid, item.value), (5, '\xa4'))

    # Test that next works correctly after skipping to last item
    i = db.valuestream(0)
    item = i.skip_to(5)
    expect((item.docid, item.value), (5, '\xa4'))
    expect_exception(StopIteration, "", next, i)
def test_position_iter():
    """Exercise the position iterator for a term of a document in a database.

    """
    db = setup_database()

    # The document is fetched, but the positions themselves are read through
    # the database below.
    doc = db.get_document(5)

    # Collect the positions at which "it" occurs in document 5.
    positions = list(db.positionlist(5, 'it'))

    expect(positions, [2, 7])
def test_value_iter():
    """Exercise iteration over the values stored in a document.

    """
    db = setup_database()
    doc = db.get_document(5)

    items = list(doc.values())
    expect(len(items), 3)

    # Slot number and contents expected for each item, in iteration order.
    wanted = [
        (0, xapian.sortable_serialise(2)),
        (5, 'five'),
        (9, 'nine'),
    ]
    for item, (num, value) in zip(items, wanted):
        expect(item.num, num)
        expect(item.value, value)
def test_synonyms_iter():
    """Exercise the synonym and synonym-key iterators of a database.

    The writable database is checked before and after commit(), and each time
    a reader is opened on the same path to check what it can see.

    """
    dbpath = 'db_test_synonyms_iter'
    db = xapian.WritableDatabase(dbpath, xapian.DB_CREATE_OR_OVERWRITE)

    def check_synonyms(database, populated):
        # Run the same six lookups; they find the synonyms added below when
        # populated is true, and nothing at all otherwise.
        if populated:
            hello_synonyms = ['hi', 'howdy']
            hello_keys = ['hello']
        else:
            hello_synonyms = []
            hello_keys = []
        expect(list(database.synonyms('foo')), [])
        expect(list(database.synonyms('hello')), hello_synonyms)
        expect(list(database.synonym_keys()), hello_keys)
        expect(list(database.synonym_keys('foo')), [])
        expect(list(database.synonym_keys('he')), hello_keys)
        expect(list(database.synonym_keys('hello')), hello_keys)

    db.add_synonym('hello', 'hi')
    db.add_synonym('hello', 'howdy')

    # The writer sees its own uncommitted synonyms...
    check_synonyms(db, True)

    # ...but a reader opened before the commit does not.
    dbr = xapian.Database(dbpath)
    check_synonyms(dbr, False)

    db.commit()

    # After the commit the writer and a newly opened reader both see them.
    check_synonyms(db, True)

    dbr = xapian.Database(dbpath)
    check_synonyms(dbr, True)

    db.close()
    expect(xapian.Database.check(dbpath), 0)
    dbr.close()
    shutil.rmtree(dbpath)
def test_metadata_keys_iter():
    """Exercise the metadata key iterator of a database.

    The writable database is checked before and after commit(), and each time
    a reader is opened on the same path to check what it can see.

    """
    dbpath = 'db_test_metadata_iter'
    db = xapian.WritableDatabase(dbpath, xapian.DB_CREATE_OR_OVERWRITE)

    def check_keys(database, populated):
        # Run the same five lookups; they find the keys set below when
        # populated is true, and nothing at all otherwise.
        if populated:
            all_keys = ['author', 'item1', 'item2', 'type']
            item_keys = ['item1', 'item2']
            type_keys = ['type']
        else:
            all_keys = []
            item_keys = []
            type_keys = []
        expect(list(database.metadata_keys()), all_keys)
        expect(list(database.metadata_keys('foo')), [])
        expect(list(database.metadata_keys('item')), item_keys)
        expect(list(database.metadata_keys('it')), item_keys)
        expect(list(database.metadata_keys('type')), type_keys)

    # item3 and item4 end up with an empty value, and are expected to be
    # missing from the key listings checked below.
    db.set_metadata('author', 'richard')
    db.set_metadata('item1', 'hello')
    db.set_metadata('item1', 'hi')
    db.set_metadata('item2', 'howdy')
    db.set_metadata('item3', '')
    db.set_metadata('item4', 'goodbye')
    db.set_metadata('item4', '')
    db.set_metadata('type', 'greeting')

    # The writer sees its own uncommitted metadata...
    check_keys(db, True)

    # ...but a reader opened before the commit does not.
    dbr = xapian.Database(dbpath)
    check_keys(dbr, False)

    db.commit()

    # After the commit the writer and a newly opened reader both see it.
    check_keys(db, True)

    dbr = xapian.Database(dbpath)
    check_keys(dbr, True)

    db.close()
    expect(xapian.Database.check(dbpath), 0)
    dbr.close()
    shutil.rmtree(dbpath)
def test_spell():
    """Exercise the basic spelling correction features.

    """
    dbpath = 'db_test_spell'
    db = xapian.WritableDatabase(dbpath, xapian.DB_CREATE_OR_OVERWRITE)

    def spellings_of(database):
        # The spelling dictionary as a list of (word, frequency) pairs.
        return [(item.term, item.termfreq) for item in database.spellings()]

    db.add_spelling('hello')
    db.add_spelling('mell', 2)

    # The writer sees its own uncommitted spellings...
    expect(db.get_spelling_suggestion('hell'), 'mell')
    expect(spellings_of(db), [('hello', 1), ('mell', 2)])

    # ...but a reader opened before the commit does not.
    dbr = xapian.Database(dbpath)
    expect(dbr.get_spelling_suggestion('hell'), '')
    expect(spellings_of(dbr), [])

    # After the commit the writer and a newly opened reader both see them.
    db.commit()
    dbr = xapian.Database(dbpath)
    expect(db.get_spelling_suggestion('hell'), 'mell')
    expect(dbr.get_spelling_suggestion('hell'), 'mell')
    expect(spellings_of(dbr), [('hello', 1), ('mell', 2)])

    db.close()
    dbr.close()
    shutil.rmtree(dbpath)
def test_queryparser_custom_vrp():
    """Exercise QueryParser with a ValueRangeProcessor written in Python.

    """
    class MyVRP(xapian.ValueRangeProcessor):
        # Always claims the range for slot 7, marking each end with a letter
        # so that the result is recognisable in the parsed query.
        def __init__(self):
            xapian.ValueRangeProcessor.__init__(self)

        def __call__(self, begin, end):
            lower = "A" + begin
            upper = "B" + end
            return (7, lower, upper)

    parser = xapian.QueryParser()
    processor = MyVRP()

    parser.add_valuerangeprocessor(processor)
    parsed = parser.parse_query('5..8')

    expect(str(parsed),
           'Query(0 * VALUE_RANGE 7 A5 B8)')
def test_queryparser_custom_vrp_deallocation():
    """Check that QueryParser doesn't delete ValueRangeProcessors too soon.

    The processor is created inside a helper function, so the parser is the
    only thing left referring to it by the time a query is parsed.

    """
    class MyVRP(xapian.ValueRangeProcessor):
        # Always claims the range for slot 7, marking each end with a letter
        # so that the result is recognisable in the parsed query.
        def __init__(self):
            xapian.ValueRangeProcessor.__init__(self)

        def __call__(self, begin, end):
            lower = "A" + begin
            upper = "B" + end
            return (7, lower, upper)

    def make_parser():
        # The local name for the processor goes out of scope on return.
        parser = xapian.QueryParser()
        processor = MyVRP()
        parser.add_valuerangeprocessor(processor)
        return parser

    queryparser = make_parser()
    parsed = queryparser.parse_query('5..8')

    expect(str(parsed),
           'Query(0 * VALUE_RANGE 7 A5 B8)')
def test_scale_weight():
    """Exercise the OP_SCALE_WEIGHT query operator.

    For several multipliers the weights from a scaled query are compared with
    the weights from the plain query multiplied by the same factor.  A
    negative multiplier is expected to be rejected.

    """
    db = setup_database()
    for mult in (0, 1, 2.5):
        context("checking queries with OP_SCALE_WEIGHT with a multiplier of %r" %
                mult)
        query1 = xapian.Query("it")
        query2 = xapian.Query(xapian.Query.OP_SCALE_WEIGHT, query1, mult)

        enquire = xapian.Enquire(db)
        enquire.set_query(query1)
        mset1 = enquire.get_mset(0, 10)
        enquire.set_query(query2)
        mset2 = enquire.get_mset(0, 10)

        # Weights are compared as whole millionths.
        if mult > 0:
            expected = [(int(item.weight * mult * 1000000), item.docid)
                        for item in mset1]
        else:
            # Every weight is expected to be zero; the expected list is put
            # into docid order.
            expected = sorted((0, item.docid) for item in mset1)
        actual = [(int(item.weight * 1000000), item.docid) for item in mset2]
        expect(actual, expected)

    context("checking queries with OP_SCALE_WEIGHT with a multiplier of -1")
    query1 = xapian.Query("it")
    expect_exception(xapian.InvalidArgumentError,
                     "OP_SCALE_WEIGHT requires factor >= 0",
                     xapian.Query,
                     xapian.Query.OP_SCALE_WEIGHT, query1, -1)
def test_weight_normalise():
    """Test normalising of query weights using the OP_SCALE_WEIGHT feature.

    Each query is first run asking for no results, which gives the maximum
    possible weight via MSet.get_max_possible().  The query is then wrapped
    in OP_SCALE_WEIGHT with the reciprocal of that maximum and run again.

    This checks that the scaled query reports a maximum possible weight of 1
    (to six decimal places) and that every returned weight lies in (0, 1].

    """
    queries = (
        "it",
        "was",
        "it was",
        "it was four",
        "it was four five",
        "\"was it warm\" four notpresent",
        "notpresent",
    )
    db = setup_database()
    for query in queries:
        context("checking query %r using OP_SCALE_WEIGHT to normalise the weights" % query)
        parser = xapian.QueryParser()
        unscaled = parser.parse_query(query)
        enquire = xapian.Enquire(db)
        enquire.set_query(unscaled)
        probe = enquire.get_mset(0, 0)

        # Check the max_attained value is 0 - this gives us some reassurance
        # that the match didn't actually do the work of calculating any
        # results.
        expect(probe.get_max_attained(), 0)

        max_possible = probe.get_max_possible()
        if query == "notpresent":
            # Nothing to normalise, and dividing by max_possible would fail.
            expect(max_possible, 0)
            continue
        mult = 1.0 / max_possible
        scaled = xapian.Query(xapian.Query.OP_SCALE_WEIGHT, unscaled, mult)

        enquire = xapian.Enquire(db)
        enquire.set_query(scaled)
        results = enquire.get_mset(0, 10)
        # max_possible should be 1 (excluding rounding errors) for results
        expect(int(results.get_max_possible() * 1000000.0 + 0.5), 1000000)
        for item in results:
            expect(item.weight > 0, True)
            expect(item.weight <= 1, True)
def test_valuesetmatchdecider():
    """Simple tests of the ValueSetMatchDecider class.

    Exercises a decider built with True and one built with False as the
    second constructor argument, against a document with 'foo' in slot 0.

    """
    including = xapian.ValueSetMatchDecider(0, True)
    doc = xapian.Document()
    expect(including(doc), False)

    including.add_value('foo')
    doc.add_value(0, 'foo')
    expect(including(doc), True)

    including.remove_value('foo')
    expect(including(doc), False)

    excluding = xapian.ValueSetMatchDecider(0, False)
    expect(excluding(doc), True)

    excluding.add_value('foo')
    expect(excluding(doc), False)
def test_postingsource():
    """Simple test of a PostingSource subclassed in Python.

    The source yields the odd docids up to a limit.  It is wrapped in
    several layers of helper functions so that the Python references to the
    source, the query and the first Enquire have all gone out of scope before
    the match is run.

    """
    class OddPostingSource(xapian.PostingSource):
        def __init__(self, limit):
            xapian.PostingSource.__init__(self)
            self.limit = limit

        def init(self, db):
            # The first next() call moves this to 1.
            self.current = -1

        def get_termfreq_min(self):
            return 0

        def get_termfreq_est(self):
            return int(self.limit / 2)

        def get_termfreq_max(self):
            return self.limit

        def next(self, minweight):
            self.current += 2

        def at_end(self):
            return self.current > self.limit

        def get_docid(self):
            return self.current

    dbpath = 'db_test_postingsource'
    db = xapian.WritableDatabase(dbpath, xapian.DB_CREATE_OR_OVERWRITE)
    for _ in range(10):
        db.add_document(xapian.Document())

    # Do a dance to check that the posting source doesn't get dereferenced too
    # soon in various cases.
    def build_enquire(db):
        # First - check that it's kept when the source goes out of scope.
        def build_query():
            source = OddPostingSource(10)
            # The posting source is inside a list to check that case is
            # correctly handled.
            subqueries = ["terM wHich wilL NoT maTch", xapian.Query(source)]
            return xapian.Query(xapian.Query.OP_OR, subqueries)

        # Check that it's kept when the query goes out of scope.
        def first_enquire():
            query = build_query()
            enquire = xapian.Enquire(db)
            enquire.set_query(query)
            return enquire

        # Check it's kept when the query is retrieved from enquire and put into
        # a new enquire.
        def second_enquire():
            original = first_enquire()
            enquire = xapian.Enquire(db)
            enquire.set_query(original.get_query())
            return enquire

        return second_enquire()

    enquire = build_enquire(db)
    mset = enquire.get_mset(0, 10)

    expect([item.docid for item in mset], [1, 3, 5, 7, 9])

    db.close()
    expect(xapian.Database.check(dbpath), 0)
    shutil.rmtree(dbpath)
def test_postingsource2():
    """Test a ValueWeightPostingSource whose Python reference is deleted.

    Ten documents get a sortable value in slot 1; the match is expected to
    return them in the docid order listed at the end of the test.

    """
    dbpath = 'db_test_postingsource2'
    db = xapian.WritableDatabase(dbpath, xapian.DB_CREATE_OR_OVERWRITE)
    vals = (6, 9, 4.5, 4.4, 4.6, 2, 1, 4, 3, 0)
    for val in vals:
        doc = xapian.Document()
        doc.add_value(1, xapian.sortable_serialise(val))
        db.add_document(doc)

    source = xapian.ValueWeightPostingSource(1)
    query = xapian.Query(source)
    del source # Check that query keeps a reference to it.

    enquire = xapian.Enquire(db)
    enquire.set_query(query)
    mset = enquire.get_mset(0, 10)

    docids = [item.docid for item in mset]
    expect(docids, [2, 1, 5, 3, 4, 8, 9, 6, 7, 10])

    db.close()
    expect(xapian.Database.check(dbpath), 0)
    shutil.rmtree(dbpath)
def test_value_stats():
    """Simple test of being able to get value statistics.

    Only slot 1 is populated, so slots 0 and 2 must report a frequency of 0
    and empty bounds, while slot 1 reports all ten documents and the
    serialised forms of the smallest and largest values.

    """
    dbpath = 'db_test_value_stats'
    db = xapian.WritableDatabase(dbpath, xapian.DB_CREATE_OR_OVERWRITE)

    vals = (6, 9, 4.5, 4.4, 4.6, 2, 1, 4, 3, 0)
    for val in vals:
        doc = xapian.Document()
        doc.add_value(1, xapian.sortable_serialise(val))
        db.add_document(doc)

    def check_slot(slot, freq, lower, upper):
        expect(db.get_value_freq(slot), freq)
        expect(db.get_value_lower_bound(slot), lower)
        expect(db.get_value_upper_bound(slot), upper)

    check_slot(0, 0, "", "")
    check_slot(1, 10, xapian.sortable_serialise(0), xapian.sortable_serialise(9))
    check_slot(2, 0, "", "")

    db.close()
    expect(xapian.Database.check(dbpath), 0)
    shutil.rmtree(dbpath)
def test_get_uuid():
    """Test getting UUIDs from databases.

    Two separate databases must have different UUIDs, a read-only handle must
    report the same UUID as the writable handle on the same path, and a
    Database with one database added reports that database's UUID.

    """
    dbpath = 'db_test_get_uuid'
    path1 = dbpath + "1"
    path2 = dbpath + "2"
    writer1 = xapian.WritableDatabase(path1, xapian.DB_CREATE_OR_OVERWRITE)
    writer2 = xapian.WritableDatabase(path2, xapian.DB_CREATE_OR_OVERWRITE)
    reader1 = xapian.Database(path1)
    reader2 = xapian.Database(path2)
    expect(writer1.get_uuid() != writer2.get_uuid(), True)
    expect(writer1.get_uuid(), reader1.get_uuid())
    expect(writer2.get_uuid(), reader2.get_uuid())

    combined = xapian.Database()
    combined.add_database(writer1)
    expect(writer1.get_uuid(), combined.get_uuid())

    for handle in (writer1, writer2, reader1, reader2, combined):
        handle.close()
    for path in (path1, path2):
        shutil.rmtree(path)
def test_director_exception():
    """Test handling of an exception raised in a director.

    Python subclasses of ExpandDecider and MatchDecider raise a custom
    exception; it must come out unchanged both when the decider is called
    directly and when it is called via Enquire.get_eset() / get_mset().

    """
    class TestException(Exception):
        def __init__(self, a, b):
            Exception.__init__(self, a + b)

    class RaisingExpandDecider(xapian.ExpandDecider):
        def __call__(self, term):
            raise TestException("foo", "bar")

    class RaisingMatchDecider(xapian.MatchDecider):
        def __call__(self, doc):
            raise TestException("foo", "bar")

    db = setup_database()
    enq = xapian.Enquire(db)
    enq.set_query(xapian.Query('it'))

    rset = xapian.RSet()
    rset.add_document(1)

    edecider = RaisingExpandDecider()
    expect_exception(TestException, "foobar", edecider, "foo")
    expect_exception(TestException, "foobar", enq.get_eset, 10, rset, edecider)

    mdecider = RaisingMatchDecider()
    expect_exception(TestException, "foobar", mdecider, xapian.Document())
    expect_exception(TestException, "foobar", enq.get_mset, 0, 10, None, mdecider)
def check_vals(db, vals):
    """Check that the value in slot 1 of every document matches vals.

    Walks docids 1 to db.get_lastdocid() inclusive and compares each stored
    value with vals[docid].

    """
    last = db.get_lastdocid()
    docid = 1
    while docid <= last:
        stored = db.get_document(docid).get_value(1)
        expect(stored, vals[docid], "Expected stored value in doc %d" % docid)
        docid += 1
def test_value_mods():
    """Test handling of modifications to values.

    The stored values are checked against an in-memory dict after each
    stage, both before and after committing.  The random generator is seeded
    so that the sequence of modifications is the same on every run.

    """
    dbpath = 'db_test_value_mods'
    db = xapian.WritableDatabase(dbpath, xapian.DB_CREATE_OR_OVERWRITE)
    random.seed(42)
    doccount = 1000
    vals = {}

    # Add a value to all the documents
    for num in xrange(1, doccount):
        val = 'val%d' % num
        doc = xapian.Document()
        doc.add_value(1, val)
        db.add_document(doc)
        vals[num] = val
    db.commit()
    check_vals(db, vals)

    # Modify one of the values (this is a regression test which failed with the
    # initial implementation of streaming values).
    val = 'newval0'
    doc = xapian.Document()
    doc.add_value(1, val)
    db.replace_document(2, doc)
    vals[2] = val
    db.commit()
    check_vals(db, vals)

    # Do some random modifications.
    for count in xrange(1, doccount * 2):
        docid = random.randint(1, doccount)
        doc = xapian.Document()

        if count % 5 != 0:
            val = 'newval%d' % count
            doc.add_value(1, val)
        else:
            # Every fifth replacement stores a document with no value.
            val = ''
        db.replace_document(docid, doc)
        vals[docid] = val

    # Check the values before and after modification.
    check_vals(db, vals)
    db.commit()
    check_vals(db, vals)

    # Delete all the values which are non-empty, in a random order.
    nonempty = [key for key, val in vals.iteritems() if val != '']
    random.shuffle(nonempty)
    for key in nonempty:
        db.replace_document(key, xapian.Document())
        vals[key] = ''
    check_vals(db, vals)
    db.commit()
    check_vals(db, vals)

    db.close()
    expect_exception(xapian.DatabaseError, "Database has been closed", check_vals, db, vals)
    shutil.rmtree(dbpath)
def test_serialise_document():
    """Test serialisation of documents.

    A document is serialised and unserialised, and the copy must have the
    same terms (with wdf), values and data as the original.  This is done
    for a freshly built document and for one read from a database.

    """
    def check_roundtrip(doc, termcount, data):
        # Compare doc with its serialised-then-unserialised copy.
        copy = xapian.Document.unserialise(doc.serialise())
        expect(len(list(doc.termlist())), len(list(copy.termlist())))
        expect(len(list(doc.termlist())), termcount)
        expect([(item.term, item.wdf) for item in doc.termlist()],
               [(item.term, item.wdf) for item in copy.termlist()])
        expect([(item.num, item.value) for item in doc.values()],
               [(item.num, item.value) for item in copy.values()])
        expect(doc.get_data(), copy.get_data())
        expect(doc.get_data(), data)

    fresh = xapian.Document()
    fresh.add_term('foo', 2)
    fresh.add_value(1, 'bar')
    fresh.set_data('baz')
    check_roundtrip(fresh, 1, 'baz')

    db = setup_database()
    check_roundtrip(db.get_document(1), 3, 'is it cold?')
def test_serialise_query():
    """Test serialisation of queries.

    An empty query, a single-term query and an OR of two terms must each
    have the same description after a serialise/unserialise round trip.

    """
    def check_roundtrip(query, description):
        copy = xapian.Query.unserialise(query.serialise())
        expect(str(query), str(copy))
        expect(str(query), description)

    check_roundtrip(xapian.Query(), 'Query()')
    check_roundtrip(xapian.Query('hello'), 'Query(hello)')
    check_roundtrip(xapian.Query(xapian.Query.OP_OR, ('hello', 'world')),
                    'Query((hello OR world))')
def test_preserve_query_parser_stopper():
    """Test preservation of stopper set on query parser.

    The stopper is created, attached and then deleted inside a helper, so
    the parser is used after the Python reference to the stopper has gone.

    """
    def build_parser():
        parser = xapian.QueryParser()
        stopper = xapian.SimpleStopper()
        for word in ('to', 'not'):
            stopper.add(word)
        parser.set_stopper(stopper)
        del stopper
        return parser

    parser = build_parser()
    # The parsed query itself isn't checked; parsing fills in the stop list.
    query = parser.parse_query('to be')
    stopped = [term for term in parser.stoplist()]
    expect(stopped, ['to'])
def test_preserve_term_generator_stopper():
    """Test preservation of stopper set on term generator.

    The stopper is created, attached and then deleted inside a helper, so
    the term generator is used after the Python reference to it has gone.

    """
    def build_termgen():
        generator = xapian.TermGenerator()
        generator.set_stemmer(xapian.Stem('en'))
        stopper = xapian.SimpleStopper()
        for word in ('to', 'not'):
            stopper.add(word)
        generator.set_stopper(stopper)
        del stopper
        return generator

    termgen = build_termgen()

    termgen.index_text('to be')
    doc = termgen.get_document()
    terms = sorted(item.term for item in doc.termlist())
    expect(terms, ['Zbe', 'be', 'to'])
def test_preserve_enquire_sorter():
    """Test preservation of sorter set on enquire.

    For each of the three key-based sort settings, a MultiValueKeyMaker is
    attached inside a helper and its Python reference deleted before the
    Enquire is returned and a match is run with it.

    """
    db = xapian.inmemory_open()
    doc = xapian.Document()
    doc.add_term('foo')
    doc.add_value(1, '1')
    db.add_document(doc)
    db.add_document(doc)

    def by_key(db):
        enq = xapian.Enquire(db)
        sorter = xapian.MultiValueKeyMaker()
        enq.set_sort_by_key(sorter, False)
        del sorter
        return enq

    def by_key_then_relevance(db):
        enq = xapian.Enquire(db)
        sorter = xapian.MultiValueKeyMaker()
        enq.set_sort_by_key_then_relevance(sorter, False)
        del sorter
        return enq

    def by_relevance_then_key(db):
        enq = xapian.Enquire(db)
        sorter = xapian.MultiValueKeyMaker()
        enq.set_sort_by_relevance_then_key(sorter, False)
        del sorter
        return enq

    for make_enquire in (by_key, by_key_then_relevance, by_relevance_then_key):
        enq = make_enquire(db)
        enq.set_query(xapian.Query('foo'))
        enq.get_mset(0, 10)
def test_matchspy():
    """Test use of matchspies.

    Covers a spy whose Python reference is dropped before the match, a spy
    removed again with clear_matchspies(), and the counts collected by a
    ValueCountMatchSpy on slot 0.

    """
    db = setup_database()
    enq = xapian.Enquire(db)
    enq.set_query(xapian.Query(xapian.Query.OP_OR, "was", "it"))

    def add_unreferenced_spy(enq):
        """Set a matchspy, and then drop the reference, to check that it
        doesn't get deleted too soon.
        """
        spy = xapian.ValueCountMatchSpy(0)
        enq.add_matchspy(spy)
        del spy

    add_unreferenced_spy(enq)
    mset = enq.get_mset(0, 10)
    expect(len(mset), 5)

    spy = xapian.ValueCountMatchSpy(0)
    enq.add_matchspy(spy)
    # Regression test for clear_matchspies() - used to always raise an
    # exception due to a copy and paste error in its definition.
    enq.clear_matchspies()
    mset = enq.get_mset(0, 10)
    expect([item for item in spy.values()], [])

    enq.add_matchspy(spy)
    mset = enq.get_mset(0, 10)
    expect(spy.get_total(), 5)

    one_and_a_half = (xapian.sortable_serialise(1.5), 1)
    two = (xapian.sortable_serialise(2), 2)
    expect([(item.term, item.termfreq) for item in spy.values()],
           [one_and_a_half, two])
    expect([(item.term, item.termfreq) for item in spy.top_values(10)],
           [two, one_and_a_half])
def test_import_star():
    """Test that "from xapian import *" works.

    Regression test: this failed in the 1.2.0 release.  A star import isn't
    normally good style, but it should still work.

    The whole test is the import of the test_xapian_star helper module
    below (presumably that module performs the star import).

    """
    import test_xapian_star
def test_latlongcoords_iter():
    """Test LatLongCoordsIterator wrapping.

    Iterating an empty LatLongCoords gives nothing; after two coordinates
    are appended, iteration gives them back in the order they were added.

    """
    coords = xapian.LatLongCoords()
    expect([c for c in coords], [])

    for second in (0, 1):
        coords.append(xapian.LatLongCoord(0, second))

    described = [str(c) for c in coords]
    expect(described, ['Xapian::LatLongCoord(0, 0)',
                       'Xapian::LatLongCoord(0, 1)'])
def test_compactor():
    """Test that xapian.Compactor works.

    Two small databases sharing a term and a metadata key are compacted into
    a third, first with the stock Compactor and then with a Python subclass
    which logs status messages and joins duplicate metadata values.

    """
    tmpdir = tempfile.mkdtemp()
    db1 = db2 = db3 = None
    try:
        db1path = os.path.join(tmpdir, 'db1')
        db2path = os.path.join(tmpdir, 'db2')
        db3path = os.path.join(tmpdir, 'db3')
        merged_terms = [('Hello', 2), ('Hello1', 1), ('Hello2', 1)]

        # Set up a couple of sample input databases
        db1 = xapian.WritableDatabase(db1path, xapian.DB_CREATE_OR_OVERWRITE)
        doc1 = xapian.Document()
        for term in ('Hello', 'Hello1'):
            doc1.add_term(term)
        doc1.add_value(0, 'Val1')
        db1.set_metadata('key', '1')
        db1.set_metadata('key1', '1')
        db1.add_document(doc1)
        db1.flush()

        db2 = xapian.WritableDatabase(db2path, xapian.DB_CREATE_OR_OVERWRITE)
        doc2 = xapian.Document()
        for term in ('Hello', 'Hello2'):
            doc2.add_term(term)
        doc2.add_value(0, 'Val2')
        db2.set_metadata('key', '2')
        db2.set_metadata('key2', '2')
        db2.add_document(doc2)
        db2.flush()

        # Compact with the default compactor
        # Metadata conflicts are resolved by picking the first value
        compactor = xapian.Compactor()
        compactor.add_source(db1path)
        compactor.add_source(db2path)
        compactor.set_destdir(db3path)
        compactor.compact()

        db3 = xapian.Database(db3path)
        expect([(item.term, item.termfreq) for item in db3.allterms()],
               merged_terms)
        expect(db3.get_document(1).get_value(0), 'Val1')
        expect(db3.get_document(2).get_value(0), 'Val2')
        expect(db3.get_metadata('key'), '1')
        expect(db3.get_metadata('key1'), '1')
        expect(db3.get_metadata('key2'), '2')

        context("testing a custom compactor which merges duplicate metadata")
        class MyCompactor(xapian.Compactor):
            def __init__(self):
                xapian.Compactor.__init__(self)
                self.log = []

            def set_status(self, table, status):
                # An empty status is recorded as the start of a table.
                if len(status) > 0:
                    entry = '%s: %s' % (table, status)
                else:
                    entry = 'Starting %s' % table
                self.log.append(entry)

            def resolve_duplicate_metadata(self, key, vals):
                return ','.join(vals)

        compactor = MyCompactor()
        compactor.add_source(db1path)
        compactor.add_source(db2path)
        compactor.set_destdir(db3path)
        compactor.compact()
        log = '\n'.join(compactor.log)
        # Check we got some messages in the log
        expect('Starting postlist' in log, True)

        db3 = xapian.Database(db3path)
        expect([(item.term, item.termfreq) for item in db3.allterms()],
               merged_terms)
        expect(db3.get_metadata('key'), '1,2')
        expect(db3.get_metadata('key1'), '1')
        expect(db3.get_metadata('key2'), '2')

    finally:
        # Close whichever databases were opened before removing the files.
        for handle in (db1, db2, db3):
            if handle is not None:
                handle.close()

        shutil.rmtree(tmpdir)
def test_leak_mset_items():
    """Test that items property of MSet doesn't leak

    The test only accesses the property; nothing is asserted about its value.

    """
    db = xapian.inmemory_open()
    doc = xapian.Document()
    doc.add_term('drip')
    db.add_document(doc)

    enquire = xapian.Enquire(db)
    enquire.set_query(xapian.Query('drip'))
    matches = enquire.get_mset(0, 10)

    # Prior to 1.2.4 this next line leaked an object.
    matches.items
def test_custom_matchspy():
    """Test a MatchSpy subclassed in Python.

    The spy counts how many times it is called during a match which asks
    for a single result; it must have been called at least once.

    """
    class CountingSpy(xapian.MatchSpy):
        def __init__(self):
            xapian.MatchSpy.__init__(self)
            self.count = 0

        def __call__(self, doc, weight):
            self.count += 1

    spy = CountingSpy()

    db = setup_database()
    query = xapian.Query(xapian.Query.OP_OR, "was", "it")

    enquire = xapian.Enquire(db)
    enquire.add_matchspy(spy)
    enquire.set_query(query)
    matches = enquire.get_mset(0, 1)
    expect(len(matches), 1)
    expect(spy.count >= 1, True)

    expect(db.get_doccount(), 5)
def test_removed_features():
    """Test that a list of names is not available from the bindings.

    Each name is looked up with getattr() on the xapian module or on an
    instance of one of its classes, and the lookup must raise AttributeError.

    """
    db = xapian.inmemory_open()
    doc = xapian.Document()
    enq = xapian.Enquire(db)
    eset = xapian.ESet()
    mset = xapian.MSet()
    query = xapian.Query()
    qp = xapian.QueryParser()
    titer = xapian._TermIterator()
    postiter = xapian._PostingIterator()

    def check_missing(obj, attr):
        expect_exception(AttributeError, None, getattr, obj, attr)

    # (object, names which must not be attributes of it), checked in order.
    removed = (
        (xapian, (
            'Stem_get_available_languages',
            'TermIterator',
            'PositionIterator',
            'PostingIterator',
            'ValueIterator',
            'MSetIterator',
            'ESetIterator',
        )),
        (db, (
            'allterms_begin',
            'allterms_end',
            'metadata_keys_begin',
            'metadata_keys_end',
            'synonym_keys_begin',
            'synonym_keys_end',
            'synonyms_begin',
            'synonyms_end',
            'spellings_begin',
            'spellings_end',
            'positionlist_begin',
            'positionlist_end',
            'postlist_begin',
            'postlist_end',
            'termlist_begin',
            'termlist_end',
        )),
        (doc, (
            'termlist_begin',
            'termlist_end',
            'values_begin',
            'values_end',
        )),
        (enq, (
            'get_matching_terms_begin',
            'get_matching_terms_end',
        )),
        (eset, ('begin', 'end')),
        (mset, ('begin', 'end')),
        (postiter, ('positionlist_begin', 'positionlist_end')),
        (query, ('get_terms_begin', 'get_terms_end')),
        (qp, (
            'stoplist_begin',
            'stoplist_end',
            'unstem_begin',
            'unstem_end',
        )),
        (titer, ('positionlist_begin', 'positionlist_end')),
    )
    for obj, attrs in removed:
        for attr in attrs:
            check_missing(obj, attr)
# Overall outcome of the script; run() sets it to False on failure.
result = True

def run():
    """Run all tests (ie, callables with names starting "test_").

    The tests are taken from this module's globals, and the command line
    arguments are passed on to runtests().  The module-level 'result' is set
    to False if runtests() reports failure; otherwise it is left alone.

    """
    global result
    passed = runtests(globals(), sys.argv[1:])
    if passed:
        return
    result = False
# First pass: run everything in the main thread.
print("Running tests without threads")
run()

# Second pass: run the tests again from a separate thread, if possible.
if have_threads:
    print("Running tests with threads")

    # This testcase seems to just block when run in a thread, so just remove
    # it before running tests in a thread.
    del test_import_star

    runner = threading.Thread(target=run, name='test runner')
    runner.start()
    # Block until the thread has completed so the thread gets a chance to exit
    # with error status.
    runner.join()

# Exit with a non-zero status if either pass reported a failure.
if not result:
    sys.exit(1)
1645 # vim:syntax=python:set expandtab: