1 # Tests of Python-specific parts of the xapian bindings.
3 # Copyright (C) 2007 Lemur Consulting Ltd
4 # Copyright (C) 2008,2009,2010,2011,2013,2014,2015,2016 Olly Betts
5 # Copyright (C) 2010,2011 Richard Boulton
7 # This program is free software; you can redistribute it and/or
8 # modify it under the terms of the GNU General Public License as
9 # published by the Free Software Foundation; either version 2 of the
10 # License, or (at your option) any later version.
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with this program; if not, write to the Free Software
19 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
35 from testsuite
import *
38 """Set up and return an inmemory database with 5 documents.
41 db
= xapian
.WritableDatabase('', xapian
.DB_BACKEND_INMEMORY
)
43 doc
= xapian
.Document()
44 doc
.set_data("is it cold?")
46 doc
.add_posting("it", 1)
47 doc
.add_posting("cold", 2)
50 doc
= xapian
.Document()
51 doc
.set_data("was it warm?")
52 doc
.add_posting("was", 1)
53 doc
.add_posting("it", 2)
54 doc
.add_posting("warm", 3)
56 doc
.set_data("was it warm? two")
57 doc
.add_term("two", 2)
58 doc
.add_value(0, xapian
.sortable_serialise(2))
60 doc
.set_data("was it warm? three")
61 doc
.add_term("three", 3)
62 doc
.add_value(0, xapian
.sortable_serialise(1.5))
64 doc
.set_data("was it warm? four it")
65 doc
.add_term("four", 4)
67 doc
.add_posting("it", 7)
68 doc
.add_value(5, 'five')
69 doc
.add_value(9, 'nine')
70 doc
.add_value(0, xapian
.sortable_serialise(2))
73 expect(db
.get_doccount(), 5)
75 # Test that str is rejected by sortable_unserialise().
77 xapian
.sortable_unserialise("unicode")
78 except TypeError as e
:
79 expect(str(e
), 'expected bytes, str found')
83 def test_exception_base():
84 """Check that xapian exceptions have Exception as a base class.
88 raise xapian
.InvalidOperationError("Test exception")
89 except Exception as e
:
93 """Test iterators over MSets.
97 query
= xapian
.Query(xapian
.Query
.OP_OR
, "was", "it")
99 enquire
= xapian
.Enquire(db
)
100 enquire
.set_query(query
)
101 mset
= enquire
.get_mset(0, 10)
102 items
= [item
for item
in mset
]
103 expect(len(items
), 5)
104 expect(len(mset
), len(items
), "Expected number of items to be length of mset")
106 context("testing returned item from mset")
107 expect(items
[2].docid
, 4)
108 expect(items
[2].rank
, 2)
109 expect(items
[2].percent
, 86)
110 expect(items
[2].collapse_key
, b
'')
111 expect(items
[2].collapse_count
, 0)
112 expect(items
[2].document
.get_data(), b
'was it warm? three')
114 # Check iterators for sub-msets against the whole mset.
115 for start
in range(0, 6):
116 for maxitems
in range(0, 6):
117 context("checking iterators for sub-mset from %d, maxitems %d" % (start
, maxitems
))
118 submset
= enquire
.get_mset(start
, maxitems
)
121 context("testing hit %d for sub-mset from %d, maxitems %d" % (num
, start
, maxitems
))
122 expect(item
.rank
, num
+ start
)
124 context("comparing iterator item %d for sub-mset from %d, maxitems %d against hit" % (num
, start
, maxitems
))
125 hit
= submset
.get_hit(num
)
126 expect(hit
.docid
, item
.docid
)
127 expect(hit
.rank
, item
.rank
)
128 expect(hit
.percent
, item
.percent
)
129 expect(hit
.document
.get_data(), item
.document
.get_data())
130 expect(hit
.collapse_key
, item
.collapse_key
)
131 expect(hit
.collapse_count
, item
.collapse_count
)
133 context("comparing iterator item %d for sub-mset from %d, maxitems %d against hit from whole mset" % (num
, start
, maxitems
))
134 hit
= mset
.get_hit(num
+ start
)
135 expect(hit
.docid
, item
.docid
)
136 expect(hit
.rank
, item
.rank
)
137 expect(hit
.percent
, item
.percent
)
138 expect(hit
.document
.get_data(), item
.document
.get_data())
139 expect(hit
.collapse_key
, item
.collapse_key
)
140 expect(hit
.collapse_count
, item
.collapse_count
)
142 context("comparing iterator item %d for sub-mset from %d, maxitems %d against direct access with []" % (num
, start
, maxitems
))
143 expect(submset
[num
].docid
, item
.docid
)
144 expect(submset
[num
].rank
, item
.rank
)
145 expect(submset
[num
].percent
, item
.percent
)
146 expect(submset
[num
].document
.get_data(), item
.document
.get_data())
147 expect(submset
[num
].collapse_key
, item
.collapse_key
)
148 expect(submset
[num
].collapse_count
, item
.collapse_count
)
152 context("Checking out of range access to mset, for sub-mset from %d, maxitems %d" % (start
, maxitems
))
153 # Test out-of-range access to mset:
154 expect_exception(IndexError, 'Mset index out of range',
155 submset
.__getitem
__, -10)
156 expect_exception(IndexError, 'Mset index out of range',
157 submset
.__getitem
__, 10)
158 expect_exception(IndexError, 'Mset index out of range',
159 submset
.__getitem
__, -1-len(submset
))
160 expect_exception(IndexError, 'Mset index out of range',
161 submset
.__getitem
__, len(submset
))
163 # Check that the item contents remain valid when the iterator has
165 saved_items
= [item
for item
in submset
]
166 for num
in range(len(saved_items
)):
167 item
= saved_items
[num
]
168 context("comparing iterator item %d for sub-mset mset from %d, maxitems %d against saved item" % (num
, start
, maxitems
))
169 expect(submset
[num
].docid
, item
.docid
)
170 expect(submset
[num
].rank
, item
.rank
)
171 expect(submset
[num
].percent
, item
.percent
)
172 expect(submset
[num
].document
.get_data(), item
.document
.get_data())
173 expect(submset
[num
].collapse_key
, item
.collapse_key
)
174 expect(submset
[num
].collapse_count
, item
.collapse_count
)
176 # Check that the right number of items exist in the mset.
177 context("checking length of sub-mset from %d, maxitems %d" % (start
, maxitems
))
178 items
= [item
for item
in submset
]
179 expect(len(items
), min(maxitems
, 5 - start
))
180 expect(len(submset
), min(maxitems
, 5 - start
))
182 def test_eset_iter():
183 """Test iterators over ESets.
186 db
= setup_database()
187 query
= xapian
.Query(xapian
.Query
.OP_OR
, "was", "it")
191 context("getting eset items without a query")
192 enquire
= xapian
.Enquire(db
)
193 eset
= enquire
.get_eset(10, rset
)
194 items
= [item
for item
in eset
]
195 expect(len(items
), 3)
196 expect(len(items
), len(eset
))
198 context("getting eset items with a query")
199 enquire
= xapian
.Enquire(db
)
200 enquire
.set_query(query
)
201 eset
= enquire
.get_eset(10, rset
)
202 items2
= [item
for item
in eset
]
203 expect(len(items2
), 2)
204 expect(len(items2
), len(eset
))
206 context("comparing eset items with a query to those without")
207 expect(items2
[0].term
, items
[0].term
)
208 expect(items2
[1].term
, items
[2].term
)
210 context("comparing eset weights with a query to those without")
211 expect(items2
[0].weight
, items
[0].weight
)
212 expect(items2
[1].weight
, items
[2].weight
)
214 def test_matchingterms_iter():
215 """Test Enquire.matching_terms iterator.
218 db
= setup_database()
219 query
= xapian
.Query(xapian
.Query
.OP_OR
, ("was", "it", "warm", "two"))
221 # Prior to 1.2.4 Enquire.matching_terms() leaked references to its members.
223 enquire
= xapian
.Enquire(db
)
224 enquire
.set_query(query
)
225 mset
= enquire
.get_mset(0, 10)
228 # Make a list of the term names
229 mterms
= [term
for term
in enquire
.matching_terms(item
.docid
)]
230 mterms2
= [term
for term
in enquire
.matching_terms(item
)]
231 expect(mterms
, mterms2
)
233 mterms
= [term
for term
in enquire
.matching_terms(mset
.get_hit(0))]
234 expect(mterms
, [b
'it', b
'two', b
'warm', b
'was'])
def test_queryterms_iter():
    """Check that iterating over a Query yields the terms it contains.

    """
    db = setup_database()
    wanted_terms = ("was", "it", "warm", "two")
    query = xapian.Query(xapian.Query.OP_OR, wanted_terms)

    # Walk the query's term iterator and collect each term name in turn.
    seen = []
    for term in query:
        seen.append(term)

    # Terms are compared as bytes, in the order the iterator produced them.
    expect(seen, [b'it', b'two', b'warm', b'was'])
247 def test_queryparser_stoplist_iter():
248 """Test QueryParser stoplist iterator.
251 stemmer
= xapian
.Stem('en')
253 # Check behaviour without having set a stoplist.
254 queryparser
= xapian
.QueryParser()
255 queryparser
.set_stemmer(stemmer
)
256 queryparser
.set_stemming_strategy(queryparser
.STEM_SOME
)
257 expect([term
for term
in queryparser
.stoplist()], [])
258 query
= queryparser
.parse_query('to be or not to be is the questions')
259 expect([term
for term
in queryparser
.stoplist()], [])
261 'Query((Zto@1 OR Zbe@2 OR Zor@3 OR Znot@4 OR Zto@5 OR Zbe@6 OR '
262 'Zis@7 OR Zthe@8 OR Zquestion@9))')
264 # Check behaviour with a stoplist, but no stemmer
265 queryparser
= xapian
.QueryParser()
266 stopper
= xapian
.SimpleStopper()
269 stopper
.add('question')
270 queryparser
.set_stopper(stopper
)
271 expect([term
for term
in queryparser
.stoplist()], [])
272 query
= queryparser
.parse_query('to be or not to be is the questions')
274 expect([term
for term
in queryparser
.stoplist()], [b
'to', b
'not', b
'to'])
276 'Query((be@2 OR or@3 OR be@6 OR is@7 OR the@8 OR questions@9))')
278 # Check behaviour with a stoplist and a stemmer
279 queryparser
.set_stemmer(stemmer
)
280 queryparser
.set_stemming_strategy(queryparser
.STEM_SOME
)
281 expect([term
for term
in queryparser
.stoplist()], [b
'to', b
'not', b
'to']) # Shouldn't have changed since previous query.
282 query
= queryparser
.parse_query('to be or not to be is the questions')
284 expect([term
for term
in queryparser
.stoplist()], [b
'to', b
'not', b
'to'])
286 'Query((Zbe@2 OR Zor@3 OR Zbe@6 OR Zis@7 OR Zthe@8 OR Zquestion@9))')
288 def test_queryparser_unstem_iter():
289 """Test QueryParser unstemlist iterator.
292 stemmer
= xapian
.Stem('en')
294 queryparser
= xapian
.QueryParser()
295 expect([term
for term
in queryparser
.unstemlist('to')], [])
296 expect([term
for term
in queryparser
.unstemlist('question')], [])
297 expect([term
for term
in queryparser
.unstemlist('questions')], [])
298 query
= queryparser
.parse_query('to question questions')
300 expect([term
for term
in queryparser
.unstemlist('to')], [b
'to'])
301 expect([term
for term
in queryparser
.unstemlist('question')], [b
'question'])
302 expect([term
for term
in queryparser
.unstemlist('questions')], [b
'questions'])
304 'Query((to@1 OR question@2 OR questions@3))')
307 queryparser
= xapian
.QueryParser()
308 queryparser
.set_stemmer(stemmer
)
309 queryparser
.set_stemming_strategy(queryparser
.STEM_SOME
)
310 expect([term
for term
in queryparser
.unstemlist('Zto')], [])
311 expect([term
for term
in queryparser
.unstemlist('Zquestion')], [])
312 expect([term
for term
in queryparser
.unstemlist('Zquestions')], [])
313 query
= queryparser
.parse_query('to question questions')
315 expect([term
for term
in queryparser
.unstemlist('Zto')], [b
'to'])
316 expect([term
for term
in queryparser
.unstemlist('Zquestion')], [b
'question', b
'questions'])
317 expect([term
for term
in queryparser
.unstemlist('Zquestions')], [])
319 'Query((Zto@1 OR Zquestion@2 OR Zquestion@3))')
321 def test_allterms_iter():
322 """Test all-terms iterator on Database.
325 db
= setup_database()
327 context("making a list of the term names and frequencies")
331 terms
.append(termitem
.term
)
332 expect_exception(xapian
.InvalidOperationError
, 'Iterator does not support wdfs', getattr, termitem
, 'wdf')
333 freqs
.append(termitem
.termfreq
)
334 expect_exception(xapian
.InvalidOperationError
, 'Iterator does not support position lists', getattr, termitem
, 'positer')
336 context("checking that items are no longer valid once the iterator has moved on")
337 termitems
= [termitem
for termitem
in db
]
339 expect(len(termitems
), len(terms
))
340 for i
in range(len(termitems
)):
341 expect(termitems
[i
].term
, terms
[i
])
343 expect(len(termitems
), len(freqs
))
344 for termitem
in termitems
:
345 expect_exception(xapian
.InvalidOperationError
, 'Iterator has moved, and does not support random access', getattr, termitem
, 'termfreq')
347 context("checking that restricting the terms iterated with a prefix works")
350 for i
in range(len(terms
)):
351 if terms
[i
].startswith(b
't'):
352 prefix_terms
.append(terms
[i
])
353 prefix_freqs
.append(freqs
[i
])
355 for termitem
in db
.allterms('t'):
356 expect(termitem
.term
, prefix_terms
[i
])
357 expect(termitem
.termfreq
, prefix_freqs
[i
])
359 expect(len(prefix_terms
), i
)
361 def test_termlist_iter():
362 """Test termlist iterator on Database.
365 db
= setup_database()
367 # Make lists of the item contents
372 for termitem
in db
.termlist(3):
373 terms
.append(termitem
.term
)
374 wdfs
.append(termitem
.wdf
)
375 freqs
.append(termitem
.termfreq
)
376 positers
.append([pos
for pos
in termitem
.positer
])
378 expect(terms
, [b
'it', b
'two', b
'warm', b
'was'])
379 expect(wdfs
, [1, 2, 1, 1])
380 expect(freqs
, [5, 3, 4, 4])
381 expect(positers
, [[2], [], [3], [1]])
384 tliter
= db
.termlist(3)
386 # skip to an item before the first item.
387 termitem
= tliter
.skip_to('a')
388 expect((termitem
.term
, termitem
.wdf
, termitem
.termfreq
,
389 [pos
for pos
in termitem
.positer
]), (b
'it', 1, 5, [2]))
391 # skip forwards to an item.
392 termitem
= tliter
.skip_to('two')
393 expect((termitem
.term
, termitem
.wdf
, termitem
.termfreq
,
394 [pos
for pos
in termitem
.positer
]), (b
'two', 2, 3, []))
396 # skip to same place (should return same item)
397 termitem
= tliter
.skip_to('two')
398 expect((termitem
.term
, termitem
.wdf
, termitem
.termfreq
,
399 [pos
for pos
in termitem
.positer
]), (b
'two', 2, 3, []))
401 # next() after a skip_to(), should return next item.
402 termitem
= next(tliter
)
403 expect((termitem
.term
, termitem
.wdf
, termitem
.termfreq
,
404 [pos
for pos
in termitem
.positer
]), (b
'warm', 1, 4, [3]))
406 # skip to same place (should return same item)
407 termitem
= tliter
.skip_to('warm')
408 expect((termitem
.term
, termitem
.wdf
, termitem
.termfreq
,
409 [pos
for pos
in termitem
.positer
]), (b
'warm', 1, 4, [3]))
411 # skip backwards (should return same item)
412 termitem
= tliter
.skip_to('a')
415 expect_exception(StopIteration, '', tliter
.skip_to
, 'zoo')
416 # skip backwards (should still return StopIteration).
417 expect_exception(StopIteration, '', tliter
.skip_to
, 'a')
418 # next should continue to return StopIteration.
419 expect_exception(StopIteration, '', next
, tliter
)
422 # Make a list of the terms (so we can test if they're still valid
423 # once the iterator has moved on).
424 termitems
= [termitem
for termitem
in db
.termlist(3)]
426 expect(len(termitems
), len(terms
))
427 for i
in range(len(termitems
)):
428 expect(termitems
[i
].term
, terms
[i
])
430 expect(len(termitems
), len(wdfs
))
431 for i
in range(len(termitems
)):
432 expect(termitems
[i
].wdf
, wdfs
[i
])
434 expect(len(termitems
), len(freqs
))
435 for termitem
in termitems
:
436 expect_exception(xapian
.InvalidOperationError
,
437 'Iterator has moved, and does not support random access',
438 getattr, termitem
, 'termfreq')
440 expect(len(termitems
), len(freqs
))
441 for termitem
in termitems
:
442 expect_exception(xapian
.InvalidOperationError
,
443 'Iterator has moved, and does not support random access',
444 getattr, termitem
, 'positer')
446 def test_dbdocument_iter():
447 """Test document terms iterator for document taken from a database.
450 db
= setup_database()
452 doc
= db
.get_document(3)
454 # Make lists of the item contents
460 terms
.append(termitem
.term
)
461 wdfs
.append(termitem
.wdf
)
462 freqs
.append(termitem
.termfreq
)
463 positers
.append([pos
for pos
in termitem
.positer
])
465 expect(terms
, [b
'it', b
'two', b
'warm', b
'was'])
466 expect(wdfs
, [1, 2, 1, 1])
467 expect(freqs
, [5, 3, 4, 4])
468 expect(positers
, [[2], [], [3], [1]])
470 # Make a list of the terms (so we can test if they're still valid
471 # once the iterator has moved on).
472 termitems
= [termitem
for termitem
in doc
]
474 expect(len(termitems
), len(terms
))
475 for i
in range(len(termitems
)):
476 expect(termitems
[i
].term
, terms
[i
])
478 expect(len(termitems
), len(wdfs
))
479 for i
in range(len(termitems
)):
480 expect(termitems
[i
].wdf
, wdfs
[i
])
482 expect(len(termitems
), len(freqs
))
483 for termitem
in termitems
:
484 expect_exception(xapian
.InvalidOperationError
,
485 'Iterator has moved, and does not support random access',
486 getattr, termitem
, 'termfreq')
488 expect(len(termitems
), len(freqs
))
489 for termitem
in termitems
:
490 expect_exception(xapian
.InvalidOperationError
,
491 'Iterator has moved, and does not support random access',
492 getattr, termitem
, 'positer')
494 def test_newdocument_iter():
495 """Test document terms iterator for newly created document.
498 doc
= xapian
.Document()
499 doc
.set_data("was it warm? two")
500 doc
.add_posting("was", 1)
501 doc
.add_posting("it", 2)
502 doc
.add_posting("warm", 3)
503 doc
.add_term("two", 2)
505 # Make lists of the item contents
510 terms
.append(termitem
.term
)
511 wdfs
.append(termitem
.wdf
)
512 expect_exception(xapian
.InvalidOperationError
,
513 "get_termfreq() not valid for a TermIterator from a "
514 "Document which is not associated with a database",
515 getattr, termitem
, 'termfreq')
516 positers
.append([pos
for pos
in termitem
.positer
])
518 expect(terms
, [b
'it', b
'two', b
'warm', b
'was'])
519 expect(wdfs
, [1, 2, 1, 1])
520 expect(positers
, [[2], [], [3], [1]])
522 # Make a list of the terms (so we can test if they're still valid
523 # once the iterator has moved on).
524 termitems
= [termitem
for termitem
in doc
]
526 expect(len(termitems
), len(terms
))
527 for i
in range(len(termitems
)):
528 expect(termitems
[i
].term
, terms
[i
])
530 expect(len(termitems
), len(wdfs
))
531 for i
in range(len(termitems
)):
532 expect(termitems
[i
].wdf
, wdfs
[i
])
534 for termitem
in termitems
:
535 expect_exception(xapian
.InvalidOperationError
,
536 'Iterator has moved, and does not support random access',
537 getattr, termitem
, 'termfreq')
539 expect(len(termitems
), len(positers
))
540 for termitem
in termitems
:
541 expect_exception(xapian
.InvalidOperationError
,
542 'Iterator has moved, and does not support random access',
543 getattr, termitem
, 'positer')
545 def test_postinglist_iter():
546 """Test postinglist iterator on Database.
549 db
= setup_database()
551 # Make lists of the item contents
556 for posting
in db
.postlist('it'):
557 docids
.append(posting
.docid
)
558 doclengths
.append(posting
.doclength
)
559 wdfs
.append(posting
.wdf
)
560 positers
.append([pos
for pos
in posting
.positer
])
562 expect(docids
, [1, 2, 3, 4, 5])
563 expect(doclengths
, [3, 3, 5, 8, 19])
564 expect(wdfs
, [1, 1, 1, 1, 8])
565 expect(positers
, [[1], [2], [2], [2], [2, 7]])
568 pliter
= db
.postlist('it')
570 # skip to an item before the first item.
571 posting
= pliter
.skip_to(0)
572 expect((posting
.docid
, posting
.doclength
, posting
.wdf
,
573 [pos
for pos
in posting
.positer
]), (1, 3, 1, [1]))
575 # skip forwards to an item.
576 posting
= pliter
.skip_to(3)
577 expect((posting
.docid
, posting
.doclength
, posting
.wdf
,
578 [pos
for pos
in posting
.positer
]), (3, 5, 1, [2]))
580 # skip to same place (should return same item)
581 posting
= pliter
.skip_to(3)
582 expect((posting
.docid
, posting
.doclength
, posting
.wdf
,
583 [pos
for pos
in posting
.positer
]), (3, 5, 1, [2]))
585 # next() after a skip_to(), should return next item.
586 posting
= next(pliter
)
587 expect((posting
.docid
, posting
.doclength
, posting
.wdf
,
588 [pos
for pos
in posting
.positer
]), (4, 8, 1, [2]))
590 # skip to same place (should return same item)
591 posting
= pliter
.skip_to(4)
592 expect((posting
.docid
, posting
.doclength
, posting
.wdf
,
593 [pos
for pos
in posting
.positer
]), (4, 8, 1, [2]))
595 # skip backwards (should return same item)
596 posting
= pliter
.skip_to(2)
597 expect((posting
.docid
, posting
.doclength
, posting
.wdf
,
598 [pos
for pos
in posting
.positer
]), (4, 8, 1, [2]))
601 expect_exception(StopIteration, '', pliter
.skip_to
, 6)
602 # skip backwards (should still return StopIteration).
603 expect_exception(StopIteration, '', pliter
.skip_to
, 6)
604 # next should continue to return StopIteration.
605 expect_exception(StopIteration, '', next
, pliter
)
608 # Make a list of the postings (so we can test if they're still valid once
609 # the iterator has moved on).
610 postings
= [posting
for posting
in db
.postlist('it')]
612 expect(len(postings
), len(docids
))
613 for i
in range(len(postings
)):
614 expect(postings
[i
].docid
, docids
[i
])
616 expect(len(postings
), len(doclengths
))
617 for i
in range(len(postings
)):
618 expect(postings
[i
].doclength
, doclengths
[i
])
620 expect(len(postings
), len(wdfs
))
621 for i
in range(len(postings
)):
622 expect(postings
[i
].wdf
, wdfs
[i
])
624 expect(len(postings
), len(positers
))
625 for posting
in postings
:
626 expect_exception(xapian
.InvalidOperationError
,
627 'Iterator has moved, and does not support random access',
628 getattr, posting
, 'positer')
630 def test_valuestream_iter():
631 """Test a valuestream iterator on Database.
634 db
= setup_database()
636 # Check basic iteration
637 expect([(item
.docid
, item
.value
) for item
in db
.valuestream(0)],
638 [(3, b
'\xa4'), (4, b
'\xa2'), (5, b
'\xa4')])
639 expect([(item
.docid
, item
.value
) for item
in db
.valuestream(1)], [])
640 expect([(item
.docid
, item
.value
) for item
in db
.valuestream(5)],
642 expect([(item
.docid
, item
.value
) for item
in db
.valuestream(9)],
645 # Test skip_to() on iterator with no values, and behaviours when called
646 # after already returning StopIteration.
647 i
= db
.valuestream(1)
648 expect_exception(StopIteration, "", i
.skip_to
, 1)
649 expect_exception(StopIteration, "", i
.skip_to
, 1)
650 i
= db
.valuestream(1)
651 expect_exception(StopIteration, "", i
.skip_to
, 1)
652 expect_exception(StopIteration, "", i
.__next
__)
653 i
= db
.valuestream(1)
654 expect_exception(StopIteration, "", i
.__next
__)
655 expect_exception(StopIteration, "", i
.skip_to
, 1)
657 # Test that skipping to a value works, and that skipping doesn't have to
659 i
= db
.valuestream(0)
661 expect((item
.docid
, item
.value
), (4, b
'\xa2'))
663 expect((item
.docid
, item
.value
), (4, b
'\xa2'))
665 expect((item
.docid
, item
.value
), (4, b
'\xa2'))
667 expect((item
.docid
, item
.value
), (5, b
'\xa4'))
668 expect_exception(StopIteration, "", i
.skip_to
, 6)
670 # Test that alternating skip_to() and next() works.
671 i
= db
.valuestream(0)
673 expect((item
.docid
, item
.value
), (3, b
'\xa4'))
675 expect((item
.docid
, item
.value
), (4, b
'\xa2'))
677 expect((item
.docid
, item
.value
), (5, b
'\xa4'))
678 expect_exception(StopIteration, "", i
.skip_to
, 6)
680 # Test that next works correctly after skip_to() called with an earlier
682 i
= db
.valuestream(0)
684 expect((item
.docid
, item
.value
), (4, b
'\xa2'))
686 expect((item
.docid
, item
.value
), (4, b
'\xa2'))
688 expect((item
.docid
, item
.value
), (5, b
'\xa4'))
690 # Test that next works correctly after skipping to last item
691 i
= db
.valuestream(0)
693 expect((item
.docid
, item
.value
), (5, b
'\xa4'))
694 expect_exception(StopIteration, "", i
.__next
__)
def test_position_iter():
    """Check the position list iterator for a term of a stored document.

    """
    db = setup_database()

    doc = db.get_document(5)

    # Collect every position reported for the term 'it' in document 5.
    positions = []
    for position in db.positionlist(5, 'it'):
        positions.append(position)

    expect(positions, [2, 7])
def test_value_iter():
    """Check iteration over the values attached to a document.

    """
    db = setup_database()
    doc = db.get_document(5)

    items = list(doc.values())
    expect(len(items), 3)

    # The first value is compared against a freshly serialised number.
    first = items[0]
    expect(first.num, 0)
    expect(first.value, xapian.sortable_serialise(2))

    # The remaining values are plain byte strings keyed by value number.
    remaining = ((5, b'five'), (9, b'nine'))
    for index, (number, data) in enumerate(remaining, start=1):
        entry = items[index]
        expect(entry.num, number)
        expect(entry.value, data)
725 def test_synonyms_iter():
726 """Test iterators over list of synonyms in a database.
729 dbpath
= 'db_test_synonyms_iter'
730 db
= xapian
.WritableDatabase(dbpath
, xapian
.DB_CREATE_OR_OVERWRITE
)
732 db
.add_synonym('hello', 'hi')
733 db
.add_synonym('hello', 'howdy')
735 expect([item
for item
in db
.synonyms('foo')], [])
736 expect([item
for item
in db
.synonyms('hello')], [b
'hi', b
'howdy'])
737 expect([item
for item
in db
.synonym_keys()], [b
'hello'])
738 expect([item
for item
in db
.synonym_keys('foo')], [])
739 expect([item
for item
in db
.synonym_keys('he')], [b
'hello'])
740 expect([item
for item
in db
.synonym_keys('hello')], [b
'hello'])
742 dbr
=xapian
.Database(dbpath
)
743 expect([item
for item
in dbr
.synonyms('foo')], [])
744 expect([item
for item
in dbr
.synonyms('hello')], [])
745 expect([item
for item
in dbr
.synonym_keys()], [])
746 expect([item
for item
in dbr
.synonym_keys('foo')], [])
747 expect([item
for item
in dbr
.synonym_keys('he')], [])
748 expect([item
for item
in dbr
.synonym_keys('hello')], [])
752 expect([item
for item
in db
.synonyms('foo')], [])
753 expect([item
for item
in db
.synonyms('hello')], [b
'hi', b
'howdy'])
754 expect([item
for item
in db
.synonym_keys()], [b
'hello'])
755 expect([item
for item
in db
.synonym_keys('foo')], [])
756 expect([item
for item
in db
.synonym_keys('he')], [b
'hello'])
757 expect([item
for item
in db
.synonym_keys('hello')], [b
'hello'])
759 dbr
=xapian
.Database(dbpath
)
760 expect([item
for item
in dbr
.synonyms('foo')] , [])
761 expect([item
for item
in dbr
.synonyms('hello')], [b
'hi', b
'howdy'])
762 expect([item
for item
in dbr
.synonym_keys()], [b
'hello'])
763 expect([item
for item
in dbr
.synonym_keys('foo')], [])
764 expect([item
for item
in dbr
.synonym_keys('he')], [b
'hello'])
765 expect([item
for item
in dbr
.synonym_keys('hello')], [b
'hello'])
768 expect(xapian
.Database
.check(dbpath
), 0)
770 shutil
.rmtree(dbpath
)
772 def test_metadata_keys_iter():
773 """Test iterators over list of metadata keys in a database.
776 dbpath
= 'db_test_metadata_iter'
777 db
= xapian
.WritableDatabase(dbpath
, xapian
.DB_CREATE_OR_OVERWRITE
)
779 db
.set_metadata('author', 'richard')
780 db
.set_metadata('item1', 'hello')
781 db
.set_metadata('item1', 'hi')
782 db
.set_metadata('item2', 'howdy')
783 db
.set_metadata('item3', '')
784 db
.set_metadata('item4', 'goodbye')
785 db
.set_metadata('item4', '')
786 db
.set_metadata('type', 'greeting')
788 expect([item
for item
in db
.metadata_keys()],
789 [b
'author', b
'item1', b
'item2', b
'type'])
790 expect([item
for item
in db
.metadata_keys('foo')], [])
791 expect([item
for item
in db
.metadata_keys('item')], [b
'item1', b
'item2'])
792 expect([item
for item
in db
.metadata_keys('it')], [b
'item1', b
'item2'])
793 expect([item
for item
in db
.metadata_keys('type')], [b
'type'])
795 dbr
=xapian
.Database(dbpath
)
796 expect([item
for item
in dbr
.metadata_keys()], [])
797 expect([item
for item
in dbr
.metadata_keys('foo')], [])
798 expect([item
for item
in dbr
.metadata_keys('item')], [])
799 expect([item
for item
in dbr
.metadata_keys('it')], [])
800 expect([item
for item
in dbr
.metadata_keys('type')], [])
803 expect([item
for item
in db
.metadata_keys()],
804 [b
'author', b
'item1', b
'item2', b
'type'])
805 expect([item
for item
in db
.metadata_keys('foo')], [])
806 expect([item
for item
in db
.metadata_keys('item')], [b
'item1', b
'item2'])
807 expect([item
for item
in db
.metadata_keys('it')], [b
'item1', b
'item2'])
808 expect([item
for item
in db
.metadata_keys('type')], [b
'type'])
810 dbr
=xapian
.Database(dbpath
)
811 expect([item
for item
in dbr
.metadata_keys()],
812 [b
'author', b
'item1', b
'item2', b
'type'])
813 expect([item
for item
in dbr
.metadata_keys('foo')], [])
814 expect([item
for item
in dbr
.metadata_keys('item')], [b
'item1', b
'item2'])
815 expect([item
for item
in dbr
.metadata_keys('it')], [b
'item1', b
'item2'])
816 expect([item
for item
in dbr
.metadata_keys('type')], [b
'type'])
819 expect(xapian
.Database
.check(dbpath
), 0)
821 shutil
.rmtree(dbpath
)
824 """Test basic spelling correction features.
827 dbpath
= 'db_test_spell'
828 db
= xapian
.WritableDatabase(dbpath
, xapian
.DB_CREATE_OR_OVERWRITE
)
830 db
.add_spelling('hello')
831 db
.add_spelling('mell', 2)
832 expect(db
.get_spelling_suggestion('hell'), b
'mell')
833 expect([(item
.term
, item
.termfreq
) for item
in db
.spellings()], [(b
'hello', 1), (b
'mell', 2)])
834 dbr
=xapian
.Database(dbpath
)
835 expect(dbr
.get_spelling_suggestion('hell'), b
'')
836 expect([(item
.term
, item
.termfreq
) for item
in dbr
.spellings()], [])
838 dbr
=xapian
.Database(dbpath
)
839 expect(db
.get_spelling_suggestion('hell'), b
'mell')
840 expect(dbr
.get_spelling_suggestion('hell'), b
'mell')
841 expect([(item
.term
, item
.termfreq
) for item
in dbr
.spellings()], [(b
'hello', 1), (b
'mell', 2)])
844 expect(xapian
.Database
.check(dbpath
), 0)
846 shutil
.rmtree(dbpath
)
848 def test_queryparser_custom_vrp():
849 """Test QueryParser with a custom (in python) ValueRangeProcessor.
852 class MyVRP(xapian
.ValueRangeProcessor
):
854 xapian
.ValueRangeProcessor
.__init
__(self
)
856 def __call__(self
, begin
, end
):
857 return (7, "A"+begin
, "B"+end
)
859 queryparser
= xapian
.QueryParser()
862 queryparser
.add_valuerangeprocessor(myvrp
)
863 query
= queryparser
.parse_query('5..8')
866 'Query(VALUE_RANGE 7 A5 B8)')
868 def test_queryparser_custom_vrp_deallocation():
869 """Test that QueryParser doesn't delete ValueRangeProcessors too soon.
872 class MyVRP(xapian
.ValueRangeProcessor
):
874 xapian
.ValueRangeProcessor
.__init
__(self
)
876 def __call__(self
, begin
, end
):
877 return (7, "A"+begin
, "B"+end
)
880 queryparser
= xapian
.QueryParser()
882 queryparser
.add_valuerangeprocessor(myvrp
)
885 queryparser
= make_parser()
886 query
= queryparser
.parse_query('5..8')
889 'Query(VALUE_RANGE 7 A5 B8)')
891 def test_queryparser_custom_rp():
892 """Test QueryParser with a custom (in python) RangeProcessor.
895 class MyRP(xapian
.RangeProcessor
):
897 xapian
.RangeProcessor
.__init
__(self
)
899 def __call__(self
, begin
, end
):
900 begin
= "A" + begin
.decode('utf-8')
901 end
= "B" + end
.decode('utf-8')
902 return xapian
.Query(xapian
.Query
.OP_VALUE_RANGE
, 7, begin
, end
)
904 queryparser
= xapian
.QueryParser()
907 queryparser
.add_rangeprocessor(myrp
)
908 query
= queryparser
.parse_query('5..8')
911 'Query(VALUE_RANGE 7 A5 B8)')
913 def test_queryparser_custom_rp_deallocation():
914 """Test that QueryParser doesn't delete RangeProcessors too soon.
917 class MyRP(xapian
.RangeProcessor
):
919 xapian
.RangeProcessor
.__init
__(self
)
921 def __call__(self
, begin
, end
):
922 begin
= "A" + begin
.decode('utf-8')
923 end
= "B" + end
.decode('utf-8')
924 return xapian
.Query(xapian
.Query
.OP_VALUE_RANGE
, 7, begin
, end
)
927 queryparser
= xapian
.QueryParser()
929 queryparser
.add_rangeprocessor(myrp
)
932 queryparser
= make_parser()
933 query
= queryparser
.parse_query('5..8')
936 'Query(VALUE_RANGE 7 A5 B8)')
938 def test_scale_weight():
939 """Test query OP_SCALE_WEIGHT feature.
942 db
= setup_database()
943 for mult
in (0, 1, 2.5):
944 context("checking queries with OP_SCALE_WEIGHT with a multiplier of %r" %
946 query1
= xapian
.Query("it")
947 query2
= xapian
.Query(xapian
.Query
.OP_SCALE_WEIGHT
, query1
, mult
)
949 enquire
= xapian
.Enquire(db
)
950 enquire
.set_query(query1
)
951 mset1
= enquire
.get_mset(0, 10)
952 enquire
.set_query(query2
)
953 mset2
= enquire
.get_mset(0, 10)
955 expected
= [(0, item
.docid
) for item
in mset1
]
958 expected
= [(int(item
.weight
* mult
* 1000000), item
.docid
) for item
in mset1
]
959 expect([(int(item
.weight
* 1000000), item
.docid
) for item
in mset2
], expected
)
961 context("checking queries with OP_SCALE_WEIGHT with a multiplier of -1")
962 query1
= xapian
.Query("it")
963 expect_exception(xapian
.InvalidArgumentError
,
964 "OP_SCALE_WEIGHT requires factor >= 0",
966 xapian
.Query
.OP_SCALE_WEIGHT
, query1
, -1)
969 def test_weight_normalise():
970 """Test normalising of query weights using the OP_SCALE_WEIGHT feature.
972 This test first runs a search (asking for no results) to get the maximum
973 possible weight for a query, and then checks that the results of
974 MSet.get_max_possible() match this.
976 This tests that the get_max_possible() value is correct (though it isn't
977 guaranteed to be at a tight bound), and that the SCALE_WEIGHT query can
978 compensate correctly.
981 db
= setup_database()
988 "\"was it warm\" four notpresent",
991 context("checking query %r using OP_SCALE_WEIGHT to normalise the weights" % query
)
992 qp
= xapian
.QueryParser()
993 query1
= qp
.parse_query(query
)
994 enquire
= xapian
.Enquire(db
)
995 enquire
.set_query(query1
)
996 mset1
= enquire
.get_mset(0, 0)
998 # Check the max_attained value is 0 - this gives us some reassurance
999 # that the match didn't actually do the work of calculating any
1001 expect(mset1
.get_max_attained(), 0)
1003 max_possible
= mset1
.get_max_possible()
1004 if query
== "notpresent":
1005 expect(max_possible
, 0)
1007 mult
= 1.0 / max_possible
1008 query2
= xapian
.Query(xapian
.Query
.OP_SCALE_WEIGHT
, query1
, mult
)
1010 enquire
= xapian
.Enquire(db
)
1011 enquire
.set_query(query2
)
1012 mset2
= enquire
.get_mset(0, 10)
1013 # max_possible should be 1 (excluding rounding errors) for mset2
1014 expect(int(mset2
.get_max_possible() * 1000000.0 + 0.5), 1000000)
1016 expect(item
.weight
> 0, True)
1017 expect(item
.weight
<= 1, True)
1020 def test_valuesetmatchdecider():
1021 """Simple tests of the ValueSetMatchDecider class
1024 md
= xapian
.ValueSetMatchDecider(0, True)
1025 doc
= xapian
.Document()
1026 expect(md(doc
), False)
1029 doc
.add_value(0, 'foo')
1030 expect(md(doc
), True)
1032 md
.remove_value('foo')
1033 expect(md(doc
), False)
1035 md
= xapian
.ValueSetMatchDecider(0, False)
1036 expect(md(doc
), True)
1039 expect(md(doc
), False)
1042 def test_postingsource():
1043 """Simple test of the PostingSource class.
1046 class OddPostingSource(xapian
.PostingSource
):
1047 def __init__(self
, max):
1048 xapian
.PostingSource
.__init
__(self
)
1053 self
.weight
= db
.get_doccount() + 1
1054 self
.set_maxweight(self
.weight
)
1056 def get_termfreq_min(self
): return 0
1057 def get_termfreq_est(self
): return int(self
.max / 2)
1058 def get_termfreq_max(self
): return self
.max
1059 def __next__(self
, minweight
):
1062 self
.set_maxweight(self
.weight
)
1063 def at_end(self
): return self
.current
> self
.max
1064 def get_docid(self
): return self
.current
1065 def get_weight(self
): return self
.weight
1067 dbpath
= 'db_test_postingsource'
1068 db
= xapian
.WritableDatabase(dbpath
, xapian
.DB_CREATE_OR_OVERWRITE
)
1069 for id in range(10):
1070 doc
= xapian
.Document()
1071 db
.add_document(doc
)
1073 # Do a dance to check that the posting source doesn't get dereferenced too
1074 # soon in various cases.
1076 # First - check that it's kept when the source goes out of scope.
1078 source
= OddPostingSource(10)
1079 # The posting source is inside a list to check that case is
1080 # correctly handled.
1081 return xapian
.Query(xapian
.Query
.OP_OR
,
1082 ["terM wHich wilL NoT maTch", xapian
.Query(source
)])
1084 # Check that it's kept when the query goes out of scope.
1087 enquire
= xapian
.Enquire(db
)
1088 enquire
.set_query(query
)
1091 # Check it's kept when the query is retrieved from enquire and put into
1095 enquire
= xapian
.Enquire(db
)
1096 enquire
.set_query(enq1
.get_query())
1102 mset
= enquire
.get_mset(0, 10)
1104 expect([item
.docid
for item
in mset
], [1, 3, 5, 7, 9])
1105 expect(mset
[0].weight
, db
.get_doccount())
1108 expect(xapian
.Database
.check(dbpath
), 0)
1109 shutil
.rmtree(dbpath
)
1111 def test_postingsource2():
1112 """Simple test of the PostingSource class.
1115 dbpath
= 'db_test_postingsource2'
1116 db
= xapian
.WritableDatabase(dbpath
, xapian
.DB_CREATE_OR_OVERWRITE
)
1117 vals
= (6, 9, 4.5, 4.4, 4.6, 2, 1, 4, 3, 0)
1118 for id in range(10):
1119 doc
= xapian
.Document()
1120 doc
.add_value(1, xapian
.sortable_serialise(vals
[id]))
1121 db
.add_document(doc
)
1123 source
= xapian
.ValueWeightPostingSource(1)
1124 query
= xapian
.Query(source
)
1125 del source
# Check that query keeps a reference to it.
1127 enquire
= xapian
.Enquire(db
)
1128 enquire
.set_query(query
)
1129 mset
= enquire
.get_mset(0, 10)
1131 expect([item
.docid
for item
in mset
], [2, 1, 5, 3, 4, 8, 9, 6, 7, 10])
1134 shutil
.rmtree(dbpath
)
1136 def test_postingsource3():
1137 """Test that ValuePostingSource can be usefully subclassed.
1140 dbpath
= 'db_test_postingsource3'
1141 db
= xapian
.WritableDatabase(dbpath
, xapian
.DB_CREATE_OR_OVERWRITE
)
1144 doc
= xapian
.Document()
1145 doc
.add_value(1, xapian
.sortable_serialise(wt
))
1146 db
.add_document(doc
)
1148 class PyValuePostingSource(xapian
.ValuePostingSource
):
1149 def __init__(self
, slot
):
1150 xapian
.ValuePostingSource
.__init
__(self
, slot
)
1153 xapian
.ValuePostingSource
.init(self
, db
)
1155 slot
= self
.get_slot()
1156 ub
= db
.get_value_upper_bound(slot
)
1157 self
.set_maxweight(xapian
.sortable_unserialise(ub
) ** 3)
1159 def next(self
, minweight
):
1160 return xapian
.ValuePostingSource
.next(self
, minweight
)
1161 def get_weight(self
):
1162 value
= self
.get_value()
1163 return xapian
.sortable_unserialise(value
) ** 3
1165 source
= PyValuePostingSource(1)
1166 query
= xapian
.Query(source
)
1167 #del source # Check that query keeps a reference to it.
1169 enquire
= xapian
.Enquire(db
)
1170 enquire
.set_query(query
)
1171 mset
= enquire
.get_mset(0, 10)
1173 expect([item
.docid
for item
in mset
], [4, 2, 3, 1])
1176 expect(xapian
.Database
.check(dbpath
), 0)
1177 shutil
.rmtree(dbpath
)
1179 def test_value_stats():
1180 """Simple test of being able to get value statistics.
1183 dbpath
= 'db_test_value_stats'
1184 db
= xapian
.WritableDatabase(dbpath
, xapian
.DB_CREATE_OR_OVERWRITE
)
1186 vals
= (6, 9, 4.5, 4.4, 4.6, 2, 1, 4, 3, 0)
1187 for id in range(10):
1188 doc
= xapian
.Document()
1189 doc
.add_value(1, xapian
.sortable_serialise(vals
[id]))
1190 db
.add_document(doc
)
1192 expect(db
.get_value_freq(0), 0)
1193 expect(db
.get_value_lower_bound(0), b
"")
1194 expect(db
.get_value_upper_bound(0), b
"")
1195 expect(db
.get_value_freq(1), 10)
1196 expect(db
.get_value_lower_bound(1), xapian
.sortable_serialise(0))
1197 expect(db
.get_value_upper_bound(1), xapian
.sortable_serialise(9))
1198 expect(db
.get_value_freq(2), 0)
1199 expect(db
.get_value_lower_bound(2), b
"")
1200 expect(db
.get_value_upper_bound(2), b
"")
1203 expect(xapian
.Database
.check(dbpath
), 0)
1204 shutil
.rmtree(dbpath
)
1206 def test_get_uuid():
1207 """Test getting UUIDs from databases.
1210 dbpath
= 'db_test_get_uuid'
1211 db1
= xapian
.WritableDatabase(dbpath
+ "1", xapian
.DB_CREATE_OR_OVERWRITE
)
1212 db2
= xapian
.WritableDatabase(dbpath
+ "2", xapian
.DB_CREATE_OR_OVERWRITE
)
1213 dbr1
= xapian
.Database(dbpath
+ "1")
1214 dbr2
= xapian
.Database(dbpath
+ "2")
1215 expect(db1
.get_uuid() != db2
.get_uuid(), True)
1216 expect(db1
.get_uuid(), dbr1
.get_uuid())
1217 expect(db2
.get_uuid(), dbr2
.get_uuid())
1219 db
= xapian
.Database()
1220 db
.add_database(db1
)
1221 expect(db1
.get_uuid(), db
.get_uuid())
1228 shutil
.rmtree(dbpath
+ "1")
1229 shutil
.rmtree(dbpath
+ "2")
def test_director_exception():
    """Check that an exception raised inside a director reaches the caller.

    Python subclasses of ExpandDecider and MatchDecider raise a custom
    exception from __call__.  Each decider is exercised twice through
    expect_exception(): once by calling it directly, and once by handing
    it to Enquire.get_eset() / Enquire.get_mset().

    """
    class TestException(Exception):
        # The message is the concatenation of the two constructor arguments,
        # so "foo" + "bar" is reported as "foobar".
        def __init__(self, a, b):
            Exception.__init__(self, a + b)

    class EDecider(xapian.ExpandDecider):
        def __call__(self, term):
            raise TestException("foo", "bar")

    class MDecider(xapian.MatchDecider):
        def __call__(self, doc):
            raise TestException("foo", "bar")

    database = setup_database()
    query = xapian.Query('it')
    enquire = xapian.Enquire(database)
    enquire.set_query(query)

    relevant = xapian.RSet()
    relevant.add_document(1)

    # Expand decider: direct call first, then via get_eset().
    expand_decider = EDecider()
    expect_exception(TestException, "foobar", expand_decider, "foo")
    expect_exception(TestException, "foobar",
                     enquire.get_eset, 10, relevant, expand_decider)

    # Match decider: direct call first, then via get_mset().
    match_decider = MDecider()
    expect_exception(TestException, "foobar",
                     match_decider, xapian.Document())
    expect_exception(TestException, "foobar",
                     enquire.get_mset, 0, 10, None, match_decider)
def check_vals(db, vals):
    """Compare the slot 1 value of each document in db with vals.

    Every document id from 1 up to and including db.get_lastdocid() is
    fetched, and its value in slot 1 is compared with vals[docid] using
    expect().

    """
    last_docid = db.get_lastdocid()
    for docid in range(1, last_docid + 1):
        document = db.get_document(docid)
        stored = document.get_value(1)
        expect(stored, vals[docid],
               "Expected stored value in doc %d" % docid)
1267 def test_value_mods():
1268 """Test handling of modifications to values.
1271 dbpath
= 'db_test_value_mods'
1272 db
= xapian
.WritableDatabase(dbpath
, xapian
.DB_CREATE_OR_OVERWRITE
)
1277 # Add a value to all the documents
1278 for num
in range(1, doccount
):
1279 doc
=xapian
.Document()
1280 val
= ('val%d' % num
).encode('utf-8')
1281 doc
.add_value(1, val
)
1282 db
.add_document(doc
)
1285 check_vals(db
, vals
)
1287 # Modify one of the values (this is a regression test which failed with the
1288 # initial implementation of streaming values).
1289 doc
= xapian
.Document()
1291 doc
.add_value(1, val
)
1292 db
.replace_document(2, doc
)
1295 check_vals(db
, vals
)
1297 # Do some random modifications.
1298 for count
in range(1, doccount
* 2):
1299 docid
= random
.randint(1, doccount
)
1300 doc
= xapian
.Document()
1305 val
= ('newval%d' % count
).encode('utf-8')
1306 doc
.add_value(1, val
)
1307 db
.replace_document(docid
, doc
)
1310 # Check the values before and after modification.
1311 check_vals(db
, vals
)
1313 check_vals(db
, vals
)
1315 # Delete all the values which are non-empty, in a random order.
1316 keys
= [key
for key
, val
in vals
.items() if val
!= '']
1317 random
.shuffle(keys
)
1319 doc
= xapian
.Document()
1320 db
.replace_document(key
, doc
)
1322 check_vals(db
, vals
)
1324 check_vals(db
, vals
)
1327 expect_exception(xapian
.DatabaseError
, "Database has been closed", check_vals
, db
, vals
)
1328 shutil
.rmtree(dbpath
)
1330 def test_serialise_document():
1331 """Test serialisation of documents.
1334 doc
= xapian
.Document()
1335 doc
.add_term('foo', 2)
1336 doc
.add_value(1, b
'bar')
1339 doc2
= xapian
.Document
.unserialise(s
)
1340 expect(len(list(doc
.termlist())), len(list(doc2
.termlist())))
1341 expect(len(list(doc
.termlist())), 1)
1342 expect([(item
.term
, item
.wdf
) for item
in doc
.termlist()],
1343 [(item
.term
, item
.wdf
) for item
in doc2
.termlist()])
1344 expect([(item
.num
, item
.value
) for item
in list(doc
.values())],
1345 [(item
.num
, item
.value
) for item
in list(doc2
.values())])
1346 expect(doc
.get_data(), doc2
.get_data())
1347 expect(doc
.get_data(), b
'baz')
1349 db
= setup_database()
1350 doc
= db
.get_document(1)
1352 doc2
= xapian
.Document
.unserialise(s
)
1353 expect(len(list(doc
.termlist())), len(list(doc2
.termlist())))
1354 expect(len(list(doc
.termlist())), 3)
1355 expect([(item
.term
, item
.wdf
) for item
in doc
.termlist()],
1356 [(item
.term
, item
.wdf
) for item
in doc2
.termlist()])
1357 expect([(item
.num
, item
.value
) for item
in list(doc
.values())],
1358 [(item
.num
, item
.value
) for item
in list(doc2
.values())])
1359 expect(doc
.get_data(), doc2
.get_data())
1360 expect(doc
.get_data(), b
'is it cold?')
1362 def test_serialise_query():
1363 """Test serialisation of queries.
1367 q2
= xapian
.Query
.unserialise(q
.serialise())
1368 expect(str(q
), str(q2
))
1369 expect(str(q
), 'Query()')
1371 q
= xapian
.Query('hello')
1372 q2
= xapian
.Query
.unserialise(q
.serialise())
1373 expect(str(q
), str(q2
))
1374 expect(str(q
), 'Query(hello)')
1376 q
= xapian
.Query(xapian
.Query
.OP_OR
, ('hello', b
'world'))
1377 q2
= xapian
.Query
.unserialise(q
.serialise())
1378 expect(str(q
), str(q2
))
1379 expect(str(q
), 'Query((hello OR world))')
1381 def test_preserve_query_parser_stopper():
1382 """Test preservation of stopper set on query parser.
1386 queryparser
= xapian
.QueryParser()
1387 stopper
= xapian
.SimpleStopper()
1390 queryparser
.set_stopper(stopper
)
1393 queryparser
= make_qp()
1394 query
= queryparser
.parse_query('to be')
1395 expect([term
for term
in queryparser
.stoplist()], [b
'to'])
1397 def test_preserve_term_generator_stopper():
1398 """Test preservation of stopper set on term generator.
1402 termgen
= xapian
.TermGenerator()
1403 termgen
.set_stemmer(xapian
.Stem('en'))
1404 stopper
= xapian
.SimpleStopper()
1407 termgen
.set_stopper(stopper
)
1412 termgen
.index_text('to be')
1413 doc
= termgen
.get_document()
1414 terms
= [term
.term
for term
in doc
.termlist()]
1416 expect(terms
, [b
'Zbe', b
'be', b
'to'])
1418 def test_preserve_enquire_sorter():
1419 """Test preservation of sorter set on enquire.
1422 db
= xapian
.WritableDatabase('', xapian
.DB_BACKEND_INMEMORY
)
1423 doc
= xapian
.Document()
1425 doc
.add_value(1, '1')
1426 db
.add_document(doc
)
1427 db
.add_document(doc
)
1430 enq
= xapian
.Enquire(db
)
1431 sorter
= xapian
.MultiValueKeyMaker()
1432 enq
.set_sort_by_key(sorter
, False)
1436 enq
.set_query(xapian
.Query('foo'))
1440 enq
= xapian
.Enquire(db
)
1441 sorter
= xapian
.MultiValueKeyMaker()
1442 enq
.set_sort_by_key_then_relevance(sorter
, False)
1446 enq
.set_query(xapian
.Query('foo'))
1450 enq
= xapian
.Enquire(db
)
1451 sorter
= xapian
.MultiValueKeyMaker()
1452 enq
.set_sort_by_relevance_then_key(sorter
, False)
1456 enq
.set_query(xapian
.Query('foo'))
def test_matchspy():
    """Exercise match spies registered on an Enquire object.

    """
    def set_matchspy_deref(enq):
        """Register a matchspy whose only Python reference is local to this
        helper, to check that the spy doesn't get deleted too soon.
        """
        local_spy = xapian.ValueCountMatchSpy(0)
        enq.add_matchspy(local_spy)

    def term_freq_pairs(items):
        # Flatten spy results into comparable (term, termfreq) tuples.
        return [(item.term, item.termfreq) for item in items]

    database = setup_database()
    query = xapian.Query(xapian.Query.OP_OR, "was", "it")
    enquire = xapian.Enquire(database)
    enquire.set_query(query)

    set_matchspy_deref(enquire)
    mset = enquire.get_mset(0, 10)
    expect(len(mset), 5)

    spy = xapian.ValueCountMatchSpy(0)
    enquire.add_matchspy(spy)
    # Regression test for clear_matchspies() - used to always raise an
    # exception due to a copy and paste error in its definition.
    enquire.clear_matchspies()
    mset = enquire.get_mset(0, 10)
    seen_after_clear = [item for item in list(spy.values())]
    expect(seen_after_clear, [])

    enquire.add_matchspy(spy)
    mset = enquire.get_mset(0, 10)
    expect(spy.get_total(), 5)

    # values() is expected in value order, top_values() in frequency order.
    by_value = term_freq_pairs(list(spy.values()))
    expect(by_value, [
        (xapian.sortable_serialise(1.5), 1),
        (xapian.sortable_serialise(2), 2),
    ])
    by_frequency = term_freq_pairs(spy.top_values(10))
    expect(by_frequency, [
        (xapian.sortable_serialise(2), 2),
        (xapian.sortable_serialise(1.5), 1),
    ])
def test_import_star():
    """Check that "from xapian import *" works.

    Regression test: this failed in the 1.2.0 release.  Star imports are
    not normally good style, but they should work anyway.  The check is
    performed simply by importing the test_xapian_star module.

    """
    import test_xapian_star
def test_latlongcoords_iter():
    """Check that a LatLongCoords object can be iterated from Python.

    An empty container must yield nothing; after two coordinates are
    appended, iteration must yield them in insertion order.

    """
    coords = xapian.LatLongCoords()

    seen = []
    for coord in coords:
        seen.append(coord)
    expect(seen, [])

    for latitude, longitude in ((0, 0), (0, 1)):
        coords.append(xapian.LatLongCoord(latitude, longitude))

    described = []
    for coord in coords:
        described.append(str(coord))
    expect(described, ['Xapian::LatLongCoord(0, 0)',
                       'Xapian::LatLongCoord(0, 1)'])
1520 def test_compactor():
1521 """Test that xapian.Compactor works.
1524 tmpdir
= tempfile
.mkdtemp()
1525 db1
= db2
= db3
= None
1527 db1path
= os
.path
.join(tmpdir
, 'db1')
1528 db2path
= os
.path
.join(tmpdir
, 'db2')
1529 db3path
= os
.path
.join(tmpdir
, 'db3')
1531 # Set up a couple of sample input databases
1532 db1
= xapian
.WritableDatabase(db1path
, xapian
.DB_CREATE_OR_OVERWRITE
)
1533 doc1
= xapian
.Document()
1534 doc1
.add_term('Hello')
1535 doc1
.add_term('Hello1')
1536 doc1
.add_value(0, 'Val1')
1537 db1
.set_metadata('key', '1')
1538 db1
.set_metadata('key1', '1')
1539 db1
.add_document(doc1
)
1542 db2
= xapian
.WritableDatabase(db2path
, xapian
.DB_CREATE_OR_OVERWRITE
)
1543 doc2
= xapian
.Document()
1544 doc2
.add_term('Hello')
1545 doc2
.add_term('Hello2')
1546 doc2
.add_value(0, 'Val2')
1547 db2
.set_metadata('key', '2')
1548 db2
.set_metadata('key2', '2')
1549 db2
.add_document(doc2
)
1552 db_to_compact
= xapian
.Database()
1553 db_to_compact
.add_database(xapian
.Database(db1path
))
1554 db_to_compact
.add_database(xapian
.Database(db2path
))
1555 # Compact with the default compactor
1556 # Metadata conflicts are resolved by picking the first value
1557 db_to_compact
.compact(db3path
)
1559 db3
= xapian
.Database(db3path
)
1560 expect([(item
.term
, item
.termfreq
) for item
in db3
.allterms()],
1561 [(b
'Hello', 2), (b
'Hello1', 1), (b
'Hello2', 1)])
1562 expect(db3
.get_document(1).get_value(0), b
'Val1')
1563 expect(db3
.get_document(2).get_value(0), b
'Val2')
1564 expect(db3
.get_metadata('key'), b
'1')
1565 expect(db3
.get_metadata('key1'), b
'1')
1566 expect(db3
.get_metadata('key2'), b
'2')
1568 context("testing a custom compactor which merges duplicate metadata")
1569 class MyCompactor(xapian
.Compactor
):
1571 xapian
.Compactor
.__init
__(self
)
1574 def set_status(self
, table
, status
):
1575 if len(status
) == 0:
1576 self
.log
.append('Starting %s' % table
.decode('utf-8'))
1578 self
.log
.append('%s: %s' % (table
.decode('utf-8'), status
.decode('utf-8')))
1580 def resolve_duplicate_metadata(self
, key
, vals
):
1581 return b
','.join(vals
)
1584 db_to_compact
= xapian
.Database()
1585 db_to_compact
.add_database(xapian
.Database(db1path
))
1586 db_to_compact
.add_database(xapian
.Database(db2path
))
1587 db_to_compact
.compact(db3path
, 0, 0, c
)
1588 log
= '\n'.join(c
.log
)
1589 # Check we got some messages in the log
1590 expect('Starting postlist' in log
, True)
1592 db3
= xapian
.Database(db3path
)
1593 expect([(item
.term
, item
.termfreq
) for item
in db3
.allterms()],
1594 [(b
'Hello', 2), (b
'Hello1', 1), (b
'Hello2', 1)])
1595 expect(db3
.get_metadata('key'), b
'1,2')
1596 expect(db3
.get_metadata('key1'), b
'1')
1597 expect(db3
.get_metadata('key2'), b
'2')
1607 shutil
.rmtree(tmpdir
)
1609 def test_custom_matchspy():
1610 class MSpy(xapian
.MatchSpy
):
1612 xapian
.MatchSpy
.__init
__(self
)
1615 def __call__(self
, doc
, weight
):
1620 db
= setup_database()
1621 query
= xapian
.Query(xapian
.Query
.OP_OR
, "was", "it")
1623 enquire
= xapian
.Enquire(db
)
1624 enquire
.add_matchspy(mspy
)
1625 enquire
.set_query(query
)
1626 mset
= enquire
.get_mset(0, 1)
1627 expect(len(mset
), 1)
1628 expect(mspy
.count
>= 1, True)
1630 expect(db
.get_doccount(), 5)
def test_removed_features():
    """Check that a fixed set of attribute names is absent from the bindings.

    For each (object, names) group below, getattr() on every name is
    expected to raise AttributeError.

    """
    database = xapian.WritableDatabase('', xapian.DB_BACKEND_INMEMORY)
    document = xapian.Document()
    enquire = xapian.Enquire(database)
    eset = xapian.ESet()
    mset = xapian.MSet()
    query = xapian.Query()
    parser = xapian.QueryParser()
    term_iter = xapian._TermIterator()
    posting_iter = xapian._PostingIterator()

    # Groups are checked in the order listed.
    absent = (
        (xapian, (
            'Stem_get_available_languages',
            'TermIterator',
            'PositionIterator',
            'PostingIterator',
            'ValueIterator',
            'MSetIterator',
            'ESetIterator',
        )),
        (database, (
            'allterms_begin',
            'allterms_end',
            'metadata_keys_begin',
            'metadata_keys_end',
            'synonym_keys_begin',
            'synonym_keys_end',
            'synonyms_begin',
            'synonyms_end',
            'spellings_begin',
            'spellings_end',
            'positionlist_begin',
            'positionlist_end',
            'postlist_begin',
            'postlist_end',
            'termlist_begin',
            'termlist_end',
        )),
        (document, (
            'termlist_begin',
            'termlist_end',
            'values_begin',
            'values_end',
        )),
        (enquire, (
            'get_matching_terms_begin',
            'get_matching_terms_end',
        )),
        (eset, ('begin', 'end')),
        (mset, ('begin', 'end')),
        (posting_iter, ('positionlist_begin', 'positionlist_end')),
        (query, ('get_terms_begin', 'get_terms_end')),
        (parser, (
            'stoplist_begin',
            'stoplist_end',
            'unstem_begin',
            'unstem_end',
        )),
        (term_iter, ('positionlist_begin', 'positionlist_end')),
    )

    for target, names in absent:
        for name in names:
            expect_exception(AttributeError, None, getattr, target, name)
1692 # repr() returned None in 1.4.0.
1693 expect(repr(xapian
.Query('foo')) is None, False)
1694 expect(repr(xapian
.AssertionError('foo')) is None, False)
1695 expect(repr(xapian
.InvalidArgumentError('foo')) is None, False)
1696 expect(repr(xapian
.InvalidOperationError('foo')) is None, False)
1697 expect(repr(xapian
.UnimplementedError('foo')) is None, False)
1698 expect(repr(xapian
.DatabaseError('foo')) is None, False)
1699 expect(repr(xapian
.DatabaseCorruptError('foo')) is None, False)
1700 expect(repr(xapian
.DatabaseCreateError('foo')) is None, False)
1701 expect(repr(xapian
.DatabaseLockError('foo')) is None, False)
1702 expect(repr(xapian
.DatabaseModifiedError('foo')) is None, False)
1703 expect(repr(xapian
.DatabaseOpeningError('foo')) is None, False)
1704 expect(repr(xapian
.DatabaseVersionError('foo')) is None, False)
1705 expect(repr(xapian
.DocNotFoundError('foo')) is None, False)
1706 expect(repr(xapian
.FeatureUnavailableError('foo')) is None, False)
1707 expect(repr(xapian
.InternalError('foo')) is None, False)
1708 expect(repr(xapian
.NetworkError('foo')) is None, False)
1709 expect(repr(xapian
.NetworkTimeoutError('foo')) is None, False)
1710 expect(repr(xapian
.QueryParserError('foo')) is None, False)
1711 expect(repr(xapian
.SerialisationError('foo')) is None, False)
1712 expect(repr(xapian
.RangeError('foo')) is None, False)
1713 expect(repr(xapian
.WildcardError('foo')) is None, False)
1714 expect(repr(xapian
.Document()) is None, False)
1715 expect(repr(xapian
.Registry()) is None, False)
1716 expect(repr(xapian
.Query()) is None, False)
1717 expect(repr(xapian
.Stem('en')) is None, False)
1718 expect(repr(xapian
.TermGenerator()) is None, False)
1719 expect(repr(xapian
.MSet()) is None, False)
1720 expect(repr(xapian
.ESet()) is None, False)
1721 expect(repr(xapian
.RSet()) is None, False)
1722 expect(repr(xapian
.MultiValueKeyMaker()) is None, False)
1723 expect(repr(xapian
.SimpleStopper()) is None, False)
1724 expect(repr(xapian
.RangeProcessor()) is None, False)
1725 expect(repr(xapian
.DateRangeProcessor(1)) is None, False)
1726 expect(repr(xapian
.NumberRangeProcessor(1)) is None, False)
1727 expect(repr(xapian
.StringValueRangeProcessor(1)) is None, False)
1728 expect(repr(xapian
.DateValueRangeProcessor(1)) is None, False)
1729 expect(repr(xapian
.NumberValueRangeProcessor(1)) is None, False)
1730 expect(repr(xapian
.QueryParser()) is None, False)
1731 expect(repr(xapian
.BoolWeight()) is None, False)
1732 expect(repr(xapian
.TfIdfWeight()) is None, False)
1733 expect(repr(xapian
.BM25Weight()) is None, False)
1734 expect(repr(xapian
.BM25PlusWeight()) is None, False)
1735 expect(repr(xapian
.TradWeight()) is None, False)
1736 expect(repr(xapian
.InL2Weight()) is None, False)
1737 expect(repr(xapian
.IfB2Weight()) is None, False)
1738 expect(repr(xapian
.IneB2Weight()) is None, False)
1739 expect(repr(xapian
.BB2Weight()) is None, False)
1740 expect(repr(xapian
.DLHWeight()) is None, False)
1741 expect(repr(xapian
.PL2Weight()) is None, False)
1742 expect(repr(xapian
.PL2PlusWeight()) is None, False)
1743 expect(repr(xapian
.DPHWeight()) is None, False)
1744 expect(repr(xapian
.LMWeight()) is None, False)
1745 expect(repr(xapian
.CoordWeight()) is None, False)
1746 expect(repr(xapian
.Compactor()) is None, False)
1747 expect(repr(xapian
.ValuePostingSource(1)) is None, False)
1748 expect(repr(xapian
.ValueWeightPostingSource(1)) is None, False)
1749 expect(repr(xapian
.DecreasingValueWeightPostingSource(1)) is None, False)
1750 expect(repr(xapian
.ValueMapPostingSource(1)) is None, False)
1751 expect(repr(xapian
.FixedWeightPostingSource(1)) is None, False)
1752 expect(repr(xapian
.ValueCountMatchSpy(1)) is None, False)
1753 expect(repr(xapian
.LatLongCoord()) is None, False)
1754 expect(repr(xapian
.LatLongCoords()) is None, False)
1755 expect(repr(xapian
.GreatCircleMetric()) is None, False)
1756 expect(repr(xapian
.Database()) is None, False)
1757 expect(repr(xapian
.WritableDatabase()) is None, False)
1761 # Run all tests (ie, callables with names starting "test_").
1764 if not runtests(globals(), sys
.argv
[1:]):
1767 print("Running tests without threads")
1771 print("Running tests with threads")
1773 # This testcase seems to just block when run in a thread under Python 3
1774 # on some platforms. It fails with 3.2.3 on Debian wheezy, but passes
1775 # with the exact same package version on Debian unstable not long after
1776 # the jessie release. The issue it's actually serving to regression
1777 # test for is covered by running it without threads, so just disable it
1778 # rather than risk test failures that don't seem to indicate a problem
1780 del test_import_star
1782 t
= threading
.Thread(name
='test runner', target
=run
)
1784 # Block until the thread has completed so the thread gets a chance to exit
1785 # with error status.
1791 # vim:syntax=python:set expandtab: