1 # Simple test to ensure that we can load the xapian module and exercise basic
2 # functionality successfully.
4 # Copyright (C) 2004,2005,2006,2007,2008,2010,2011,2012,2014,2015,2016,2017 Olly Betts
5 # Copyright (C) 2007 Lemur Consulting Ltd
7 # This program is free software; you can redistribute it and/or
8 # modify it under the terms of the GNU General Public License as
9 # published by the Free Software Foundation; either version 2 of the
10 # License, or (at your option) any later version.
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with this program; if not, write to the Free Software
19 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
26 from testsuite
import *
# Stemmer which strips English vowels.
class MyStemmer(xapian.StemImplementation):
    """Custom stemmer which removes the lowercase vowels a, e, i, o, u.

    Each instance registers an id in the module-level ``mystemmers`` set when
    it is constructed and removes that id again when it is destroyed, so the
    set holds the ids of the instances which have not been deleted yet.
    """

    def __init__(self):
        """Initialise the base class and register this instance's id."""
        global mystemmer_id
        super(MyStemmer, self).__init__()
        mystemmers.add(mystemmer_id)
        self._id = mystemmer_id
        # Advance the counter so that the next instance gets a distinct id;
        # otherwise two instances would share one entry in mystemmers and the
        # second deletion would be misreported as a double delete.
        mystemmer_id += 1

    def __call__(self, s):
        """Return `s` with every lowercase English vowel removed."""
        import re
        return re.sub(r'[aeiou]', '', s)

    def __del__(self):
        """Unregister this instance's id, flagging a repeated deletion."""
        if self._id not in mystemmers:
            raise TestFail("MyStemmer #%d deleted more than once" % self._id)
        mystemmers.remove(self._id)
51 # Test the version number reporting functions give plausible results.
52 v
= "%d.%d.%d" % (xapian
.major_version(),
53 xapian
.minor_version(),
55 v2
= xapian
.version_string()
56 expect(v2
, v
, "Unexpected version output")
58 # A regexp check would be better, but seems to create a bogus "leak" of -1
59 # objects in Python 3.
60 expect(len(xapian
.__version
__.split('.')), 3, 'xapian.__version__ not X.Y.Z')
61 expect((xapian
.__version
__.split('.'))[0], '1', 'xapian.__version__ not "1.Y.Z"')
65 print "Unhandled constants: ", res
68 # Check that SWIG isn't generating cvar (regression test for ticket#297).
69 expect_exception(AttributeError, "'module' object has no attribute 'cvar'",
72 stem
= xapian
.Stem("english")
73 expect(str(stem
), "Xapian::Stem(english)", "Unexpected str(stem)")
75 doc
= xapian
.Document()
77 if doc
.get_data() == "a":
78 raise TestFail("get_data+set_data truncates at a zero byte")
79 expect(doc
.get_data(), "a\0b", "get_data+set_data doesn't transparently handle a zero byte")
80 doc
.set_data("is there anybody out there?")
82 doc
.add_posting(stem("is"), 1)
83 doc
.add_posting(stem("there"), 2)
84 doc
.add_posting(stem("anybody"), 3)
85 doc
.add_posting(stem("out"), 4)
86 doc
.add_posting(stem("there"), 5)
88 db
= xapian
.WritableDatabase('', xapian
.DB_BACKEND_INMEMORY
)
90 expect(db
.get_doccount(), 1, "Unexpected db.get_doccount()")
91 terms
= ["smoke", "test", "terms"]
92 expect_query(xapian
.Query(xapian
.Query
.OP_OR
, terms
),
93 "(smoke OR test OR terms)")
94 query1
= xapian
.Query(xapian
.Query
.OP_PHRASE
, ("smoke", "test", "tuple"))
95 query2
= xapian
.Query(xapian
.Query
.OP_XOR
, (xapian
.Query("smoke"), query1
, "string"))
96 expect_query(query1
, "(smoke PHRASE 3 test PHRASE 3 tuple)")
97 expect_query(query2
, "(smoke XOR (smoke PHRASE 3 test PHRASE 3 tuple) XOR string)")
99 expect_query(xapian
.Query(xapian
.Query
.OP_OR
, subqs
), "(a OR b)")
100 expect_query(xapian
.Query(xapian
.Query
.OP_VALUE_RANGE
, 0, '1', '4'),
103 # Check database factory functions are wrapped as expected:
105 expect_exception(xapian
.DatabaseOpeningError
, None,
106 xapian
.Database
, "nosuchdir/nosuchdb", xapian
.DB_BACKEND_STUB
)
107 expect_exception(xapian
.DatabaseOpeningError
, None,
108 xapian
.WritableDatabase
, "nosuchdir/nosuchdb", xapian
.DB_OPEN|xapian
.DB_BACKEND_STUB
)
110 expect_exception(xapian
.NetworkError
, None,
111 xapian
.remote_open
, "/bin/false", "")
112 expect_exception(xapian
.NetworkError
, None,
113 xapian
.remote_open_writable
, "/bin/false", "")
115 expect_exception(xapian
.NetworkError
, None,
116 xapian
.remote_open
, "127.0.0.1", 0, 1)
117 expect_exception(xapian
.NetworkError
, None,
118 xapian
.remote_open_writable
, "127.0.0.1", 0, 1)
120 # Check wrapping of MatchAll and MatchNothing:
122 expect_query(xapian
.Query
.MatchAll
, "<alldocuments>")
123 expect_query(xapian
.Query
.MatchNothing
, "")
125 # Feature test for Query.__iter__
129 expect(term_count
, 4, "Unexpected number of terms in query2")
131 enq
= xapian
.Enquire(db
)
132 enq
.set_query(xapian
.Query(xapian
.Query
.OP_OR
, "there", "is"))
133 mset
= enq
.get_mset(0, 10)
134 expect(mset
.size(), 1, "Unexpected mset.size()")
135 expect(len(mset
), 1, "Unexpected mset.size()")
137 # Feature test for Enquire.matching_terms(docid)
139 for term
in enq
.matching_terms(mset
.get_hit(0)):
141 expect(term_count
, 2, "Unexpected number of matching terms")
143 # Feature test for MSet.__iter__
147 expect(msize
, mset
.size(), "Unexpected number of entries in mset")
149 terms
= " ".join(enq
.matching_terms(mset
.get_hit(0)))
150 expect(terms
, "is there", "Unexpected terms")
152 # Feature test for ESet.__iter__
155 eset
= enq
.get_eset(10, rset
)
159 expect(term_count
, 3, "Unexpected number of expand terms")
161 # Feature test for Database.__iter__
165 expect(term_count
, 5, "Unexpected number of terms in db")
167 # Feature test for Database.allterms
169 for term
in db
.allterms():
171 expect(term_count
, 5, "Unexpected number of terms in db.allterms")
173 # Feature test for Database.postlist
175 for posting
in db
.postlist("there"):
177 expect(count
, 1, "Unexpected number of entries in db.postlist('there')")
179 # Feature test for Database.postlist with empty term (alldocspostlist)
181 for posting
in db
.postlist(""):
183 expect(count
, 1, "Unexpected number of entries in db.postlist('')")
185 # Feature test for Database.termlist
187 for term
in db
.termlist(1):
189 expect(count
, 5, "Unexpected number of entries in db.termlist(1)")
191 # Feature test for Database.positionlist
193 for term
in db
.positionlist(1, "there"):
195 expect(count
, 2, "Unexpected number of entries in db.positionlist(1, 'there')")
197 # Feature test for Document.termlist
199 for term
in doc
.termlist():
201 expect(count
, 5, "Unexpected number of entries in doc.termlist()")
203 # Feature test for TermIter.skip_to
204 term
= doc
.termlist()
209 except StopIteration:
212 raise TestFail("TermIter.skip_to didn't skip term '%s'" % x
.term
)
214 # Feature test for Document.values
216 for term
in doc
.values():
218 expect(count
, 0, "Unexpected number of entries in doc.values")
220 # Check exception handling for Xapian::DocNotFoundError
221 expect_exception(xapian
.DocNotFoundError
, "Docid 3 not found", db
.get_document
, 3)
223 # Check value of OP_ELITE_SET
224 expect(xapian
.Query
.OP_ELITE_SET
, 10, "Unexpected value for OP_ELITE_SET")
226 # Feature test for MatchDecider
227 doc
= xapian
.Document()
229 doc
.add_posting(stem("out"), 1)
230 doc
.add_posting(stem("outside"), 1)
231 doc
.add_posting(stem("source"), 2)
232 doc
.add_value(0, "yes")
235 class testmatchdecider(xapian
.MatchDecider
):
236 def __call__(self
, doc
):
237 return doc
.get_value(0) == "yes"
239 query
= xapian
.Query(stem("out"))
240 enquire
= xapian
.Enquire(db
)
241 enquire
.set_query(query
)
242 mset
= enquire
.get_mset(0, 10, None, testmatchdecider())
243 expect(mset
.size(), 1, "Unexpected number of documents returned by match decider")
244 expect(mset
.get_docid(0), 2, "MatchDecider mset has wrong docid in")
246 # Feature test for ExpandDecider
247 class testexpanddecider(xapian
.ExpandDecider
):
248 def __call__(self
, term
):
249 return (not term
.startswith('a'))
251 enquire
= xapian
.Enquire(db
)
254 eset
= enquire
.get_eset(10, rset
, xapian
.Enquire
.USE_EXACT_TERMFREQ
, testexpanddecider())
255 eset_terms
= [item
.term
for item
in eset
]
256 expect(len(eset_terms
), eset
.size(), "Unexpected number of terms returned by expand")
257 if [t
for t
in eset_terms
if t
.startswith('a')]:
258 raise TestFail("ExpandDecider was not used")
260 # Check min_wt argument to get_eset() works (new in 1.2.5).
261 eset
= enquire
.get_eset(100, rset
, xapian
.Enquire
.USE_EXACT_TERMFREQ
)
262 expect([i
.weight
for i
in eset
][-1] < 1.9, True, "test get_eset() without min_wt")
263 eset
= enquire
.get_eset(100, rset
, xapian
.Enquire
.USE_EXACT_TERMFREQ
, None, 1.9)
264 expect([i
.weight
for i
in eset
][-1] >= 1.9, True, "test get_eset() min_wt")
266 # Check QueryParser parsing error.
267 qp
= xapian
.QueryParser()
268 expect_exception(xapian
.QueryParserError
, "Syntax: <expression> AND <expression>", qp
.parse_query
, "test AND")
270 # Check QueryParser pure NOT option
271 qp
= xapian
.QueryParser()
272 expect_query(qp
.parse_query("NOT test", qp
.FLAG_BOOLEAN
+ qp
.FLAG_PURE_NOT
),
273 "(<alldocuments> AND_NOT test@1)")
275 # Check QueryParser partial option
276 qp
= xapian
.QueryParser()
278 qp
.set_default_op(xapian
.Query
.OP_AND
)
279 qp
.set_stemming_strategy(qp
.STEM_SOME
)
280 qp
.set_stemmer(xapian
.Stem('en'))
281 expect_query(qp
.parse_query("foo o", qp
.FLAG_PARTIAL
),
282 "(Zfoo@1 AND ((SYNONYM WILDCARD OR o) OR Zo@2))")
284 expect_query(qp
.parse_query("foo outside", qp
.FLAG_PARTIAL
),
285 "(Zfoo@1 AND ((SYNONYM WILDCARD OR outside) OR Zoutsid@2))")
287 # Test supplying unicode strings
288 expect_query(xapian
.Query(xapian
.Query
.OP_OR
, (u
'foo', u
'bar')),
290 expect_query(xapian
.Query(xapian
.Query
.OP_OR
, ('foo', u
'bar\xa3')),
291 '(foo OR bar\xc2\xa3)')
292 expect_query(xapian
.Query(xapian
.Query
.OP_OR
, ('foo', 'bar\xc2\xa3')),
293 '(foo OR bar\xc2\xa3)')
294 expect_query(xapian
.Query(xapian
.Query
.OP_OR
, u
'foo', u
'bar'),
297 expect_query(qp
.parse_query(u
"NOT t\xe9st", qp
.FLAG_BOOLEAN
+ qp
.FLAG_PURE_NOT
),
298 "(<alldocuments> AND_NOT Zt\xc3\xa9st@1)")
300 doc
= xapian
.Document()
301 doc
.set_data(u
"Unicode with an acc\xe9nt")
302 doc
.add_posting(stem(u
"out\xe9r"), 1)
303 expect(doc
.get_data(), u
"Unicode with an acc\xe9nt".encode('utf-8'))
304 term
= doc
.termlist().next().term
305 expect(term
, u
"out\xe9r".encode('utf-8'))
307 # Check simple stopper
308 stop
= xapian
.SimpleStopper()
310 expect(stop('a'), False)
311 expect_query(qp
.parse_query(u
"foo bar a", qp
.FLAG_BOOLEAN
),
312 "(Zfoo@1 AND Zbar@2 AND Za@3)")
315 expect(stop('a'), True)
316 expect_query(qp
.parse_query(u
"foo bar a", qp
.FLAG_BOOLEAN
),
317 "(Zfoo@1 AND Zbar@2)")
319 # Feature test for custom Stopper
320 class my_b_stopper(xapian
.Stopper
):
321 def __call__(self
, term
):
324 def get_description(self
):
325 return u
"my_b_stopper"
327 stop
= my_b_stopper()
328 expect(stop
.get_description(), u
"my_b_stopper")
330 expect(stop('a'), False)
331 expect_query(qp
.parse_query(u
"foo bar a", qp
.FLAG_BOOLEAN
),
332 "(Zfoo@1 AND Zbar@2 AND Za@3)")
334 expect(stop('b'), True)
335 expect_query(qp
.parse_query(u
"foo bar b", qp
.FLAG_BOOLEAN
),
336 "(Zfoo@1 AND Zbar@2)")
338 # Test SimpleStopper initialised from a file.
340 srcdir
= os
.environ
['srcdir']
343 stop
= xapian
.SimpleStopper(srcdir
+ '/../shortstop.list')
344 expect(stop('a'), True)
345 expect(stop('am'), False)
346 expect(stop('an'), True)
347 expect(stop('the'), True)
349 expect_exception(xapian
.InvalidArgumentError
, None, xapian
.SimpleStopper
, 'nosuchfile')
352 termgen
= xapian
.TermGenerator()
353 doc
= xapian
.Document()
354 termgen
.set_document(doc
)
355 termgen
.index_text('foo bar baz foo')
356 expect([(item
.term
, item
.wdf
, [pos
for pos
in item
.positer
]) for item
in doc
.termlist()], [('bar', 1, [2]), ('baz', 1, [3]), ('foo', 2, [1, 4])])
359 # Check DateValueRangeProcessor works
360 context("checking that DateValueRangeProcessor works")
361 qp
= xapian
.QueryParser()
362 vrpdate
= xapian
.DateValueRangeProcessor(1, 1, 1960)
363 qp
.add_valuerangeprocessor(vrpdate
)
364 query
= qp
.parse_query('12/03/99..12/04/01')
365 expect(str(query
), 'Query(VALUE_RANGE 1 19991203 20011204)')
367 # Regression test for bug#193, fixed in 1.0.3.
368 context("running regression test for bug#193")
369 vrp
= xapian
.NumberValueRangeProcessor(0, '$', True)
372 slot
, a
, b
= vrp(a
, b
)
374 expect(xapian
.sortable_unserialise(a
), 10)
375 expect(xapian
.sortable_unserialise(b
), 20)
377 # Feature test for xapian.FieldProcessor
378 context("running feature test for xapian.FieldProcessor")
379 class testfieldprocessor(xapian
.FieldProcessor
):
380 def __call__(self
, s
):
382 raise Exception('already spam')
383 return xapian
.Query("spam")
385 qp
.add_prefix('spam', testfieldprocessor())
386 qp
.add_boolean_prefix('boolspam', testfieldprocessor())
387 qp
.add_boolean_prefix('boolspam2', testfieldprocessor(), False) # Old-style
388 qp
.add_boolean_prefix('boolspam3', testfieldprocessor(), '')
389 qp
.add_boolean_prefix('boolspam4', testfieldprocessor(), 'group')
390 qp
.add_boolean_prefix('boolspam5', testfieldprocessor(), None)
391 query
= qp
.parse_query('spam:ignored')
392 expect(str(query
), 'Query(spam)')
394 expect_exception(Exception, 'already spam', qp
.parse_query
, 'spam:spam')
    # Regression tests copied from PHP (these probably always worked in Python,
    # but let's check).
398 context("running regression tests for issues which were found in PHP")
400 # PHP overload resolution involving boolean types failed.
401 enq
.set_sort_by_value(1, True)
403 # Regression test - fixed in 0.9.10.1.
404 oqparser
= xapian
.QueryParser()
405 oquery
= oqparser
.parse_query("I like tea")
407 # Regression test for bug fixed in 1.4.4:
408 # https://bugs.debian.org/849722
409 oqparser
.add_boolean_prefix('tag', 'K', '')
410 # Make sure other cases also work:
411 oqparser
.add_boolean_prefix('zag', 'XR', False) # Old-style
412 oqparser
.add_boolean_prefix('rag', 'XR', None)
413 oqparser
.add_boolean_prefix('nag', 'XB', '')
414 oqparser
.add_boolean_prefix('bag', 'XB', 'blergh')
415 oqparser
.add_boolean_prefix('gag', 'XB', u
'blergh')
416 oqparser
.add_boolean_prefix('jag', 'XB', b
'blergh')
418 # Regression test for bug#192 - fixed in 1.0.3.
421 # Test setting and getting metadata
422 expect(db
.get_metadata('Foo'), '')
423 db
.set_metadata('Foo', 'Foo')
424 expect(db
.get_metadata('Foo'), 'Foo')
425 expect_exception(xapian
.InvalidArgumentError
, "Empty metadata keys are invalid", db
.get_metadata
, '')
426 expect_exception(xapian
.InvalidArgumentError
, "Empty metadata keys are invalid", db
.set_metadata
, '', 'Foo')
427 expect_exception(xapian
.InvalidArgumentError
, "Empty metadata keys are invalid", db
.get_metadata
, '')
429 # Test OP_SCALE_WEIGHT and corresponding constructor
430 expect_query(xapian
.Query(xapian
.Query
.OP_SCALE_WEIGHT
, xapian
.Query('foo'), 5),
435 stem
= xapian
.Stem(mystem
)
436 expect(stem('test'), 'tst')
437 stem2
= xapian
.Stem(mystem
)
438 expect(stem2('toastie'), 'tst')
440 indexer
= xapian
.TermGenerator()
441 indexer
.set_stemmer(xapian
.Stem(MyStemmer()))
443 doc
= xapian
.Document()
444 indexer
.set_document(doc
)
445 indexer
.index_text('hello world')
448 for t
in doc
.termlist():
451 expect(s
, '/Zhll/Zwrld/hello/world/')
453 parser
= xapian
.QueryParser()
454 parser
.set_stemmer(xapian
.Stem(MyStemmer()))
455 parser
.set_stemming_strategy(xapian
.QueryParser
.STEM_ALL
)
456 expect_query(parser
.parse_query('color television'), '(clr@1 OR tlvsn@2)')
def test_internal_enums_not_wrapped():
    """Check that xapian.Query exposes no attribute whose name starts LEAF_."""
    exposed = []
    for name in dir(xapian.Query):
        if name.startswith('LEAF_'):
            exposed.append(name)
    # Any name collected here is reported by the failed comparison.
    expect(exposed, [])
def test_internals_not_wrapped():
    """Check that no name ending in an underscore is exposed by xapian.

    Both the attributes of the module itself and the attributes of each class
    in it are examined.  Every offending name is collected, so a failure
    reports all of them at once rather than just the first.
    """
    internals = []
    for c in dir(xapian):
        # Skip Python stuff like __file__ and __version__.
        if c.startswith('__'):
            continue
        if c.endswith('_'):
            internals.append(c)
        # Only names starting with an upper case letter are looked into.
        if not c[0].isupper():
            continue
        # dir() only yields attribute names, so a plain attribute lookup
        # is all that is needed here.
        cls = getattr(xapian, c)
        # Skip anything whose type isn't the ordinary class type.
        if type(cls) != type(object):
            continue
        for m in dir(cls):
            if m.startswith('__'):
                continue
            if m.endswith('_'):
                internals.append(c + '.' + m)

    expect(internals, [])
def test_zz9_check_leaks():
    """Fail if any MyStemmer id is still registered in ``mystemmers``.

    Raises:
        TestFail: if ``mystemmers`` is not empty; the message gives the
            number of ids left in it.
    """
    import gc
    # Run a full collection first so that objects which are only kept alive
    # by reference cycles are finalised before the check is made.
    gc.collect()
    if mystemmers:
        raise TestFail("%d MyStemmer objects not deleted" % len(mystemmers))
484 # Run all tests (ie, callables with names starting "test_").
485 if not runtests(globals()):
488 # vim:syntax=python:set expandtab: