added test for unwanted iteration
[pygr.git] / tests / pygrdata_test.py
blob 123dc8639ffb416999abe491047fed74a17283a7
1 import socket, unittest, os, md5, pickle, datetime
2 import warnings
4 import testlib
5 from testlib import testutil, SkipTest, PygrTestProgram
6 from pygr import seqdb, cnestedlist, mapping
7 from pygr.downloader import SourceURL, GenericBuilder
10 warnings.simplefilter("ignore")
11 import pygr.Data
12 warnings.simplefilter("default")
14 try:
15 set
16 except NameError:
17 from sets import Set as set
class TestBase(unittest.TestCase):
    "Common fixture inherited by every pygr.Data test class"

    def setUp(self, pygrDataPath=None, **kwargs):
        # Each test gets its own scratch directory; unless the caller
        # supplies a path, pygr.Data is pointed at that directory.
        scratch = testutil.TempDir('pygrdata')
        self.tempdir = scratch
        if pygrDataPath is None:
            pygrDataPath = scratch.path

        # drop anything loaded by an earlier test, then switch paths
        pygr.Data.clear_cache()
        pygr.Data.update(pygrDataPath, **kwargs)

        # short alias used throughout the test classes
        self.EQ = self.assertEqual
class Download_Test(TestBase):
    "Register a SourceURL in pygr.Data and verify the downloaded file"

    # tested elsewhere as well, on Linux makes gzip ask for permissions
    # to overwrite
    def test_download(self):
        "Downloading of gzipped file using pygr.Data"

        url = SourceURL('http://www.doe-mbi.ucla.edu/~leec/test.gz')
        url.__doc__ = 'test download'

        pygr.Data.addResource('Bio.Test.Download1', url)
        pygr.Data.save()

        # performs the download
        fpath = pygr.Data.Bio.Test.Download1()
        try:
            h = testutil.get_file_md5(fpath)
            self.assertEqual(h.hexdigest(),
                             'f95656496c5182d6cff9a56153c9db73')
        finally:
            # Remove the downloaded file even when the checksum check
            # fails, so a failed run does not leave a stale copy behind
            # for the next run to stumble over.
            os.remove(fpath)
class GenericBuild_Test(TestBase):
    "Round-trip a GenericBuilder through pickle and inspect the result"

    def test_generic_build(self):
        "GenericBuilder construction of the BlastDB"

        builder = GenericBuilder('BlastDB', testutil.datafile('sp_hbb1'))
        # unpickling the builder is what forces construction of the BlastDB
        pickled = pickle.dumps(builder)
        db = pickle.loads(pickled)
        self.EQ(len(db), 24)

        # collect the sequence IDs by iterating over the database
        found = []
        for seqID in db:
            found.append(seqID)
        found.sort()

        expected = [
            'HBB0_PAGBO', 'HBB1_ANAMI', 'HBB1_CYGMA', 'HBB1_IGUIG',
            'HBB1_MOUSE', 'HBB1_ONCMY', 'HBB1_PAGBO', 'HBB1_RAT',
            'HBB1_SPHPU', 'HBB1_TAPTE', 'HBB1_TORMA', 'HBB1_TRICR',
            'HBB1_UROHA', 'HBB1_VAREX', 'HBB1_XENBO', 'HBB1_XENLA',
            'HBB1_XENTR', 'MYG_DIDMA', 'MYG_ELEMA', 'MYG_ERIEU',
            'MYG_ESCGI', 'MYG_GALCR', 'PRCA_ANASP', 'PRCA_ANAVA',
        ]
        expected.sort()

        self.EQ(expected, found)
class DNAAnnotation_Test(TestBase):
    "Store a DNA db, an annotation db and their NLMSA map, then query them"

    def setUp(self, **kwargs):
        # Forward keyword arguments (e.g. pygrDataPath) to the base
        # fixture instead of silently discarding them.
        TestBase.setUp(self, **kwargs)
        dnaseq = testutil.datafile('dnaseq.fasta')
        tryannot = testutil.tempdatafile('tryannot')

        db = seqdb.BlastDB(dnaseq)
        try:
            db.__doc__ = 'little dna'
            pygr.Data.Bio.Test.dna = db

            # annotation tuples are (id, start, stop, name); see
            # sliceAttrDict for the index of each attribute
            annoDB = seqdb.AnnotationDB(
                {1: ('seq1', 5, 10, 'fred'),
                 2: ('seq1', -60, -50, 'bob'),
                 3: ('seq2', -20, -10, 'mary')},
                db,
                sliceAttrDict=dict(id=0, start=1, stop=2, name=3))
            annoDB.__doc__ = 'trivial annotation'
            pygr.Data.Bio.Test.annoDB = annoDB

            nlmsa = cnestedlist.NLMSA(tryannot, 'w', pairwiseMode=True,
                                      bidirectional=False)
            try:
                for annID in annoDB:
                    nlmsa.addAnnotation(annoDB[annID])

                nlmsa.build()
                nlmsa.__doc__ = 'trivial map'
                pygr.Data.Bio.Test.map = nlmsa
                # bind the annotations to sequences as the 'exons' attribute
                pygr.Data.schema.Bio.Test.map = \
                    pygr.Data.ManyToManyRelation(db, annoDB,
                                                 bindAttrs=('exons',))
                pygr.Data.save()
                pygr.Data.clear_cache()
            finally:
                nlmsa.close()
        finally:
            db.close()

    def test_annotation(self):
        "Annotation test"
        db = pygr.Data.Bio.Test.dna()
        try:
            s1 = db['seq1']
            exons = s1.exons.keys()
            annoDB = pygr.Data.Bio.Test.annoDB()
            assert exons == [annoDB[1], -(annoDB[2])]
            assert exons[0].sequence == s1[5:10]
            assert exons[1].sequence == s1[50:60]
            assert exons[0].name == 'fred', 'test annotation attribute access'
            assert exons[1].name == 'bob'

            # same query from the reverse strand of a truncated interval
            sneg = -(s1[:55])
            exons = sneg.exons.keys()
            assert exons == [annoDB[2][5:], -(annoDB[1])]
            assert exons[0].sequence == -(s1[50:55])
            assert exons[1].sequence == -(s1[5:10])
            assert exons[0].name == 'bob'
            assert exons[1].name == 'fred'
        finally:
            db.close()  # close SequenceFileDB
            pygr.Data.Bio.Test.map().close()  # close NLMSA
def populate_swissprot():
    "Register a small swissprot-derived data set in the current pygr.Data"
    # sequence database built from the bundled sp_hbb1 test file
    sp = seqdb.BlastDB(testutil.datafile('sp_hbb1'))
    sp.__doc__ = 'little swissprot'
    pygr.Data.Bio.Seq.Swissprot.sp42 = sp

    # one sequence interval stored as a resource of its own
    hbb = sp['HBB1_TORMA']
    fragment = hbb[10:35]
    fragment.__doc__ = 'fragment'
    pygr.Data.Bio.Seq.frag = fragment

    # a mapping from the database onto itself, holding a single edge
    selfMap = mapping.Mapping(sourceDB=sp, targetDB=sp)
    selfMap[hbb] = sp['PRCA_ANAVA']
    selfMap.__doc__ = 'map sp to itself'
    pygr.Data.Bio.Seq.spmap = selfMap

    # schema: expose the mapping on sequences as the 'buddy' attribute
    pygr.Data.schema.Bio.Seq.spmap = pygr.Data.OneToManyRelation(
        sp, sp, bindAttrs=('buddy',))

    # annotation database containing one annotation on HBB1_TORMA
    annoDB = seqdb.AnnotationDB(
        {1: ('HBB1_TORMA', 10, 50)}, sp,
        sliceAttrDict=dict(id=0, start=1, stop=2))
    exon = annoDB[1]

    # the NLMSA index files live in a dedicated temporary directory
    tempdir = testutil.TempDir('exonAnnot')
    filename = tempdir.subfile('cnested')
    annotMap = cnestedlist.NLMSA(filename, 'w', pairwiseMode=True,
                                 bidirectional=False)
    annotMap.addAnnotation(exon)
    annotMap.build()

    annoDB.__doc__ = 'a little annotation db'
    annotMap.__doc__ = 'a little map'
    pygr.Data.Bio.Annotation.annoDB = annoDB
    pygr.Data.Bio.Annotation.map = annotMap

    # schema: expose the annotations on sequences as the 'exons' attribute
    pygr.Data.schema.Bio.Annotation.map = pygr.Data.ManyToManyRelation(
        sp, annoDB, bindAttrs=('exons',))
def check_match(self):
    "Compare the stored fragment with the same slice taken from sp42"
    stored = pygr.Data.Bio.Seq.frag()
    sp = pygr.Data.Bio.Seq.Swissprot.sp42()
    fresh = sp['HBB1_TORMA'][10:35]

    assert stored == fresh, 'seq ival should match'
    assert stored.__doc__ == 'fragment', 'docstring should match'
    assert str(stored) == 'IQHIWSNVNVVEITAKALERVFYVY', 'letters should match'
    assert len(stored) == 25, 'length should match'
    assert len(stored.path) == 142, 'length should match'

    # Disabled comparison against a stored pickle, kept for reference:
    #store = PygrDataTextFile('results/seqdb1.pickle')
    #saved = store['hbb1 fragment']
    #assert frag == saved, 'seq ival should matched stored result'
def check_dir(self):
    "pygr.Data.dir('Bio') must list exactly the populated resources"
    listed = pygr.Data.dir('Bio')
    listed.sort()

    wanted = [
        'Bio.Annotation.annoDB',
        'Bio.Annotation.map',
        'Bio.Seq.Swissprot.sp42',
        'Bio.Seq.frag',
        'Bio.Seq.spmap',
    ]
    wanted.sort()

    assert listed == wanted
def check_dir_noargs(self):
    "dir() without arguments must agree with dir('')"
    implicit = pygr.Data.dir()
    implicit.sort()

    explicit = pygr.Data.dir('')
    explicit.sort()

    assert implicit == explicit
def check_dir_download(self):
    "dir(download=True) must be empty, with or without an empty prefix"
    implicit = pygr.Data.dir(download=True)
    implicit.sort()

    explicit = pygr.Data.dir('', download=True)
    explicit.sort()

    assert len(implicit) == 0
    assert implicit == explicit
def check_dir_re(self):
    "Check pygr.Data.dir() when the pattern is passed with the 'r' flag"
    # a pattern anchored at 'Bio' should return every populated resource
    expected = ['Bio.Annotation.annoDB', 'Bio.Annotation.map',
                'Bio.Seq.Swissprot.sp42', 'Bio.Seq.frag', 'Bio.Seq.spmap']
    expected.sort()
    found = pygr.Data.dir('^Bio', 'r')
    found.sort()
    assert found == expected

    # Raw string: the backslashes belong to the regular expression, not
    # to Python string escapes.  The value is unchanged from the non-raw
    # spelling, but '\.' no longer counts as an invalid escape sequence.
    expected = ['Bio.Seq.Swissprot.sp42', 'Bio.Seq.spmap']
    expected.sort()
    found = pygr.Data.dir(r'^Bio\..+\.sp', 'r')
    found.sort()
    assert found == expected
def check_bind(self):
    "The schema-bound 'buddy' attribute must lead from hbb to trypsin"
    seqs = pygr.Data.Bio.Seq.Swissprot.sp42()
    source = seqs['HBB1_TORMA']
    target = seqs['PRCA_ANAVA']
    assert source.buddy == target, 'automatic schema attribute binding'
def check_bind2(self):
    "The schema-bound 'exons' attribute must return the stored annotation"
    sp = pygr.Data.Bio.Seq.Swissprot.sp42()
    hbb = sp['HBB1_TORMA']
    bound = hbb.exons.keys()
    assert len(bound)==1, 'number of expected annotations'

    annoDB = pygr.Data.Bio.Annotation.annoDB()
    exon = annoDB[1]
    first = bound[0]
    assert first == exon, 'test annotation comparison'
    assert first.pathForward is exon,'annotation parent match'
    assert first.sequence == hbb[10:50],'annotation to sequence match'

    # a sequence with no annotations is expected to raise KeyError
    onc = sp['HBB1_ONCMY']
    try:
        onc.exons.keys()
    except KeyError:
        pass
    else:
        raise ValueError('failed to catch query with no annotations')
class Sequence_Test(TestBase):
    "Run the shared checks against a freshly populated pygr.Data store"

    def setUp(self, *args, **kwargs):
        TestBase.setUp(self, *args, **kwargs)
        populate_swissprot()
        # persist everything, then empty the cache so that every
        # request in the tests has to reload from storage
        pygr.Data.save()
        pygr.Data.clear_cache()

    def test_match(self):
        "Test matching sequences"
        check_match(self)

    def test_dir(self):
        "Test labels"
        for check in (check_dir, check_dir_noargs, check_dir_re):
            check(self)

    def test_bind(self):
        "Test bind"
        for check in (check_bind, check_bind2):
            check(self)

    def test_schema(self):
        "Test schema"
        seqPath = testutil.datafile('sp_hbb1')

        # first round: sp -> sp2 mapping, saved to storage
        sp2 = seqdb.BlastDB(seqPath)
        sp2.__doc__ = 'another sp'
        pygr.Data.Bio.Seq.sp2 = sp2
        sp = pygr.Data.Bio.Seq.Swissprot.sp42()
        firstMap = mapping.Mapping(sourceDB=sp, targetDB=sp2)
        firstMap.__doc__ = 'sp -> sp2'
        pygr.Data.Bio.Seq.testmap = firstMap
        pygr.Data.schema.Bio.Seq.testmap = \
            pygr.Data.OneToManyRelation(sp, sp2)
        pygr.Data.save()

        pygr.Data.clear_cache()

        # second round: sp3 -> sp2 mapping, with sp2 reloaded from storage
        sp3 = seqdb.BlastDB(seqPath)
        sp3.__doc__ = 'sp number 3'
        pygr.Data.Bio.Seq.sp3 = sp3
        sp2 = pygr.Data.Bio.Seq.sp2()
        secondMap = mapping.Mapping(sourceDB=sp3, targetDB=sp2)
        secondMap.__doc__ = 'sp3 -> sp2'
        pygr.Data.Bio.Seq.testmap2 = secondMap
        pygr.Data.schema.Bio.Seq.testmap2 = \
            pygr.Data.OneToManyRelation(sp3, sp2)

        # list of cached resources
        cached = pygr.Data.getResource.resourceCache.keys()
        cached.sort()
        assert cached == ['Bio.Seq.sp2', 'Bio.Seq.sp3', 'Bio.Seq.testmap2']
        pygr.Data.save()

        # every expected resource must be present in the stored graph
        graph = pygr.Data.getResource.writer.storage.graph
        found = set(graph.keys())
        expected = set(['Bio.Annotation.annoDB',
                        'Bio.Seq.Swissprot.sp42',
                        'Bio.Seq.sp2',
                        'Bio.Seq.sp3'])
        missing = expected - found
        self.EQ(len(missing), 0)
class SQL_Sequence_Test(Sequence_Test):
    "Repeat the Sequence_Test checks with pygr.Data stored in MySQL"

    def setUp(self):
        if not testutil.mysql_enabled():
            # Call form of raise: same behaviour on Python 2 as the old
            # "raise SkipTest, msg" spelling, and also valid on Python 3.
            raise SkipTest("no MySQL installed")

        self.dbtable = testutil.temp_table_name()  # create temp db tables
        Sequence_Test.setUp(self, pygrDataPath='mysql:' + self.dbtable,
                            mdbArgs=dict(createLayer='temp'))

    def tearDown(self):
        # drop the temporary tables named in setUp
        testutil.drop_tables(pygr.Data.getResource.writer.storage.cursor,
                             self.dbtable)
class InvalidPickle_Test(TestBase):
    "pygr.Data.dumps must reject objects whose class cannot be re-imported"

    def setUp(self):
        TestBase.setUp(self)

        # a locally defined class that is relabelled as living in __main__
        class MyUnpicklableClass(object):
            pass
        MyUnpicklableClass.__module__ = '__main__'

        self.bad = MyUnpicklableClass()
        self.good = datetime.datetime.today()

    def test_invalid_pickle(self):
        "Testing an invalid pickle"
        # should pickle with no errors
        pygr.Data.dumps(self.good)

        try:
            pygr.Data.dumps(self.bad)  # should raise exception
        except pygr.Data.WorldbaseNoModuleError:
            pass
        else:
            msg = 'failed to catch bad attempt to invalid module ref'
            raise ValueError(msg)
class XMLRPC_Test(TestBase):
    'create an XMLRPC server and access seqdb from it'

    def setUp(self):
        TestBase.setUp(self)
        populate_swissprot()     # save some data
        pygr.Data.save()         # finally save everything
        pygr.Data.clear_cache()  # force all requests to reload

        # resources the test server is asked to serve
        served = ['Bio.Seq.Swissprot.sp42', 'Bio.Seq.frag', 'Bio.Seq.spmap',
                  'Bio.Annotation.annoDB', 'Bio.Annotation.map']
        self.server = testutil.TestXMLRPCServer(served, self.tempdir.path)

    def test_xmlrpc(self):
        "Test XMLRPC"
        pygr.Data.clear_cache()  # force all requests to reload
        pygr.Data.update("http://localhost:%s" % self.server.port)

        # the shared checks, now run against the XMLRPC-backed store
        for check in (check_match, check_dir, check_dir_noargs,
                      check_dir_download, check_dir_re,
                      check_bind, check_bind2):
            check(self)

        # writing through the XMLRPC server is expected to raise ValueError
        seqPath = testutil.datafile('sp_hbb1')
        sp2 = seqdb.BlastDB(seqPath)
        sp2.__doc__ = 'another sp'
        try:
            pygr.Data.Bio.Seq.sp2 = sp2
            pygr.Data.save()
        except ValueError:
            pass
        else:
            msg = 'failed to catch bad attempt to write to XMLRPC server'
            raise KeyError(msg)

    def tearDown(self):
        'halt the test XMLRPC server'
        self.server.close()
# Run the suite verbosely when this file is executed as a script.
if "__main__" == __name__:
    PygrTestProgram(verbosity=2)