tests/pygrdata_test.py

   1 import socket, unittest, os, md5, pickle, datetime
   2 import warnings
   3
   4 import testlib
   5 from testlib import testutil, SkipTest, PygrTestProgram
   6 from pygr import seqdb, cnestedlist, mapping
   7 from pygr.downloader import SourceURL, GenericBuilder
   8
   9
  10 warnings.simplefilter("ignore")
  11 import pygr.Data
  12 warnings.simplefilter("default")
  13
  14 try:
  15     set
  16 except NameError:
  17     from sets import Set as set
  18
  19 class TestBase(unittest.TestCase):
  20     "A base class to all pygr.Data test classes"
  21
  22     def setUp(self, pygrDataPath=None, **kwargs):
  23         # overwrite the WORLDBASEPATH environment variable
  24         self.tempdir = testutil.TempDir('pygrdata')
  25         if pygrDataPath is None:
  26             pygrDataPath = self.tempdir.path
  27         pygr.Data.clear_cache() # make sure no old data loaded
  28         pygr.Data.update(pygrDataPath, **kwargs) # use this path
  29         # handy shortcuts
  30         self.EQ = self.assertEqual
  31
  32 class Download_Test(TestBase):
  33     "Save seq db and interval to pygr.Data shelve"
  34
  35     # tested elsewhere as well, on Linux makes gzip ask for permissions
  36     # to overwrite
  37     def test_download(self):
  38         "Downloading of gzipped file using pygr.Data"
  39
  40         url = SourceURL('http://www.doe-mbi.ucla.edu/~leec/test.gz')
  41         url.__doc__ = 'test download'
  42
  43         pygr.Data.addResource('Bio.Test.Download1', url)
  44         pygr.Data.save()
  45
  46         # performs the download
  47         fpath = pygr.Data.Bio.Test.Download1()
  48         h = testutil.get_file_md5(fpath)
  49         self.assertEqual(h.hexdigest(), 'f95656496c5182d6cff9a56153c9db73')
  50         os.remove(fpath)
  51
  52 class GenericBuild_Test(TestBase):
  53
  54     def test_generic_build(self):
  55         "GenericBuilder construction of the BlastDB"
  56
  57         sp_hbb1 = testutil.datafile('sp_hbb1')
  58         gb = GenericBuilder('BlastDB', sp_hbb1)
  59         s = pickle.dumps(gb)
  60         db = pickle.loads(s) # force construction of the BlastDB
  61         self.EQ(len(db), 24)
  62
  63         found = [x for x in db]
  64         found.sort()
  65
  66         expected = ['HBB0_PAGBO', 'HBB1_ANAMI', 'HBB1_CYGMA', 'HBB1_IGUIG',
  67                    'HBB1_MOUSE', 'HBB1_ONCMY', 'HBB1_PAGBO', 'HBB1_RAT',
  68                    'HBB1_SPHPU', 'HBB1_TAPTE', 'HBB1_TORMA', 'HBB1_TRICR',
  69                    'HBB1_UROHA', 'HBB1_VAREX', 'HBB1_XENBO', 'HBB1_XENLA',
  70                    'HBB1_XENTR', 'MYG_DIDMA', 'MYG_ELEMA', 'MYG_ERIEU',
  71                    'MYG_ESCGI', 'MYG_GALCR', 'PRCA_ANASP', 'PRCA_ANAVA']
  72         expected.sort()
  73
  74         self.EQ(expected, found)
  75
  76 class DNAAnnotation_Test(TestBase):
  77
  78     def setUp(self,**kwargs):
  79         TestBase.setUp(self)
  80         dnaseq = testutil.datafile('dnaseq.fasta')
  81         tryannot = testutil.tempdatafile('tryannot')
  82
  83         db = seqdb.BlastDB(dnaseq)
  84         try:
  85             db.__doc__ = 'little dna'
  86
  87             pygr.Data.Bio.Test.dna = db
  88             annoDB = seqdb.AnnotationDB({1:('seq1',5,10,'fred'),
  89                                          2:('seq1',-60,-50,'bob'),
  90                                          3:('seq2',-20,-10,'mary')},
  91                                         db,
  92                                   sliceAttrDict=dict(id=0, start=1, stop=2,
  93                                                      name=3))
  94             annoDB.__doc__ = 'trivial annotation'
  95             pygr.Data.Bio.Test.annoDB = annoDB
  96             nlmsa = cnestedlist.NLMSA(tryannot,'w',pairwiseMode=True,
  97                                       bidirectional=False)
  98             try:
  99                 for annID in annoDB:
 100                     nlmsa.addAnnotation(annoDB[annID])
 101
 102                 nlmsa.build()
 103                 nlmsa.__doc__ = 'trivial map'
 104                 pygr.Data.Bio.Test.map = nlmsa
 105                 pygr.Data.schema.Bio.Test.map = \
 106                        pygr.Data.ManyToManyRelation(db, annoDB,bindAttrs=('exons',))
 107                 pygr.Data.save()
 108                 pygr.Data.clear_cache()
 109             finally:
 110                 nlmsa.close()
 111         finally:
 112             db.close()
 113
 114     def test_annotation(self):
 115         "Annotation test"
 116         db = pygr.Data.Bio.Test.dna()
 117         try:
 118             s1 = db['seq1']
 119             l = s1.exons.keys()
 120             annoDB = pygr.Data.Bio.Test.annoDB()
 121             assert l == [annoDB[1], -(annoDB[2])]
 122             assert l[0].sequence == s1[5:10]
 123             assert l[1].sequence == s1[50:60]
 124             assert l[0].name == 'fred','test annotation attribute access'
 125             assert l[1].name == 'bob'
 126             sneg = -(s1[:55])
 127             l = sneg.exons.keys()
 128             assert l == [annoDB[2][5:], -(annoDB[1])]
 129             assert l[0].sequence == -(s1[50:55])
 130             assert l[1].sequence == -(s1[5:10])
 131             assert l[0].name == 'bob'
 132             assert l[1].name == 'fred'
 133         finally:
 134             db.close() # close SequenceFileDB
 135             pygr.Data.Bio.Test.map().close() # close NLMSA
 136
 137 def populate_swissprot():
 138     "Populate the current pygrData with swissprot data"
 139     # build BlastDB out of the sequences
 140     sp_hbb1 = testutil.datafile('sp_hbb1')
 141     sp = seqdb.BlastDB(sp_hbb1)
 142     sp.__doc__ = 'little swissprot'
 143     pygr.Data.Bio.Seq.Swissprot.sp42 = sp
 144
 145     # also store a fragment
 146     hbb = sp['HBB1_TORMA']
 147     ival= hbb[10:35]
 148     ival.__doc__ = 'fragment'
 149     pygr.Data.Bio.Seq.frag = ival
 150
 151     # build a mapping to itself
 152     m = mapping.Mapping(sourceDB=sp,targetDB=sp)
 153     trypsin = sp['PRCA_ANAVA']
 154     m[hbb] = trypsin
 155     m.__doc__ = 'map sp to itself'
 156     pygr.Data.Bio.Seq.spmap = m
 157
 158     # create an annotation database and bind as exons attribute
 159     pygr.Data.schema.Bio.Seq.spmap = \
 160            pygr.Data.OneToManyRelation(sp, sp, bindAttrs=('buddy',))
 161     annoDB = seqdb.AnnotationDB({1:('HBB1_TORMA',10,50)}, sp,
 162                                 sliceAttrDict=dict(id=0, start=1, stop=2))
 163     exon = annoDB[1]
 164
 165     # generate the names where these will be stored
 166     tempdir = testutil.TempDir('exonAnnot')
 167     filename = tempdir.subfile('cnested')
 168     nlmsa = cnestedlist.NLMSA(filename, 'w', pairwiseMode=True,
 169                               bidirectional=False)
 170     nlmsa.addAnnotation(exon)
 171     nlmsa.build()
 172     annoDB.__doc__ = 'a little annotation db'
 173     nlmsa.__doc__ = 'a little map'
 174     pygr.Data.Bio.Annotation.annoDB = annoDB
 175     pygr.Data.Bio.Annotation.map = nlmsa
 176     pygr.Data.schema.Bio.Annotation.map = \
 177          pygr.Data.ManyToManyRelation(sp, annoDB, bindAttrs=('exons',))
 178
 179 def check_match(self):
 180     frag = pygr.Data.Bio.Seq.frag()
 181     correct = pygr.Data.Bio.Seq.Swissprot.sp42()['HBB1_TORMA'][10:35]
 182     assert frag == correct, 'seq ival should match'
 183     assert frag.__doc__ == 'fragment', 'docstring should match'
 184     assert str(frag) == 'IQHIWSNVNVVEITAKALERVFYVY', 'letters should match'
 185     assert len(frag) == 25, 'length should match'
 186     assert len(frag.path) == 142, 'length should match'
 187
 188     #store = PygrDataTextFile('results/seqdb1.pickle')
 189     #saved = store['hbb1 fragment']
 190     #assert frag == saved, 'seq ival should matched stored result'
 191
 192 def check_dir(self):
 193     expected=['Bio.Annotation.annoDB', 'Bio.Annotation.map',
 194                 'Bio.Seq.Swissprot.sp42', 'Bio.Seq.frag', 'Bio.Seq.spmap']
 195     expected.sort()
 196     found = pygr.Data.dir('Bio')
 197     found.sort()
 198     assert found == expected
 199
 200 def check_dir_noargs(self):
 201     found = pygr.Data.dir()
 202     found.sort()
 203     found2 = pygr.Data.dir('')
 204     found2.sort()
 205     assert found == found2
 206
 207 def check_dir_download(self):
 208     found = pygr.Data.dir(download=True)
 209     found.sort()
 210     found2 = pygr.Data.dir('', download=True)
 211     found2.sort()
 212     assert len(found) == 0
 213     assert found == found2
 214
 215 def check_dir_re(self):
 216     expected=['Bio.Annotation.annoDB', 'Bio.Annotation.map',
 217                 'Bio.Seq.Swissprot.sp42', 'Bio.Seq.frag', 'Bio.Seq.spmap']
 218     expected.sort()
 219     found = pygr.Data.dir('^Bio', 'r')
 220     found.sort()
 221     assert found == expected
 222
 223     expected = ['Bio.Seq.Swissprot.sp42', 'Bio.Seq.spmap']
 224     expected.sort()
 225     found = pygr.Data.dir('^Bio\..+\.sp', 'r')
 226     found.sort()
 227     assert found == expected
 228
 229 def check_bind(self):
 230     sp = pygr.Data.Bio.Seq.Swissprot.sp42()
 231     hbb = sp['HBB1_TORMA']
 232     trypsin =  sp['PRCA_ANAVA']
 233     assert hbb.buddy == trypsin, 'automatic schema attribute binding'
 234
 235 def check_bind2(self):
 236     sp = pygr.Data.Bio.Seq.Swissprot.sp42()
 237     hbb = sp['HBB1_TORMA']
 238     exons = hbb.exons.keys()
 239     assert len(exons)==1, 'number of expected annotations'
 240     annoDB = pygr.Data.Bio.Annotation.annoDB()
 241     exon = annoDB[1]
 242     assert exons[0] == exon, 'test annotation comparison'
 243     assert exons[0].pathForward is exon,'annotation parent match'
 244     assert exons[0].sequence == hbb[10:50],'annotation to sequence match'
 245     onc = sp['HBB1_ONCMY']
 246     try:
 247         exons = onc.exons.keys()
 248         raise ValueError('failed to catch query with no annotations')
 249     except KeyError:
 250         pass
 251
 252 class Sequence_Test(TestBase):
 253     def setUp(self, *args, **kwargs):
 254         TestBase.setUp(self, *args, **kwargs)
 255         populate_swissprot()
 256         pygr.Data.save() # finally save everything
 257         pygr.Data.clear_cache() # force all requests to reload
 258
 259     def test_match(self):
 260         "Test matching sequences"
 261         check_match(self)
 262
 263     def test_dir(self):
 264         "Test labels"
 265         check_dir(self)
 266         check_dir_noargs(self)
 267         check_dir_re(self)
 268
 269     def test_bind(self):
 270         "Test bind"
 271         check_bind(self)
 272         check_bind2(self)
 273
 274     def test_schema(self):
 275         "Test schema"
 276         sp_hbb1 = testutil.datafile('sp_hbb1')
 277         sp2 = seqdb.BlastDB(sp_hbb1)
 278         sp2.__doc__ = 'another sp'
 279         pygr.Data.Bio.Seq.sp2 = sp2
 280         sp = pygr.Data.Bio.Seq.Swissprot.sp42()
 281         m = mapping.Mapping(sourceDB=sp,targetDB=sp2)
 282         m.__doc__ = 'sp -> sp2'
 283         pygr.Data.Bio.Seq.testmap = m
 284         pygr.Data.schema.Bio.Seq.testmap = pygr.Data.OneToManyRelation(sp, sp2)
 285         pygr.Data.save()
 286
 287         pygr.Data.clear_cache()
 288
 289         sp3 = seqdb.BlastDB(sp_hbb1)
 290         sp3.__doc__ = 'sp number 3'
 291         pygr.Data.Bio.Seq.sp3 = sp3
 292         sp2 = pygr.Data.Bio.Seq.sp2()
 293         m = mapping.Mapping(sourceDB=sp3,targetDB=sp2)
 294         m.__doc__ = 'sp3 -> sp2'
 295         pygr.Data.Bio.Seq.testmap2 = m
 296         pygr.Data.schema.Bio.Seq.testmap2 = pygr.Data.OneToManyRelation(sp3, sp2)
 297         l = pygr.Data.getResource.resourceCache.keys() # list of cached resources
 298         l.sort()
 299         assert l == ['Bio.Seq.sp2', 'Bio.Seq.sp3', 'Bio.Seq.testmap2']
 300         pygr.Data.save()
 301         g = pygr.Data.getResource.writer.storage.graph
 302         expected = set(['Bio.Annotation.annoDB',
 303                      'Bio.Seq.Swissprot.sp42', 'Bio.Seq.sp2', 'Bio.Seq.sp3'])
 304         found = set(g.keys())
 305         self.EQ(len(expected - found), 0)
 306
 307 class SQL_Sequence_Test(Sequence_Test):
 308     def setUp(self):
 309         if not testutil.mysql_enabled():
 310             raise SkipTest, "no MySQL installed"
 311
 312         self.dbtable = testutil.temp_table_name() # create temp db tables
 313         Sequence_Test.setUp(self, pygrDataPath='mysql:' + self.dbtable,
 314                             mdbArgs=dict(createLayer='temp'))
 315     def tearDown(self):
 316         testutil.drop_tables(pygr.Data.getResource.writer.storage.cursor,
 317                              self.dbtable)
 318
 319 class InvalidPickle_Test(TestBase):
 320
 321     def setUp(self):
 322         TestBase.setUp(self)
 323         class MyUnpicklableClass(object):
 324             pass
 325         MyUnpicklableClass.__module__ = '__main__'
 326         self.bad = MyUnpicklableClass()
 327
 328         self.good = datetime.datetime.today()
 329
 330     def test_invalid_pickle(self):
 331         "Testing an invalid pickle"
 332         s = pygr.Data.dumps(self.good) # should pickle with no errors
 333         try:
 334             s = pygr.Data.dumps(self.bad) # should raise exception
 335             msg = 'failed to catch bad attempt to invalid module ref'
 336             raise ValueError(msg)
 337         except pygr.Data.WorldbaseNoModuleError:
 338             pass
 339
 340 class XMLRPC_Test(TestBase):
 341     'create an XMLRPC server and access seqdb from it'
 342     def setUp(self):
 343         TestBase.setUp(self)
 344         populate_swissprot() # save some data
 345         pygr.Data.save() # finally save everything
 346         pygr.Data.clear_cache() # force all requests to reload
 347
 348         res = [ 'Bio.Seq.Swissprot.sp42', 'Bio.Seq.frag', 'Bio.Seq.spmap',
 349                 'Bio.Annotation.annoDB', 'Bio.Annotation.map' ]
 350         self.server = testutil.TestXMLRPCServer(res, self.tempdir.path)
 351     def test_xmlrpc(self):
 352         "Test XMLRPC"
 353         pygr.Data.clear_cache() # force all requests to reload
 354         pygr.Data.update("http://localhost:%s" % self.server.port)
 355
 356         check_match(self)
 357         check_dir(self)
 358         check_dir_noargs(self)
 359         check_dir_download(self)
 360         check_dir_re(self)
 361         check_bind(self)
 362         check_bind2(self)
 363
 364         sb_hbb1 = testutil.datafile('sp_hbb1')
 365         sp2 = seqdb.BlastDB(sb_hbb1)
 366         sp2.__doc__ = 'another sp'
 367         try:
 368             pygr.Data.Bio.Seq.sp2 = sp2
 369             pygr.Data.save()
 370             msg = 'failed to catch bad attempt to write to XMLRPC server'
 371             raise KeyError(msg)
 372         except ValueError:
 373             pass
 374     def tearDown(self):
 375         'halt the test XMLRPC server'
 376         self.server.close()
 377
 378
 379 if __name__ == '__main__':
 380     PygrTestProgram(verbosity=2)