1 # -*-Perl-*- Test Harness script for Bioperl
# Test plan and prerequisites: 116 tests are declared, the listed modules are
# required, and networking is required.
# NOTE(review): test_begin is not defined in the visible source - presumably
# it comes from the project's test helper; confirm.
10 test_begin(-tests => 116,
11 -requires_modules => [qw(IO::String
13 HTTP::Request::Common)],
14 -requires_networking => 1);
# Check that each remote-database module exercised below can be loaded.
16 use_ok('Bio::DB::GenBank');
17 use_ok('Bio::DB::GenPept');
18 use_ok('Bio::DB::SwissProt');
19 use_ok('Bio::DB::GDB');
20 use_ok('Bio::DB::MeSH');
# Lookup table of expected sequence lengths, keyed by the identifier the
# assertions below look up (usually display_id). Only the first entry is
# visible here; the rest of the literal is outside this excerpt.
23 my %expected_lengths = ('NDP_MOUSE' => 131,
# Shared variables reused by every section; they are set back to undef
# between sections.
50 my ($gb, $seq, $seqio, $seqin, $query);
# Bio::DB::GenBank: construct a handle (with -delay set to 0) and fetch a
# single record four ways: by id, by accession, by versioned accession, by GI.
# Each fetch is wrapped in eval; when it died, the remaining assertions of
# this group are skipped, hence the skip counts 4, 3, 2, 1.
# Each assertion compares the sequence length with the %expected_lengths
# entry for the record's display_id.
# NOTE(review): the enclosing SKIP block is not visible in this excerpt.
55 ok $gb = Bio::DB::GenBank->new('-delay'=>0), 'Bio::DB::GenBank';
59 eval {$seq = $gb->get_Seq_by_id('MUSIGHBA1');};
60 skip "Couldn't connect to Genbank with Bio::DB::GenBank.pm. Do you have network access? Skipping GenBank tests", 4 if $@;
61 is $seq->length, $expected_lengths{$seq->display_id};
62 eval {$seq = $gb->get_Seq_by_acc('AF303112');};
63 skip "Couldn't connect to Genbank with Bio::DB::GenBank.pm. Transient network problems? Skipping GenBank tests", 3 if $@;
64 is $seq->length, $expected_lengths{$seq->display_id};
65 eval {$seq = $gb->get_Seq_by_version('AF303112.1');};
66 skip "Couldn't connect to Genbank with Bio::DB::GenBank.pm. Transient network problems? Skipping GenBank tests", 2 if $@;
67 is $seq->length, $expected_lengths{$seq->display_id};
68 eval {$seq = $gb->get_Seq_by_gi('405830');};
69 skip "Couldn't connect to Genbank with Bio::DB::GenBank.pm. Transient network problems? Skipping GenBank tests", 1 if $@;
70 is $seq->length, $expected_lengths{$seq->display_id};
# Clear the shared variables before the next section.
73 $seq = $seqio = undef;
# Batch access: request three identifiers as one stream and check the length
# of every sequence the stream yields against %expected_lengths.
# NOTE(review): $done is declared and updated on lines not visible in this
# excerpt - presumably it records whether any sequence came back; confirm.
77 eval {$seqio = $gb->get_Stream_by_id([qw(J00522 AF303112 2981014)]);};
78 skip "Batch access test failed for Genbank. Skipping those tests", 4 if $@;
80 while (my $s = $seqio->next_seq) {
81 is $s->length, $expected_lengths{$s->display_id};
84 skip('No seqs returned', 4) if !$done;
# Clear the shared variables before the next section.
88 $seq = $seqio = undef;
# Same single-record and batch checks as above, but with a handle built with
# -format 'fasta' and -retrievaltype 'tempfile'. Skip counts go 6, 5, 4 as
# assertions are consumed.
90 # test the temporary file creation and fasta
91 ok $gb = Bio::DB::GenBank->new('-format' => 'fasta', '-retrievaltype' => 'tempfile', '-delay' => 0);
93 eval {$seq = $gb->get_Seq_by_id('MUSIGHBA1');};
94 skip "Couldn't connect to complete GenBank tests with a tempfile with Bio::DB::GenBank.pm. Skipping those tests", 6 if $@;
# The display_id is split on '|' and its final field is the lookup key.
95 # last part of id holds the key
96 is $seq->length, $expected_lengths{(split(/\|/,$seq->display_id))[-1]};
97 eval {$seq = $gb->get_Seq_by_acc('AF303112');};
98 skip "Couldn't connect to complete GenBank tests with a tempfile with Bio::DB::GenBank.pm. Skipping those tests", 5 if $@;
99 # last part of id holds the key
100 is $seq->length, $expected_lengths{(split(/\|/,$seq->display_id))[-1]};
101 # batch mode requires genbank format
102 $gb->request_format("gb");
103 eval {$seqio = $gb->get_Stream_by_id([qw(J00522 AF303112 2981014)]);};
104 skip "Couldn't connect to complete GenBank batch tests with a tempfile with Bio::DB::GenBank.pm. Skipping those tests", 4 if $@;
# The database handle is dropped inside the loop on purpose: the stream is
# expected to keep working without it.
106 while (my $s = $seqio->next_seq) {
107 is $s->length, $expected_lengths{$s->display_id};
108 undef $gb; # test the case where the db is gone,
109 # but a temp file should remain until seqio goes away.
# NOTE(review): $done is maintained on lines not visible in this excerpt.
112 skip('No seqs returned', 4) if !$done;
# Clear the shared variables before the next section.
116 $seq = $seqio = undef;
# Repeat the single-record and batch checks with -retrievaltype 'pipeline'.
# Here the display_id is used directly as the %expected_lengths key.
118 # test pipeline creation
119 ok $gb = Bio::DB::GenBank->new('-retrievaltype' => 'pipeline', '-delay' => 0);
121 eval {$seq = $gb->get_Seq_by_id('MUSIGHBA1');};
122 skip "Couldn't connect to complete GenBank tests with a pipeline with Bio::DB::GenBank.pm. Skipping those tests", 6 if $@;
123 is $seq->length, $expected_lengths{$seq->display_id};
124 eval {$seq = $gb->get_Seq_by_acc('AF303112');};
125 skip "Couldn't connect to complete GenBank tests with a pipeline with Bio::DB::GenBank.pm. Skipping those tests", 5 if $@;
126 is $seq->length, $expected_lengths{$seq->display_id};
127 eval {$seqio = $gb->get_Stream_by_id([qw(J00522 AF303112 2981014)]);};
128 skip "Couldn't connect to complete GenBank tests with a pipeline with Bio::DB::GenBank.pm. Skipping those tests", 4 if $@;
# As in the tempfile case, the handle is dropped while the stream is read.
130 while (my $s = $seqio->next_seq) {
131 is $s->length, $expected_lengths{$s->display_id};
132 undef $gb; # test the case where the db is gone,
133 # but the pipeline should remain until seqio goes away
# NOTE(review): $done is maintained on lines not visible in this excerpt.
136 skip('No seqs returned', 4) if !$done;
# Clear the shared variables before the next section.
140 $seq = $seqio = undef;
# Build a Bio::DB::Query::GenBank object for the 'nucleotide' db, restricted
# by an organism term and a 2002 date range, then stream its results through
# a fresh Bio::DB::GenBank handle.
# NOTE(review): no load of Bio::DB::Query::GenBank is visible in this
# excerpt - presumably it is pulled in elsewhere; confirm.
142 # test query facility
143 ok $query = Bio::DB::Query::GenBank->new('-db' => 'nucleotide',
144 '-query' => 'Onchocerca volvulus[Organism]',
145 '-mindate' => '2002/1/1',
146 '-maxdate' => '2002/12/31'), 'Bio::DB::Query::GenBank';
# The query must report at least one hit.
148 cmp_ok $query->count, '>', 0;
149 my @ids = $query->ids;
# Compares @ids with the reported count; this presumably relies on `is`
# taking @ids in scalar context (element count) - TODO confirm.
151 is @ids, $query->count;
152 ok $gb = Bio::DB::GenBank->new('-delay' => 0);
153 eval {$seqio = $gb->get_Stream_by_query($query);};
154 skip "Couldn't connect to complete GenBank query tests. Skipping those tests", 5 if $@;
156 while (my $s = $seqio->next_seq) {
157 is $s->length, $expected_lengths{$s->display_id};
158 undef $gb; # test the case where the db is gone,
159 # but the pipeline should remain until seqio goes away
# NOTE(review): $done is maintained on lines not visible in this excerpt.
162 skip('No seqs returned', 5) if !$done;
# Clear the shared variables before the next section.
166 $seq = $seqio = undef;
# Second query test: the query object is built from an explicit -ids list
# instead of a search term, then streamed the same way.
168 # test query facility (again)
169 ok $query = Bio::DB::Query::GenBank->new('-db' => 'nucleotide',
170 '-ids' => [qw(J00522 AF303112 2981014)]);
172 cmp_ok $query->count, '>', 0;
# NOTE(review): @ids is declared with `my` a second time; this is only valid
# without a redeclaration warning if this section sits in its own block,
# which is not visible here - confirm.
173 my @ids = $query->ids;
175 is @ids, $query->count;
176 $gb = Bio::DB::GenBank->new('-delay' => 0);
177 eval {$seqio = $gb->get_Stream_by_query($query);};
# Unlike the other skip messages, this one appends the eval error text.
178 skip "Couldn't connect to complete GenBank query tests. Skipping those tests: $@", 4 if $@;
180 while (my $s = $seqio->next_seq) {
181 is $s->length, $expected_lengths{$s->display_id};
# NOTE(review): $done is maintained on lines not visible in this excerpt.
184 skip('No seqs returned', 4) if !$done;
186 $seqio->close(); # the key to preventing errors during make test, no idea why
# Clear the shared variables before the next section.
189 $seq = $seqio = undef;
# Regression check: when both -query and -ids are supplied, the resulting
# query string is expected to be the one built from the ids (each id tagged
# [PACC] or [UID] and joined with '|'), not the -query value.
191 # and yet again, for bug 2133
192 $query = Bio::DB::Query::GenBank->new('-query' => 'AF303112',
193 '-ids' => [qw(J00522 AF303112 2981014)]);
194 is $query->query, 'J00522[PACC]|AF303112[PACC]|2981014[UID]';
# Fetch the same record (CH402638) twice, once with -format 'gbwithparts'
# and once with -format 'gb'; both lengths are checked against the same
# %expected_lengths entry.
196 # test contig retrieval
197 ok $gb = Bio::DB::GenBank->new('-delay' => 0, '-format' => 'gbwithparts');
199 eval {$seq = $gb->get_Seq_by_id('CH402638');};
200 skip "Couldn't connect to GenBank with Bio::DB::GenBank.pm. Skipping those tests", 3 if $@;
201 is $seq->length, $expected_lengths{$seq->display_id};
202 # now to check that postprocess_data in NCBIHelper catches CONTIG...
203 ok $gb = Bio::DB::GenBank->new('-delay' => 0, '-format' => 'gb');
204 eval {$seq = $gb->get_Seq_by_id('CH402638');};
205 skip "Couldn't connect to GenBank with Bio::DB::GenBank.pm. Skipping those tests", 1 if $@;
206 is $seq->length, $expected_lengths{$seq->display_id};
# Clear the shared variables before the next section.
209 $seq = $seqio = undef;
# Constructor options: a handle built with -seq_start 2 and -seq_stop 7 is
# used to fetch accession A11111. The assertion on the result is not visible
# in this excerpt.
213 ok $gb = Bio::DB::GenBank->new(-format => 'Fasta', -seq_start => 2, -seq_stop => 7);
215 eval {$seq = $gb->get_Seq_by_acc("A11111");};
216 skip "Couldn't connect to complete GenBank tests. Skipping those tests", 15 if $@;
# A handle built with -complexity 0 streams accession "5"; @result holds the
# expected (length, alphabet) pairs, consumed in order with shift.
219 ok $gb = Bio::DB::GenBank->new(-format => 'Fasta', -complexity => 0);
220 eval {$seqin = $gb->get_Stream_by_acc("5");};
221 skip "Couldn't connect to complete GenBank tests. Skipping those tests", 13 if $@;
# NOTE(review): @result is assigned without a visible `my`; its declaration
# is presumably on a line outside this excerpt - confirm.
222 @result = (1136, 'dna', 342, 'protein');
223 while ($seq = $seqin->next_seq) {
224 is $seq->length, shift(@result);
225 is $seq->alphabet, shift(@result);
228 # Real batch retrieval using epost/efetch
229 # these tests may change if integrated further into Bio::DB::Gen*
230 # Currently only useful for retrieving GI's via get_seq_stream
231 $gb = Bio::DB::GenBank->new();
232 eval {$seqin = $gb->get_seq_stream(-uids => [4887706 ,431229, 147460], -mode => 'batch');};
233 skip "Couldn't connect to complete GenBank batchmode epost/efetch tests. Skipping those tests", 8 if $@;
# Expected accession => length. Each accession is deleted once seen, so a
# repeated accession would fail the exists check.
234 my %result = ('M59757' => 12611 ,'X76083'=> 3140, 'J01670'=> 1593);
236 while ($seq = $seqin->next_seq) {
238 my $acc = $seq->accession;
239 ok exists $result{ $acc };
240 is $seq->length, $result{ $acc };
241 delete $result{$acc};
# NOTE(review): $ct is maintained on lines not visible in this excerpt.
243 skip('No seqs returned', 8) if !$ct;
# Clear the shared variables before the next section.
248 $seq = $seqin = undef;
# Bio::DB::GenPept: batch-mode retrieval by uid, checked against an
# accession => length table whose entries are deleted as they are seen.
253 ok $gb = Bio::DB::GenPept->new();
255 eval {$seqin = $gb->get_seq_stream(-uids => [2981015, 1621261, 195055], -mode => 'batch');};
256 skip "Couldn't connect to complete GenPept tests. Skipping those tests", 8 if $@;
257 my %result = ('AAC06201' => 353, 'CAB02640' => 193, 'AAD15290' => 136);
259 while ($seq = $seqin->next_seq) {
261 my $acc = $seq->accession;
262 ok exists $result{ $acc };
263 is $seq->length, $result{ $acc };
264 delete $result{$acc};
# NOTE(review): $ct is maintained on lines not visible in this excerpt.
266 skip('No seqs returned', 8) if !$ct;
# Clear the shared variables before the next group.
271 $seq = $seqio = undef;
# Single-record and stream retrieval with a new GenPept handle; skip counts
# go 10, 9, 8 as assertions are consumed.
273 ok $gb = Bio::DB::GenPept->new('-delay' => 0);
275 eval {$seq = $gb->get_Seq_by_id('195055');};
276 skip "Couldn't connect to Genbank with Bio::DB::GenPept.pm. Skipping those tests", 10 if $@;
277 is $seq->length, $expected_lengths{$seq->display_id};
278 eval {$seq = $gb->get_Seq_by_acc('AAC06201');};
279 skip "Couldn't connect to Genbank with Bio::DB::GenPept.pm. Skipping those tests", 9 if $@;
280 is $seq->length, $expected_lengths{$seq->display_id};
281 eval {$seqio = $gb->get_Stream_by_id([qw(AAC06201 195055)]);};
282 skip "Couldn't connect to Genbank with Bio::DB::GenPept.pm. Skipping those tests", 8 if $@;
284 while( my $s = $seqio->next_seq ) {
285 is $s->length, $expected_lengths{$s->display_id};
# NOTE(review): $done is maintained on lines not visible in this excerpt.
288 skip('No seqs returned', 8) if !$done;
290 # swissprot genpept parsing
291 eval {$seq = $gb->get_Seq_by_acc('2AAA_YEAST');};
292 skip "Couldn't connect to Genbank with Bio::DB::GenPept.pm. Skipping those tests", 5 if $@;
293 is $seq->length, $expected_lengths{$seq->display_id};
295 # test dbsource stuff
296 # small chance this might change but hopefully not
# The record must carry more than 31 'dblink' annotations; the first one is
# expected to point at swissprot / 2AAA_YEAST.
297 my @annot = $seq->annotation->get_Annotations('dblink');
298 cmp_ok(scalar(@annot), '>', 31);
299 is $annot[0]->database, 'swissprot';
300 is $annot[0]->primary_id, '2AAA_YEAST';
# The first 'swissprot_dates' annotation is compared with a fixed date string.
301 is (($seq->annotation->get_Annotations('swissprot_dates'))[0]->value, 'Jul 1, 1993');
# Clear the shared variables before the next section.
304 $seq = $seqio = undef;
# Bio::DB::SwissProt: fetch single records with -retrievaltype 'pipeline',
# then a two-record stream with -retrievaltype 'tempfile'.
# Each fetch is wrapped in eval; when it died, the remaining assertions are
# skipped, hence the skip counts 14, 12, 11, 8 and 4.
# The skip messages name Bio::DB::SwissProt.pm, the module this section
# actually constructs.
309 ok $gb = Bio::DB::SwissProt->new(-retrievaltype =>'pipeline', -delay => 0);
311 eval {$seq = $gb->get_Seq_by_id('YNB3_YEAST');};
312 skip "Couldn't connect to SwissProt with Bio::DB::SwissProt.pm. Skipping those tests", 14 if $@;
313 is $seq->length, $expected_lengths{$seq->display_id};
314 is $seq->division, 'YEAST';
316 eval {$seq = $gb->get_Seq_by_acc('P43780');};
317 skip "Couldn't connect to SwissProt with Bio::DB::SwissProt.pm. Skipping those tests", 12 if $@;
318 is $seq->length, $expected_lengths{$seq->display_id};
319 eval {$seq = $gb->get_Seq_by_acc('O39869');};
320 skip "Couldn't connect to SwissProt with Bio::DB::SwissProt.pm. Skipping those tests", 11 if $@;
# This record's expected length is keyed by accession number, not display_id.
321 is $seq->length, $expected_lengths{$seq->accession_number};
322 is $seq->accession_number, 'O39869';
323 is $seq->division, '9PICO';
# An accession (P18584) is passed to get_Seq_by_id; the returned record is
# expected to have display_id DEGP_CHLTR.
326 eval {$seq = $gb->get_Seq_by_id('P18584');};
327 skip "Couldn't connect to SwissProt with Bio::DB::SwissProt.pm. Skipping those tests", 8 if $@;
328 is $seq->length, $expected_lengths{$seq->display_id};
329 is $seq->display_id, 'DEGP_CHLTR';
330 is $seq->division, 'CHLTR';
332 ok $gb = Bio::DB::SwissProt->new('-retrievaltype' => 'tempfile', '-delay' => 0);
333 eval {$seqio = $gb->get_Stream_by_id(['NDP_MOUSE', 'NDP_HUMAN']);};
334 skip "Couldn't connect to SwissProt with Bio::DB::SwissProt.pm. Skipping those tests", 4 if $@;
# The handle is dropped before reading: the stream must still yield both
# requested records.
335 undef $gb; # testing to see if we can remove gb
336 ok $seq = $seqio->next_seq();
337 is $seq->length, $expected_lengths{$seq->display_id};
338 ok $seq = $seqio->next_seq();
339 is $seq->length, $expected_lengths{$seq->display_id};
# Clear the shared variables before the next section.
342 $seq = $seqio = undef;
# Bio::DB::GDB: look up marker D1S243 and check the gdbid field of the
# returned hash reference.
# NOTE(review): $info is declared on a line not visible in this excerpt.
347 ok my $gdb = Bio::DB::GDB->new();
350 eval {$info = $gdb->get_info(-type => 'marker', -id => 'D1S243');};
351 skip "Couldn't connect to GDB with Bio::DB::GDB.pm. Skipping those tests", 1 if $@;
352 is $info->{gdbid}, 'GDB:188393';
356 # Bio::DB::EntrezGene
# These 8 tests depend on Bio::ASN1::EntrezGene, as declared by the
# test_skip call below.
# NOTE(review): test_skip is not defined in the visible source - presumably
# it comes from the project's test helper; confirm.
359 test_skip(-tests => 8, -requires_module => 'Bio::ASN1::EntrezGene');
360 use_ok('Bio::DB::EntrezGene');
361 ok $gb = Bio::DB::EntrezGene->new(-retrievaltype => 'tempfile', -delay => 0);
# Stream two gene ids; the records are expected in request order
# (2 => A2M, then 3064 => HTT).
362 eval {$seqio = $gb->get_Stream_by_id([2,3064]);};
363 skip "Couldn't connect to Entrez with Bio::DB::EntrezGene. Skipping those tests", 6 if $@;
364 $seq = $seqio->next_seq;
365 is $seq->display_id, "A2M";
366 is $seq->accession_number, 2;
367 $seq = $seqio->next_seq;
368 is $seq->display_id, "HTT";
369 is $seq->accession_number, 3064;
# Single-record fetch by gene id.
370 eval {$seq = $gb->get_Seq_by_id(6099);};
371 skip "Couldn't connect to Entrez with Bio::DB::EntrezGene. Skipping those tests", 2 if $@;
372 is $seq->display_id, "RP";
373 is $seq->accession_number, 6099;
# Clear the shared variables before the next section.
376 $seq = $seqio = undef;
# Bio::DB::MeSH: look up two exact terms. The first is checked via
# each_twig() against 2, the second via its description text and id.
# NOTE(review): $t is declared on a line not visible in this excerpt.
381 ok my $mesh = Bio::DB::MeSH->new();
384 eval {$t = $mesh->get_exact_term('Dietary Fats');};
385 skip "Couldn't connect to MeSH with Bio::DB::MeSH. Skipping those tests", 3 if $@;
386 is $t->each_twig(), 2;
387 eval {$t = $mesh->get_exact_term("Sinus Thrombosis, Intracranial");};
388 skip "Couldn't connect to MeSH with Bio::DB::MeSH. Skipping those tests", 2 if $@;
389 is $t->description, "Thrombus formation in an intracranial venous sinus, including the superior sagittal, cavernous, lateral, and petrous sinuses. Etiologies include thrombosis due to infection, DEHYDRATION, coagulation disorders (see THROMBOPHILIA), and CRANIOCEREBRAL TRAUMA.";
390 is $t->id, "D012851";
# Clear the shared variables at the end of the section.
393 $seq = $seqio = undef;