1 # -*-Perl-*- Test Harness script for Bioperl
# Declare the plan (113 tests). The -requires_* arguments name the module
# and networking prerequisites handed to test_begin.
10 test_begin(-tests => 113,
11 -requires_modules => [qw(IO::String
13 HTTP::Request::Common)],
14 -requires_networking => 1);
# Check that each database module exercised below can be loaded.
16 use_ok('Bio::DB::GenBank');
17 use_ok('Bio::DB::GenPept');
18 use_ok('Bio::DB::SwissProt');
19 use_ok('Bio::DB::MeSH');
# Lookup table used by the length assertions throughout this script; it is
# indexed with a sequence's display_id (or accession_number in one case).
# NOTE(review): only the first entry is visible in this excerpt.
22 my %expected_lengths = ('NDP_MOUSE' => 131,
# Handles reused by every test section; each section resets the ones it used.
49 my ($gb, $seq, $seqio, $seqin, $query);
# GenBank single-record access: fetch by id, accession, version and GI, and
# compare each sequence length with the %expected_lengths entry for its
# display_id.
54 ok $gb = Bio::DB::GenBank->new('-delay'=>0), 'Bio::DB::GenBank';
# Every fetch is wrapped in eval; when it dies, the remaining assertions of
# this group are skipped (the skip count drops by one per assertion: 4,3,2,1).
58 eval {$seq = $gb->get_Seq_by_id('MUSIGHBA1');};
59 skip "Couldn't connect to Genbank with Bio::DB::GenBank.pm. Do you have network access? Skipping GenBank tests", 4 if $@;
60 is $seq->length, $expected_lengths{$seq->display_id};
61 eval {$seq = $gb->get_Seq_by_acc('AF303112');};
62 skip "Couldn't connect to Genbank with Bio::DB::GenBank.pm. Transient network problems? Skipping GenBank tests", 3 if $@;
63 is $seq->length, $expected_lengths{$seq->display_id};
64 eval {$seq = $gb->get_Seq_by_version('AF303112.1');};
65 skip "Couldn't connect to Genbank with Bio::DB::GenBank.pm. Transient network problems? Skipping GenBank tests", 2 if $@;
66 is $seq->length, $expected_lengths{$seq->display_id};
67 eval {$seq = $gb->get_Seq_by_gi('405830');};
68 skip "Couldn't connect to Genbank with Bio::DB::GenBank.pm. Transient network problems? Skipping GenBank tests", 1 if $@;
69 is $seq->length, $expected_lengths{$seq->display_id};
# Clear the shared handles before the next group.
72 $seq = $seqio = undef;
# Batch access: request three identifiers as one stream; skip the group's
# four tests if the request died.
76 eval {$seqio = $gb->get_Stream_by_id([qw(J00522 AF303112 2981014)]);};
77 skip "Batch access test failed for Genbank. Skipping those tests", 4 if $@;
# Check the length of every sequence the stream yields.
79 while (my $s = $seqio->next_seq) {
80 is $s->length, $expected_lengths{$s->display_id};
# NOTE(review): $done is not assigned on any visible line; presumably it is
# set inside the loop on lines missing from this excerpt -- confirm.
83 skip('No seqs returned', 4) if !$done;
# Clear the shared handles before the next group.
87 $seq = $seqio = undef;
89 # test the temporary file creation and fasta
90 ok $gb = Bio::DB::GenBank->new('-format' => 'fasta', '-retrievaltype' => 'tempfile', '-delay' => 0);
# Single-record fetches; a failed fetch skips the rest of the group (6, then 5).
92 eval {$seq = $gb->get_Seq_by_id('MUSIGHBA1');};
93 skip "Couldn't connect to complete GenBank tests with a tempfile with Bio::DB::GenBank.pm. Skipping those tests", 6 if $@;
94 # last part of id holds the key
# (display_id is split on '|' and the final field is used as the hash key)
95 is $seq->length, $expected_lengths{(split(/\|/,$seq->display_id))[-1]};
96 eval {$seq = $gb->get_Seq_by_acc('AF303112');};
97 skip "Couldn't connect to complete GenBank tests with a tempfile with Bio::DB::GenBank.pm. Skipping those tests", 5 if $@;
98 # last part of id holds the key
99 is $seq->length, $expected_lengths{(split(/\|/,$seq->display_id))[-1]};
100 # batch mode requires genbank format
101 $gb->request_format("gb");
102 eval {$seqio = $gb->get_Stream_by_id([qw(J00522 AF303112 2981014)]);};
103 skip "Couldn't connect to complete GenBank batch tests with a tempfile with Bio::DB::GenBank.pm. Skipping those tests", 4 if $@;
# Iterate the stream; the database handle is released inside the loop so the
# remaining reads happen without it.
105 while (my $s = $seqio->next_seq) {
106 is $s->length, $expected_lengths{$s->display_id};
107 undef $gb; # test the case where the db is gone,
108 # but a temp file should remain until seqio goes away.
# NOTE(review): $done is not assigned on any visible line; presumably set in
# the loop on lines missing from this excerpt -- confirm.
111 skip('No seqs returned', 4) if !$done;
# Clear the shared handles before the next group.
115 $seq = $seqio = undef;
117 # test pipeline creation
118 ok $gb = Bio::DB::GenBank->new('-retrievaltype' => 'pipeline', '-delay' => 0);
# Same sequence of checks as the tempfile group, using the 'pipeline'
# retrieval type: two single fetches, then a three-identifier stream.
120 eval {$seq = $gb->get_Seq_by_id('MUSIGHBA1');};
121 skip "Couldn't connect to complete GenBank tests with a pipeline with Bio::DB::GenBank.pm. Skipping those tests", 6 if $@;
122 is $seq->length, $expected_lengths{$seq->display_id};
123 eval {$seq = $gb->get_Seq_by_acc('AF303112');};
124 skip "Couldn't connect to complete GenBank tests with a pipeline with Bio::DB::GenBank.pm. Skipping those tests", 5 if $@;
125 is $seq->length, $expected_lengths{$seq->display_id};
126 eval {$seqio = $gb->get_Stream_by_id([qw(J00522 AF303112 2981014)]);};
127 skip "Couldn't connect to complete GenBank tests with a pipeline with Bio::DB::GenBank.pm. Skipping those tests", 4 if $@;
# The database handle is released inside the loop while the stream is still
# being read.
129 while (my $s = $seqio->next_seq) {
130 is $s->length, $expected_lengths{$s->display_id};
131 undef $gb; # test the case where the db is gone,
132 # but the pipeline should remain until seqio goes away
# NOTE(review): $done is not assigned on any visible line -- confirm it is
# set inside the loop.
135 skip('No seqs returned', 4) if !$done;
# Clear the shared handles before the next group.
139 $seq = $seqio = undef;
141 # test query facility
# Build a date-bounded organism query against the 'nucleotide' database.
142 ok $query = Bio::DB::Query::GenBank->new('-db' => 'nucleotide',
143 '-query' => 'Onchocerca volvulus[Organism]',
144 '-mindate' => '2002/1/1',
145 '-maxdate' => '2002/12/31'), 'Bio::DB::Query::GenBank';
# The query must report at least one hit, and the number of ids it returns
# must equal that count (@ids is compared in scalar context).
147 cmp_ok $query->count, '>', 0;
148 my @ids = $query->ids;
150 is @ids, $query->count;
# Fetch the query's records as a stream through a fresh GenBank handle.
151 ok $gb = Bio::DB::GenBank->new('-delay' => 0);
152 eval {$seqio = $gb->get_Stream_by_query($query);};
153 skip "Couldn't connect to complete GenBank query tests. Skipping those tests", 5 if $@;
155 while (my $s = $seqio->next_seq) {
156 is $s->length, $expected_lengths{$s->display_id};
157 undef $gb; # test the case where the db is gone,
158 # but the pipeline should remain until seqio goes away
# NOTE(review): $done is not assigned on any visible line -- confirm it is
# set inside the loop.
161 skip('No seqs returned', 5) if !$done;
# Clear the shared handles before the next group.
165 $seq = $seqio = undef;
167 # test query facility (again)
# This time the query is constructed from an explicit list of ids.
168 ok $query = Bio::DB::Query::GenBank->new('-db' => 'nucleotide',
169 '-ids' => [qw(J00522 AF303112 2981014)]);
# As before: a positive count, and as many ids as the count reports.
171 cmp_ok $query->count, '>', 0;
# NOTE(review): @ids is declared with 'my' a second time here; presumably
# this sits in a separate enclosing block not visible in this excerpt.
172 my @ids = $query->ids;
174 is @ids, $query->count;
175 $gb = Bio::DB::GenBank->new('-delay' => 0);
176 eval {$seqio = $gb->get_Stream_by_query($query);};
# Unlike the other groups, this skip message also reports the error in $@.
177 skip "Couldn't connect to complete GenBank query tests. Skipping those tests: $@", 4 if $@;
179 while (my $s = $seqio->next_seq) {
180 is $s->length, $expected_lengths{$s->display_id};
# NOTE(review): $done is not assigned on any visible line -- confirm it is
# set inside the loop.
183 skip('No seqs returned', 4) if !$done;
185 $seqio->close(); # the key to preventing errors during make test, no idea why
# Clear the shared handles before the next group.
188 $seq = $seqio = undef;
190 # and yet again, for bug 2133
# A query built with both -query and -ids: the expected query string consists
# only of terms matching the -ids entries (accessions tagged [PACC], the
# numeric id tagged [UID]), joined with '|'.
191 $query = Bio::DB::Query::GenBank->new('-query' => 'AF303112',
192 '-ids' => [qw(J00522 AF303112 2981014)]);
193 is $query->query, 'J00522[PACC]|AF303112[PACC]|2981014[UID]';
195 # test contig retrieval
# First fetch CH402638 with the 'gbwithparts' format and check its length.
196 ok $gb = Bio::DB::GenBank->new('-delay' => 0, '-format' => 'gbwithparts');
198 eval {$seq = $gb->get_Seq_by_id('CH402638');};
199 skip "Couldn't connect to GenBank with Bio::DB::GenBank.pm. Skipping those tests", 3 if $@;
200 is $seq->length, $expected_lengths{$seq->display_id};
201 # now to check that postprocess_data in NCBIHelper catches CONTIG...
# Same record requested with plain 'gb' format; the same expected length
# must be reported.
202 ok $gb = Bio::DB::GenBank->new('-delay' => 0, '-format' => 'gb');
203 eval {$seq = $gb->get_Seq_by_id('CH402638');};
204 skip "Couldn't connect to GenBank with Bio::DB::GenBank.pm. Skipping those tests", 1 if $@;
205 is $seq->length, $expected_lengths{$seq->display_id};
# Clear the shared handles before the next group.
208 $seq = $seqio = undef;
# Construct a Fasta-format handle restricted with -seq_start/-seq_stop and
# fetch accession A11111.
# NOTE(review): the assertions on this fetch are not visible in this excerpt.
212 ok $gb = Bio::DB::GenBank->new(-format => 'Fasta', -seq_start => 2, -seq_stop => 7);
214 eval {$seq = $gb->get_Seq_by_acc("A11111");};
215 skip "Couldn't connect to complete GenBank tests. Skipping those tests", 15 if $@;
# Construct a handle with -complexity => 0 and stream the record "5".
218 ok $gb = Bio::DB::GenBank->new(-format => 'Fasta', -complexity => 0);
219 eval {$seqin = $gb->get_Stream_by_acc("5");};
220 skip "Couldn't connect to complete GenBank tests. Skipping those tests", 13 if $@;
# Expected (length, alphabet) pairs, consumed in order as sequences arrive.
# NOTE(review): no 'my' declaration for @result is visible here -- confirm
# it is declared on a line missing from this excerpt.
221 @result = (1136, 'dna', 342, 'protein');
222 while ($seq = $seqin->next_seq) {
223 is $seq->length, shift(@result);
224 is $seq->alphabet, shift(@result);
227 # Real batch retrieval using epost/efetch
228 # these tests may change if integrated further into Bio::DB::Gen*
229 # Currently only useful for retrieving GI's via get_seq_stream
230 $gb = Bio::DB::GenBank->new();
# Request three UIDs in 'batch' mode; skip the group's 8 tests on failure.
231 eval {$seqin = $gb->get_seq_stream(-uids => [4887706 ,431229, 147460], -mode => 'batch');};
232 skip "Couldn't connect to complete GenBank batchmode epost/efetch tests. Skipping those tests", 8 if $@;
# Expected length per accession. Entries are deleted as they are matched.
233 my %result = ('M59757' => 12611 ,'X76083'=> 3140, 'J01670'=> 1593);
235 while ($seq = $seqin->next_seq) {
237 my $acc = $seq->accession;
# The accession must be one still pending, with the recorded length.
238 ok exists $result{ $acc };
239 is $seq->length, $result{ $acc };
240 delete $result{$acc};
# NOTE(review): $ct is not assigned on any visible line; presumably it counts
# sequences inside the loop on lines missing from this excerpt -- confirm.
242 skip('No seqs returned', 8) if !$ct;
# Clear the shared handles before the next group.
247 $seq = $seqin = undef;
# GenPept batch retrieval: three UIDs requested in 'batch' mode.
252 ok $gb = Bio::DB::GenPept->new();
254 eval {$seqin = $gb->get_seq_stream(-uids => [2981015, 1621261, 195055], -mode => 'batch');};
255 skip "Couldn't connect to complete GenPept tests. Skipping those tests", 8 if $@;
# Expected length per accession. Entries are deleted as they are matched.
# NOTE(review): %result is declared with 'my' again here; presumably inside
# a separate enclosing block not visible in this excerpt.
256 my %result = ('AAC06201' => 353, 'CAB02640' => 193, 'AAD15290' => 136);
258 while ($seq = $seqin->next_seq) {
260 my $acc = $seq->accession;
261 ok exists $result{ $acc };
262 is $seq->length, $result{ $acc };
263 delete $result{$acc};
# NOTE(review): $ct is not assigned on any visible line -- confirm it is set
# inside the loop.
265 skip('No seqs returned', 8) if !$ct;
# Clear shared handles. Note that $seqin, used above, is not reset here.
270 $seq = $seqio = undef;
# GenPept access: single fetches by id and accession, a two-record stream,
# then a record whose annotations are inspected.
272 ok $gb = Bio::DB::GenPept->new('-delay' => 0);
274 eval {$seq = $gb->get_Seq_by_id('195055');};
275 skip "Couldn't connect to Genbank with Bio::DB::GenPept.pm. Skipping those tests", 10 if $@;
276 is $seq->length, $expected_lengths{$seq->display_id};
277 eval {$seq = $gb->get_Seq_by_acc('AAC06201');};
278 skip "Couldn't connect to Genbank with Bio::DB::GenPept.pm. Skipping those tests", 9 if $@;
279 is $seq->length, $expected_lengths{$seq->display_id};
280 eval {$seqio = $gb->get_Stream_by_id([qw(AAC06201 195055)]);};
281 skip "Couldn't connect to Genbank with Bio::DB::GenPept.pm. Skipping those tests", 8 if $@;
283 while( my $s = $seqio->next_seq ) {
284 is $s->length, $expected_lengths{$s->display_id};
# NOTE(review): $done is not assigned on any visible line -- confirm it is
# set inside the loop.
287 skip('No seqs returned', 8) if !$done;
289 # swissprot genpept parsing
290 eval {$seq = $gb->get_Seq_by_acc('2AAA_YEAST');};
291 skip "Couldn't connect to Genbank with Bio::DB::GenPept.pm. Skipping those tests", 5 if $@;
292 is $seq->length, $expected_lengths{$seq->display_id};
294 # test dbsource stuff
295 # small chance this might change but hopefully not
# The record must carry more than 31 'dblink' annotations; the first one must
# point at UniProtKB entry 2AAA_YEAST.
296 my @annot = $seq->annotation->get_Annotations('dblink');
297 cmp_ok(scalar(@annot), '>', 31);
298 is $annot[0]->database, 'UniProtKB';
299 is $annot[0]->primary_id, '2AAA_YEAST';
# The first 'swissprot_dates' annotation must hold this exact date string.
300 is (($seq->annotation->get_Annotations('swissprot_dates'))[0]->value, 'Jul 1, 1993');
# Clear the shared handles before the next group.
303 $seq = $seqio = undef;
# SwissProt access through Bio::DB::SwissProt, first with the 'pipeline'
# retrieval type and then with 'tempfile'.
# The skip diagnostics name Bio::DB::SwissProt.pm, the module actually loaded
# and instantiated here, so a failure points at the right file.
308 ok $gb = Bio::DB::SwissProt->new(-retrievaltype =>'pipeline', -delay => 0);
# Fetch by id and check length and division.
310 eval {$seq = $gb->get_Seq_by_id('YNB3_YEAST');};
311 skip "Couldn't connect to SwissProt with Bio::DB::SwissProt.pm. Skipping those tests", 14 if $@;
312 is $seq->length, $expected_lengths{$seq->display_id};
313 is $seq->division, 'YEAST';
# Fetch by accession.
315 eval {$seq = $gb->get_Seq_by_acc('P43780');};
316 skip "Couldn't connect to SwissProt with Bio::DB::SwissProt.pm. Skipping those tests", 12 if $@;
317 is $seq->length, $expected_lengths{$seq->display_id};
318 eval {$seq = $gb->get_Seq_by_acc('O39869');};
319 skip "Couldn't connect to SwissProt with Bio::DB::SwissProt.pm. Skipping those tests", 11 if $@;
# This record's expected length is looked up by accession number rather than
# by display_id.
320 is $seq->length, $expected_lengths{$seq->accession_number};
321 is $seq->accession_number, 'O39869';
322 is $seq->division, '9PICO';
# An accession passed to get_Seq_by_id: the returned record is expected to
# carry the display_id DEGP_CHLTR.
325 eval {$seq = $gb->get_Seq_by_id('P18584');};
326 skip "Couldn't connect to SwissProt with Bio::DB::SwissProt.pm. Skipping those tests", 8 if $@;
327 is $seq->length, $expected_lengths{$seq->display_id};
328 is $seq->display_id, 'DEGP_CHLTR';
329 is $seq->division, 'CHLTR';
# Stream two records using the 'tempfile' retrieval type.
331 ok $gb = Bio::DB::SwissProt->new('-retrievaltype' => 'tempfile', '-delay' => 0);
332 eval {$seqio = $gb->get_Stream_by_id(['NDP_MOUSE', 'NDP_HUMAN']);};
333 skip "Couldn't connect to SwissProt with Bio::DB::SwissProt.pm. Skipping those tests", 4 if $@;
334 undef $gb; # testing to see if we can remove gb
# Both sequences must still be readable from the stream after the database
# handle has been released.
335 ok $seq = $seqio->next_seq();
336 is $seq->length, $expected_lengths{$seq->display_id};
337 ok $seq = $seqio->next_seq();
338 is $seq->length, $expected_lengths{$seq->display_id};
# Clear the shared handles before the next group.
341 $seq = $seqio = undef;
344 # Bio::DB::EntrezGene
# The -requires_module argument names Bio::ASN1::EntrezGene as the
# prerequisite for the 8 tests of this group.
347 test_skip(-tests => 8, -requires_module => 'Bio::ASN1::EntrezGene');
348 use_ok('Bio::DB::EntrezGene');
349 ok $gb = Bio::DB::EntrezGene->new(-retrievaltype => 'tempfile', -delay => 0);
# Stream two gene ids and check display_id / accession_number of each record
# in the order they are returned.
350 eval {$seqio = $gb->get_Stream_by_id([2,3064]);};
351 skip "Couldn't connect to Entrez with Bio::DB::EntrezGene. Skipping those tests", 6 if $@;
352 $seq = $seqio->next_seq;
353 is $seq->display_id, "A2M";
354 is $seq->accession_number, 2;
355 $seq = $seqio->next_seq;
356 is $seq->display_id, "HTT";
357 is $seq->accession_number, 3064;
# Single-record fetch by gene id.
358 eval {$seq = $gb->get_Seq_by_id(6099);};
359 skip "Couldn't connect to Entrez with Bio::DB::EntrezGene. Skipping those tests", 2 if $@;
360 is $seq->display_id, "RP";
361 is $seq->accession_number, 6099;
# Clear the shared handles before the next group.
364 $seq = $seqio = undef;
# MeSH term lookup through Bio::DB::MeSH.
369 ok my $mesh = Bio::DB::MeSH->new();
# NOTE(review): no declaration of $t is visible in this excerpt -- confirm
# it is declared on a line missing here.
372 eval {$t = $mesh->get_exact_term('Dietary Fats');};
373 skip "Couldn't connect to MeSH with Bio::DB::MeSH. Skipping those tests", 3 if $@;
# The value of each_twig() for this term is expected to compare equal to 2.
374 is $t->each_twig(), 2;
375 eval {$t = $mesh->get_exact_term("Sinus Thrombosis, Intracranial");};
376 skip "Couldn't connect to MeSH with Bio::DB::MeSH. Skipping those tests", 2 if $@;
# Description and id are compared verbatim with the remote record, so these
# assertions depend on the exact upstream text.
377 is $t->description, "Thrombus formation in an intracranial venous sinus, including the superior sagittal, cavernous, lateral, and petrous sinuses. Etiologies include thrombosis due to infection, DEHYDRATION, coagulation disorders (see THROMBOPHILIA), and CRANIOCEREBRAL TRAUMA.";
378 is $t->id, "D012851";
# Clear the shared handles at the end of the group.
381 $seq = $seqio = undef;