1 # -*-Perl-*- mode (to keep my emacs happy)
4 use vars qw($DEBUG $TESTCOUNT);
6 eval { require Test; };
12 plan tests => $TESTCOUNT;
17 use Bio::SeqIO::MultiFile;
23 my $verbosity = -1; # Set to -1 for release version, so warnings aren't printed
25 my ($str, $seq,$ast,$temp,$mf,$ent,$out); # predeclare variables for strict
26 $str = Bio::SeqIO->new('-file' => Bio::Root::IO->catfile("t","data","test.fasta"),
27 '-format' => 'Fasta');
30 ok (defined($seq = $str->next_seq()));
32 print "Sequence 1 of 2 from fasta stream:\n", $seq->seq, "\n" if ( $DEBUG);
34 ok($seq->id, 'roa1_drome');
38 $str = Bio::SeqIO->new(-file=> Bio::Root::IO->catfile("t","data","test.raw"), '-format' => 'Raw');
42 ok ($seq = $str->next_seq());
43 print "Sequence 1 of 2 from Raw stream:\n", $seq->seq, "\n\n" if( $DEBUG);
45 ok ($seq = $str->next_seq());
47 print "Sequence 2 of 2 from Raw stream:\n", $seq->seq, $seq->seq, "\n"
52 $str = Bio::SeqIO->new('-file' => Bio::Root::IO->catfile("t","data","test.gcg"),
57 ok ( $seq = $str->next_seq());
58 print "Sequence 1 of 1 from GCG stream:\n", $seq->seq, "\n" if( $DEBUG);
61 $str = Bio::SeqIO->new('-file'=> ">".Bio::Root::IO->catfile("t","data","gcg.out"),
64 $str->write_seq($seq);
66 unlink(Bio::Root::IO->catfile("t","data","gcg.out"));
69 $str = Bio::SeqIO->new( '-file'=> Bio::Root::IO->catfile("t","data","test.genbank"),
70 '-format' => 'GenBank');
73 $str->verbose($verbosity);
75 ok ( $seq = $str->next_seq() );
76 print "Sequence 1 of 1 from GenBank stream:\n", $seq->seq, "\n" if( $DEBUG);
79 my $strout = Bio::SeqIO->new('-file'=> ">".Bio::Root::IO->catfile("t","data","genbank.out"),
80 '-format' => 'GenBank');
82 $strout->write_seq($seq);
83 $seq = $str->next_seq();
86 unlink(Bio::Root::IO->catfile("t","data","genbank.out"));
93 $ast = Bio::SeqIO->new( '-format' => 'embl' ,
94 '-file' => Bio::Root::IO->catfile("t","data","roa1.dat"));
95 $ast->verbose($verbosity);
96 my $as = $ast->next_seq();
98 ok($as->display_id, 'HSHNCPA1');
99 ok($as->accession_number, 'X79536');
100 ok($as->seq_version, 1);
101 ok($as->desc, 'H.sapiens mRNA for hnRNPcore protein A1');
102 ok($as->molecule, 'RNA');
103 ok($as->alphabet, 'rna');
104 ok(scalar $as->all_SeqFeatures(), 4);
105 ok($as->length, 1198);
106 ok($as->species->binomial(), 'Homo sapiens');
108 $ast = Bio::SeqIO->new( '-format' => 'GenBank' ,
109 '-file' => Bio::Root::IO->catfile("t","data","roa1.genbank"));
110 $ast->verbose($verbosity);
111 $as = $ast->next_seq();
112 ok $as->molecule, 'mRNA';
113 ok $as->alphabet, 'dna';
115 $mf = Bio::SeqIO::MultiFile->new( '-format' => 'Fasta' ,
117 [ Bio::Root::IO->catfile("t","data","multi_1.fa"),
118 Bio::Root::IO->catfile("t","data","multi_2.fa")]);
123 while( $seq = $mf->next_seq() ) {
125 $temp = $seq->display_id;
130 $ast = Bio::SeqIO->new( '-verbosity' => $verbosity,
131 '-format' => 'swiss' ,
132 '-file' => Bio::Root::IO->catfile("t","data","roa1.swiss"));
133 $as = $ast->next_seq();
135 ok($as->id, 'ROA1_HUMAN', "id is ".$as->id);
136 ok($as->primary_id, 'ROA1');
137 ok($as->length, 371);
138 ok($as->alphabet, 'protein');
139 ok($as->division, 'HUMAN');
140 ok(scalar $as->all_SeqFeatures(), 16);
142 ok(scalar $as->annotation->get_Annotations('reference'), 11);
143 ($ent, $seq, $out) = undef;
145 $ent = Bio::SeqIO->new( '-file' => Bio::Root::IO->catfile("t","data","test.embl"),
146 '-format' => 'embl');
148 $seq = $ent->next_seq();
150 ok(defined $seq->seq(), 1,
151 'failure to read Embl with ^ location and badly split double quotes');
152 ok(scalar $seq->annotation->get_Annotations('reference'), 3);
153 $out = Bio::SeqIO->new('-file'=> ">". Bio::Root::IO->catfile("t","data","embl.out"),
154 '-format' => 'embl');
156 ok($out->write_seq($seq),1,
157 'failure to write Embl format with ^ < and > locations');
159 unlink(Bio::Root::IO->catfile("t","data","embl.out"));
162 my $t_file = Bio::Root::IO->catfile("t","data","test.ace");
167 open BEFORE, $t_file;
172 my $a_in = Bio::SeqIO->new( -FILE => $t_file, -FORMAT => 'ace');
174 while (my $a = $a_in->next_seq) {
178 ok @a_seq, 3, 'wrong number of sequence objects';
180 my $esc_name = $a_seq[1]->display_id;
181 ok( $esc_name , 'Name; 4% strewn with \ various / escaped characters',
182 "bad unescaping of characters, $esc_name");
184 ok $a_seq[0]->alphabet, 'protein', 'alphabets incorrectly detected';
185 ok $a_seq[1]->alphabet, 'dna', 'alphabets incorrectly detected';
187 my $o_file = Bio::Root::IO->catfile("t","data","test.out.ace");
188 my $a_out = Bio::SeqIO->new( -FILE => "> $o_file", -FORMAT => 'ace');
190 foreach my $a (@a_seq) {
191 $a_out->write_seq($a) or $a_out_ok = 0;
193 undef($a_out); # Flush to disk
194 ok $a_out_ok,1,'error writing sequence';
206 ok( ($before and $after and ($before eq $after)),1,
207 'test output file differs from input');
210 my $stream = Bio::SeqIO->new('-file' => Bio::Root::IO->catfile("t","data","test.genbank"),
211 '-format' => 'GenBank');
212 $stream->verbose($verbosity);
217 while($seq = $stream->next_seq()) {
220 ok $seq->display_id(), "HUMBDNF";
221 $species = $seq->species();
222 @cl = $species->classification();
223 ok( $species->binomial(), "Homo sapiens",
224 'species parsing incorrect for genbank');
225 ok( $cl[3] ne $species->genus(), 1,
226 'genus duplicated in genbank parsing');
230 ok $lasts->display_id(), "HUMBETGLOA";
231 my ($ref) = $lasts->annotation->get_Annotations('reference');
232 ok($ref->medline, 94173918);
235 $ent = Bio::SeqIO->new( '-file' => Bio::Root::IO->catfile("t","data","test.embl"),
236 '-format' => 'embl');
237 $ent->verbose($verbosity);
238 $seq = $ent->next_seq();
239 $species = $seq->species();
240 @cl = $species->classification();
241 ok( $cl[3] ne $species->genus(), 1, 'genus duplicated in EMBL parsing');
244 $seq = Bio::SeqIO->new( '-format' => 'GenBank' ,
245 -file => Bio::Root::IO->catfile("t","data","testfuzzy.genbank"));
246 $seq->verbose($verbosity);
247 ok(defined($as = $seq->next_seq()));
249 my @features = $as->all_SeqFeatures();
251 my $lastfeature = pop @features;
253 # this is a split location; the root doesn't have strand
254 ok($lastfeature->strand, undef);
255 my $location = $lastfeature->location;
256 $location->verbose(-1); # silence the warning of undef seq_id()
257 # see above; splitlocs roots do not have a strand really
258 ok($location->strand, undef);
259 ok($location->start, 83202);
260 ok($location->end, 84996);
262 my @sublocs = $location->sub_Location();
265 my $loc = shift @sublocs;
266 ok($loc->start, 83202);
267 ok($loc->end, 83329);
268 ok($loc->strand, -1);
270 $loc = shift @sublocs;
271 ok($loc->start, 84248);
272 ok($loc->end, 84996);
275 $seq = Bio::SeqIO->new( '-format' => 'GenBank' ,
276 -file => ">".Bio::Root::IO->catfile("t","data","genbank.fuzzyout"));
277 $seq->verbose($verbosity);
278 ok($seq->write_seq($as));
279 unlink(Bio::Root::IO->catfile("t","data","genbank.fuzzyout"));
282 my $seqio = Bio::SeqIO->new( '-format' => 'swiss' ,
283 -file => Bio::Root::IO->catfile("t","data","swiss.dat"));
285 ok(defined( $seq = $seqio->next_seq));
287 # more tests to verify we are actually parsing correctly
288 ok($seq->primary_id, 'MA32');
289 ok($seq->display_id, 'MA32_HUMAN');
290 ok($seq->length, 282);
291 ok($seq->division, 'HUMAN');
292 ok($seq->alphabet, 'protein');
293 ok(scalar $seq->all_SeqFeatures(), 2);
296 foreach my $gn ( $seq->annotation->get_Annotations('gene_name') ) {
297 if( $gn->value =~ /SF2/ ) {
303 # test for feature locations like ?..N
304 ok(defined( $seq = $seqio->next_seq));
306 ok($seq->primary_id, 'ACON');
307 ok($seq->display_id, 'ACON_CAEEL');
308 ok($seq->length, 788);
309 ok($seq->division, 'CAEEL');
310 ok($seq->alphabet, 'protein');
311 ok(scalar $seq->all_SeqFeatures(), 5);
314 foreach my $gn ( $seq->annotation->get_Annotations('gene_name') ) {
315 if( $gn->value =~ /F54H12/ ) {
321 # test dos Linefeeds in gcg parser
322 $str = Bio::SeqIO->new('-file' => Bio::Root::IO->catfile("t","data","test_badlf.gcg"),
326 ok ( $seq = $str->next_seq());
327 ok(length($seq->seq) > 0 );
328 print "Sequence 1 of 1 from GCG stream:\n", $seq->seq, "\n" if( $DEBUG);
331 $str = new Bio::SeqIO(-format => 'genbank',
332 -file => Bio::Root::IO->catfile("t","data","AF165282.gb"),
333 -verbose => $verbosity);
335 $seq = $str->next_seq;
336 @features = $seq->all_SeqFeatures();
338 ok($features[0]->start, 1);
339 ok($features[0]->end, 226);
340 $location = $features[1]->location;
341 ok($location->isa('Bio::Location::SplitLocationI'));
342 @sublocs = $location->sub_Location();
348 $str = Bio::SeqIO->new('-file' => Bio::Root::IO->catfile("t","data","seqfile.pir"),
351 $strout = new Bio::SeqIO(-format => 'pir', -fh => \*STDOUT);
353 while( $seq = $str->next_seq()) {
354 ok($seq->id, qr /^[PF]1/ );
355 ok($seq->length > 1);
356 $strout->write_seq($seq) if( $verbosity > 0);
359 # test embl writing of a PrimarySeq
361 my $primaryseq = new Bio::PrimarySeq( -seq => 'AGAGAGAGATA',
365 -accession_number => 'myaccession');
367 my $embl = new Bio::SeqIO(-format => 'embl',
368 -verbose => $verbosity -1,
369 -file => ">primaryseq.embl");
371 ok($embl->write_seq($primaryseq));
374 $embl->write_seq($scalar);
378 unlink("primaryseq.embl");
381 # revcomp split location
382 my $gb = new Bio::SeqIO(-format => 'genbank',
383 -file => Bio::Root::IO->catfile(qw(t data revcomp_mrna.gb)));
385 $seq = $gb->next_seq();
387 $gb = new Bio::SeqIO(-format => 'genbank',
388 -file => ">tmp_revcomp_mrna.gb");
390 $gb->write_seq($seq);
392 ok(! -z "tmp_revcomp_mrna.gb");
394 # INSERT DIFFING CODE HERE
396 unlink("tmp_revcomp_mrna.gb");