1 # -*-Perl-*- Test Harness script for Bioperl
7 test_begin(-tests => 301);
8 use_ok('Bio::SeqIO::genbank');
11 my $verbose = test_debug;
13 my $ast = Bio::SeqIO->new(-format => 'genbank' ,
15 -file => test_input_file('roa1.genbank'));
16 isa_ok($ast, 'Bio::SeqIO');
17 $ast->verbose($verbose);
18 my $as = $ast->next_seq;
19 is $as->molecule, 'mRNA',$as->accession_number;
20 is $as->alphabet, 'dna';
21 is $as->division, 'EST';
22 is join(',',$as->get_dates), '27-OCT-1998';
23 is($as->primary_id, 3598416);
24 my @class = $as->species->classification;
25 is $class[$#class],'Eukaryota';
27 $ast = Bio::SeqIO->new(-format => 'genbank',
29 -file => test_input_file('NT_021877.gbk'));
30 $ast->verbose($verbose);
32 is $as->molecule, 'DNA',$as->accession_number;
33 is $as->alphabet, 'dna';
34 is $as->division, 'CON';
35 is join(',',$as->get_dates), '17-OCT-2003';
36 is($as->primary_id, 37539616);
37 is($as->accession_number, 'NT_021877');
39 my ($cds) = grep { $_->primary_tag eq 'CDS' } $as->get_SeqFeatures;
40 is(($cds->get_tag_values('transl_except'))[1],
41 '(pos:complement(4224..4226),aa:OTHER)');
43 # test for a DBSOURCE line
44 $ast = Bio::SeqIO->new(-format => 'genbank',
46 -file => test_input_file('BAB68554.gb'));
47 $ast->verbose($verbose);
49 is $as->molecule, 'PRT',$as->accession_number;
50 is $as->alphabet, 'protein';
51 is $as->division, 'VRT';
52 is join(',',$as->get_dates), '11-APR-2002';
53 # Though older GenBank releases indicate SOURCE contains only the common name,
54 # this is no longer true. In general, this line will contain an abbreviated
55 # form of the full organism name (but may contain the full length name),
56 # as well as the optional common name and organelle. There is no get/set
57 # for the abbreviated name but it is accessible via name()
58 ok defined($as->species->name('abbreviated')->[0]);
59 is $as->species->name('abbreviated')->[0], 'Aldabra giant tortoise';
60 is($as->primary_id, 15824047);
61 my $ac = $as->annotation;
63 my @dblinks = $ac->get_Annotations('dblink');
64 is(scalar @dblinks,1);
65 is($dblinks[0]->database, 'GenBank');
66 is($dblinks[0]->primary_id, 'AB072353');
67 is($dblinks[0]->version, '1');
68 is($dblinks[0]->display_text, 'GenBank:AB072353.1','operator overloading in AnnotationI is deprecated');
70 # test for multi-line SOURCE
71 $ast = Bio::SeqIO->new(-format => 'genbank',
73 -file => test_input_file('NC_006346.gb'));
75 is $as->species->binomial('FULL'), 'Bolitoglossa n. sp. RLM-2004',$as->accession_number;;
76 @class = $as->species->classification;
77 is($class[$#class],'Eukaryota');
78 is($as->species->common_name,'mushroomtongue salamander');
80 $ast = Bio::SeqIO->new(-format => 'genbank',
82 -file => test_input_file('U71225.gb'));
84 @class = $as->species->classification;
85 is($class[$#class],'Eukaryota',$as->accession_number);
86 is $as->species->common_name,'black-bellied salamander';
88 # test for unusual common name
89 $ast = Bio::SeqIO->new(-format => 'genbank',
91 -file => test_input_file('AB077698.gb'));
93 # again, this is not a common name but is in name('abbreviated')
94 ok defined($as->species->name('abbreviated')->[0]),$as->accession_number;
95 is $as->species->name('abbreviated')->[0],'Homo sapiens cDNA to mRNA';
97 # test for common name with parentheses
98 $ast = Bio::SeqIO->new(-format => 'genbank',
100 -file => test_input_file('DQ018368.gb'));
101 $as = $ast->next_seq;
102 is $as->species->scientific_name,'(Populus tomentosa x P. bolleana) x P. tomentosa var. truncata',
103 $as->accession_number;;
105 # test secondary accessions
106 my $seqio = Bio::SeqIO->new(-format => 'genbank',
107 -verbose => $verbose,
108 -file => test_input_file('D10483.gbk'));
109 my $seq = $seqio->next_seq;
110 my @kw = $seq->get_keywords;
111 is(scalar @kw, 118, $seq->accession_number);
113 my @sec_acc = $seq->get_secondary_accessions;
114 is(scalar @sec_acc,14);
115 is($sec_acc[-1], 'X56742');
118 my $str = Bio::SeqIO->new(-verbose => $verbose,
119 -file => test_input_file('D12555.gbk'));
121 $seq = $str->next_seq;
124 ok(! $@, 'bug 1487');
126 # bug 1647 rpt_unit sub-feature with multiple parens
127 $str = Bio::SeqIO->new(-format => 'genbank',
128 -verbose => $verbose,
129 -file => test_input_file('mini-AE001405.gb'));
130 ok($seq = $str->next_seq);
131 my @rpts = grep { $_->primary_tag eq 'repeat_region' }
132 $seq->get_SeqFeatures;
133 is $#rpts, 2, 'bug 1647';
134 my @rpt_units = grep {$_->has_tag('rpt_unit')} @rpts;
136 is(($rpt_units[0]->get_tag_values('rpt_unit'))[0],'(TG)10;A;(TG)7');
138 # test bug #1673 , RDB-II genbank files
139 $str = Bio::SeqIO->new(-format => 'genbank',
140 -verbose => $verbose,
141 -file => test_input_file('Mcjanrna_rdbII.gbk')
143 ok($seq = $str->next_seq, 'bug 1673');
144 my @refs = $seq->annotation->get_Annotations('reference');
146 is($seq->display_id,'Mc.janrrnA');
147 is($seq->molecule ,'RNA');
148 is $as->division, 'PLN';
149 is join(',',$as->get_dates), '23-MAY-2005';
151 $str = Bio::SeqIO->new(-format => 'genbank',
152 -file => test_input_file('AF165282.gb'),
153 -verbose => $verbose);
154 $seq = $str->next_seq;
155 my @features = $seq->all_SeqFeatures;
156 is(@features, 5, $seq->accession_number);
157 is($features[0]->start, 1);
158 is($features[0]->end, 226);
159 my $location = $features[1]->location;
160 ok($location->isa('Bio::Location::SplitLocationI'));
161 my @sublocs = $location->sub_Location;
164 # version and primary ID - believe it or not, this wasn't working
165 is ($seq->version, 1);
166 is ($seq->seq_version, 1);
167 is ($seq->primary_id, "5734104");
169 # streaming and Bio::RichSeq creation
170 my $stream = Bio::SeqIO->new(-file => test_input_file('test.genbank'),
171 -verbose => $verbose,
172 -format => 'genbank');
173 $stream->verbose($verbose);
178 my @ids = qw(DDU63596 DDU63595 HUMBDNF);
179 my @tids = (44689, 44689, 9606);
180 my @tnames = ("Dictyostelium discoideum",
181 "Dictyostelium discoideum",
183 while($seq = $stream->next_seq) {
185 is $seq->display_id, $ids[$seqnum];
186 $species = $seq->species;
187 @cl = $species->classification;
188 is( $species->binomial, $tnames[$seqnum],
189 'species parsing incorrect for genbank');
190 is( $cl[3] ne $species->genus, 1,
191 'genus duplicated in genbank parsing');
192 is( $species->ncbi_taxid, $tids[$seqnum] );
197 is($seqnum, 5,'streaming');
198 is $lasts->display_id, "HUMBETGLOA";
199 my ($ref) = $lasts->annotation->get_Annotations('reference');
200 is($ref->medline, 94173918);
203 $stream = Bio::SeqIO->new(-file => test_input_file('test.genbank.noseq'),
204 -verbose => $verbose,
205 -format => 'genbank' );
207 while($seq = $stream->next_seq) {
209 is $seq->display_id, $ids[$seqnum];
211 elsif( $seq->display_id eq 'M37762') {
212 is( ($seq->get_keywords)[0], 'neurotrophic factor');
216 is $seqnum, 5, "Total number of sequences in test file";
219 $seq = Bio::SeqIO->new( -file => test_input_file('testfuzzy.genbank'),
220 -format => 'genbank',
221 -verbose => $verbose );
222 ok(defined($as = $seq->next_seq));
224 @features = $as->all_SeqFeatures;
225 is(@features,21,'Fuzzy in');
226 my $lastfeature = pop @features;
227 # this is a split location; the root doesn't have strand
228 is($lastfeature->strand, undef);
229 $location = $lastfeature->location;
230 #$location->verbose(-1); # silence the warning of undef seq_id()
231 # see above; splitlocs roots do not have a strand really
232 is($location->strand, undef);
233 is($location->start, 83202);
234 is($location->end, 84996);
236 @sublocs = $location->sub_Location;
239 my $loc = shift @sublocs;
240 is($loc->start, 83202);
241 is($loc->end, 83329);
242 is($loc->strand, -1);
244 $loc = shift @sublocs;
245 is($loc->start, 84248);
246 is($loc->end, 84996);
249 $seq = Bio::SeqIO->new(-format => 'genbank',
250 -verbose => $verbose,
251 -file => ">" .test_output_file);
252 $seq->verbose($verbose);
253 ok($seq->write_seq($as),'Fuzzy out');
256 $str = Bio::SeqIO->new(-format => 'genbank',
257 -verbose => $verbose,
258 -file => test_input_file('BK000016-tpa.gbk'));
259 $seq = $str->next_seq;
260 ok(defined $seq, $seq->accession_number);
261 ok(defined $seq->seq);
262 is($seq->accession_number, 'BK000016',$seq->accession_number);
263 is($seq->alphabet, 'dna');
264 is($seq->display_id, 'BK000016');
265 is($seq->length, 1162);
266 is($seq->division, 'ROD');
267 is($seq->get_dates, 1);
268 is($seq->keywords, 'Third Party Annotation; TPA');
269 is($seq->desc, 'TPA: Mus musculus pantothenate kinase 4 mRNA, partial cds.');
270 is($seq->seq_version, 1);
271 is($seq->feature_count, 2);
272 my $spec_obj = $seq->species;
273 is ($spec_obj->common_name, 'house mouse');
274 is ($spec_obj->species, 'musculus');
275 is ($spec_obj->genus, 'Mus');
276 is ($spec_obj->binomial, 'Mus musculus');
277 $ac = $seq->annotation;
278 my $reference = ($ac->get_Annotations('reference') )[0];
279 is ($reference->pubmed, '11479594');
280 is ($reference->medline, '21372465',$seq->accession_number);
282 # validate that what is written is what is read
283 my $testfile = test_output_file;
284 my $out = Bio::SeqIO->new(-file => ">$testfile",
285 -format => 'genbank');
286 $out->write_seq($seq);
289 $str = Bio::SeqIO->new(-format => 'genbank',
291 $seq = $str->next_seq;
292 ok(defined $seq,'roundtrip');
293 ok(defined $seq->seq);
294 is($seq->accession_number, 'BK000016');
295 is($seq->alphabet, 'dna');
296 is($seq->display_id, 'BK000016');
297 is($seq->length, 1162);
298 is($seq->division, 'ROD');
299 is($seq->get_dates, 1);
300 is($seq->keywords, 'Third Party Annotation; TPA');
301 is($seq->desc, 'TPA: Mus musculus pantothenate kinase 4 mRNA, partial cds.');
302 is($seq->seq_version, 1);
303 is($seq->feature_count, 2);
304 $spec_obj = $seq->species;
305 is ($spec_obj->common_name, 'house mouse');
306 is ($spec_obj->species, 'musculus');
307 is ($spec_obj->genus, 'Mus');
308 is ($spec_obj->binomial, 'Mus musculus');
309 $ac = $seq->annotation;
310 $reference = ($ac->get_Annotations('reference') )[0];
311 is ($reference->pubmed, '11479594');
312 is ($reference->medline, '21372465');
314 # write revcomp split location
315 my $gb = Bio::SeqIO->new(-format => 'genbank',
316 # This sequence has an odd LOCUS line which sets off a warning, setting
318 # The newest Ensembl seq lacks this. Maybe update? cjfields 6-5-07
319 -verbose => $verbose ? $verbose : -1,
320 -file => test_input_file('revcomp_mrna.gb'));
321 $seq = $gb->next_seq;
323 $gb = Bio::SeqIO->new(-format => 'genbank',
324 -file => ">$testfile");
326 $gb->write_seq($seq);
328 ok(! -z $testfile, 'revcomp split location');
330 # bug 1925, continuation of long ORGANISM line ends up in @classification:
331 # ORGANISM Salmonella enterica subsp. enterica serovar Paratyphi A str. ATCC
333 # Bacteria; Proteobacteria; Gammaproteobacteria; Enterobacteriales;
334 # Enterobacteriaceae; Salmonella.
335 $gb = Bio::SeqIO->new(-format => 'genbank',
336 -verbose => $verbose,
337 -file => test_input_file('NC_006511-short.gbk'));
338 $seq = $gb->next_seq;
339 is $seq->species->common_name, undef, "Bug 1925";
340 is $seq->species->scientific_name, "Salmonella enterica subsp. enterica serovar Paratyphi A str. ATCC 9150";
341 @class = $seq->species->classification;
342 is $class[$#class], "Bacteria";
345 $gb = Bio::SeqIO->new(-format => 'genbank',
346 -verbose => $verbose,
347 -file => test_input_file('O_sat.wgs'));
348 $seq = $gb->next_seq;
350 my @tests = ('wgs' => 'AAAA02000001-AAAA02050231',
351 'wgs_scafld' => 'CM000126-CM000137',
352 'wgs_scafld' => 'CH398081-CH401163');
354 my @wgs = map {$seq->annotation->get_Annotations(lc($_))} qw(WGS WGS_SCAFLD);
359 my ($tagname, $value) = (shift @tests, shift @tests);
360 is($wgs->tagname, $tagname, $tagname);
361 is($wgs->value, $value);
367 # make sure we can retrieve a feature with a primary tag of 'misc_difference'
368 $gb = Bio::SeqIO->new(-format => 'genbank',
369 -verbose => $verbose,
370 -file => test_input_file('BC000007.gbk'));
371 $seq = $gb->next_seq;
372 ($cds) = grep { $_->primary_tag eq 'misc_difference' } $seq->get_SeqFeatures;
373 my @vals = $cds->get_tag_values('gene');
374 is $vals[0], 'PX19', $seq->accession_number;
376 # Check that the source,organism section is identical between input and output.
377 # - test an easy one where organism is species, then two different formats of
378 # subspecies, then a species with a format that used to be mistaken for
379 # subspecies, then a bacteria with no genus, and finally a virus with a genus.
381 # These tests are now somewhat out-of-date since we are moving to a Bio::Taxon-
382 # based system for verifying taxonomic information. Right now they just verify
383 # changes so are really useless; I will change them to verify common name,
384 # organelle, scientific name, etc.
386 my $outfile = test_output_file;
388 # output always adds a period (GenBank std), but two of these files do not use them.
390 foreach my $in ('BK000016-tpa.gbk', 'ay116458.gb', 'ay149291.gb', 'NC_006346.gb', 'ay007676.gb', 'dq519393.gb') {
391 my $infile = test_input_file($in);
393 $str = Bio::SeqIO->new(-format => 'genbank',
394 -verbose => $verbose,
396 $seq = $str->next_seq;
398 $out = Bio::SeqIO->new(-file => ">$outfile",
399 -format => 'genbank');
400 $out->write_seq($seq);
403 open my $IN, '<', $infile or die "Could not read file '$infile': $!\n";
407 open my $RESULT, '<', $outfile or die "Could not read file '$outfile': $!\n";
413 while (my $result = <$RESULT>) {
414 if ($result =~ /^KEYWORDS/) {
419 if ($result =~ /^REFERENCE/) {
424 # end periods don't count (not all input files have them)
426 $in[$line] =~ s{\.$}{};
428 if ($result ne $in[$line]) {
442 # NB: there should probably be full testing on all lines to ensure that output
445 # 20061117: problem with *double* colon in some annotation-dblink values
448 foreach my $in ('P35527.gb') {
449 my $infile = test_input_file($in);
450 $str = Bio::SeqIO->new(-format => 'genbank',
451 -verbose => $verbose,
453 $seq = $str->next_seq;
455 my $ac = $seq->annotation; # Bio::AnnotationCollection
456 foreach my $key ($ac->get_all_annotation_keys ) {
457 my @values = $ac->get_Annotations($key);
458 foreach my $ann (@values) {
459 my $value = $ann->display_text;
461 if ($key eq 'dblink') {
462 ok (index($value,'::') < 0); # this should never be true
463 ok ($value, $value); # check value is not empty
465 # print " ann/", sprintf('%12s ',$key), '>>>', $value , '<<<', "\n";
466 # print " index double colon: ",index($value ,'::'), "\n";
469 my @parts = split(/:/,$value);
470 if ( $parts[0] =~ /^(?:
471 # not an exhaustive list of databases;
472 # just the db's referenced in P35527.gb:
473 swissprot | GenBank | GenPept | HSSP | IntAct | Ensembl | KEGG
474 | HGNC | MIM | ArrayExpress | GO | InterPro | Pfam | PRINTS
482 ok ( $parts[1], "$parts[0]" );
492 $str = Bio::SeqIO->new(-format => 'genbank',
493 -verbose => $verbose,
494 -file => test_input_file('AF305198.gb')
497 $species = $str->next_seq->species;
499 is($species->scientific_name, 'Virginia creeper phytoplasma', 'Bug 2195');
500 is(join(', ',$species->classification),
501 'Virginia creeper phytoplasma, 16SrV (Elm yellows group), '
502 . 'Candidatus Phytoplasma, Acholeplasmataceae, Acholeplasmatales, '
503 . 'Mollicutes, Firmicutes, Bacteria',
506 # bug 2569, PROJECT line support, read and write, round-tripping
508 $str = Bio::SeqIO->new(-format => 'genbank',
509 -verbose => $verbose,
510 -file => test_input_file('NC_008536.gb'));
511 $seq = $str->next_seq;
513 my $project = ($seq->annotation->get_Annotations('project'))[0];
514 isa_ok($project, 'Bio::Annotation::SimpleValue');
517 is($project->value, 'GenomeProject:12638');
520 ok(0, "PROJECT not parsed");
523 $outfile = test_output_file;
524 $gb = Bio::SeqIO->new(-format => 'genbank',
525 -verbose => $verbose,
526 -file => ">$outfile");
527 $gb->write_seq($seq);
529 $str = Bio::SeqIO->new(-format => 'genbank',
530 -verbose => $verbose,
532 $seq = $str->next_seq;
534 $project = ($seq->annotation->get_Annotations('project'))[0];
535 isa_ok($project, 'Bio::Annotation::SimpleValue');
538 is($project->value, 'GenomeProject:12638');
541 ok(0, "Roundtrip test failed");
544 # test for swissprot/UniProt/UniProtKB DBSOURCE line (Bug : RT 44536)
545 $ast = Bio::SeqIO->new(-format => 'genbank',
546 -verbose => $verbose,
547 -file => test_input_file('P39765.gb'));
548 $as = $ast->next_seq;
549 is $as->molecule, 'PRT',$as->accession_number;
550 is $as->division, 'BCT',$as->accession_number;
551 is join(',',$as->get_dates), '03-MAR-2009',$as->accession_number;
552 is $as->alphabet, 'protein';
553 # Though older GenBank releases indicate SOURCE contains only the common name,
554 # this is no longer true. In general, this line will contain an abbreviated
555 # form of the full organism name (but may contain the full length name),
556 # as well as the optional common name and organelle. There is no get/set
557 # for the abbreviated name but it is accessible via name()
558 ok defined($as->species->name('abbreviated')->[0]);
559 is $as->species->name('abbreviated')->[0], 'Bacillus subtilis';
560 is($as->primary_id, 20141743);
561 $ac = $as->annotation;
563 @dblinks = $ac->get_Annotations('dblink');
564 is(scalar @dblinks,31);
565 is($dblinks[0]->database, 'UniProtKB');
566 is($dblinks[0]->primary_id, 'PYRR_BACSU');
567 is($dblinks[0]->version, undef);
568 is($dblinks[0]->display_text, 'UniProtKB:PYRR_BACSU','operator overloading in AnnotationI is deprecated');
570 #bug 2982 embl/genbank contig handling
572 $ast = Bio::SeqIO->new( -file => test_input_file('bug2982.gb'),
573 -format => 'genbank' );
574 $seq = $ast->next_seq;
576 ok my @ctg = $seq->annotation->get_Annotations('contig');
577 like $ctg[0]->value, qr/join\(.*?gap.*?complement/;
579 # write_seq() and FTHelper duplicate specific tags, need to check a round-trip
580 $ast = Bio::SeqIO->new(-format => 'genbank' ,
581 -verbose => $verbose,
582 -file => test_input_file('singlescore.gbk'));
583 $as = $ast->next_seq;
584 ($cds) = grep { $_->primary_tag eq 'CDS' } $as->get_SeqFeatures;
585 my @notes = $cds->get_tag_values('note');
586 is(scalar @notes, 2);
587 $testfile = test_output_file;
588 $out = Bio::SeqIO->new(-file => ">$testfile",
589 -format => 'genbank');
590 $out->write_seq($as);
592 $ast = Bio::SeqIO->new(-format => 'genbank' ,
593 -verbose => $verbose,
594 -file => $testfile );
595 $as = $ast->next_seq;
596 ($cds) = grep { $_->primary_tag eq 'CDS' } $as->get_SeqFeatures;
597 @notes = $cds->get_tag_values('note');
598 is(scalar @notes, 2);
602 my $in = Bio::SeqIO->new(-format => 'genbank',
603 -file => test_input_file('NC_002058_multDBLINK_bug3375.gb'));
604 $seq = $in->next_seq; # should not throw a warning now
605 @dblinks = $seq->annotation->get_Annotations('dblink'); # contains 5 dblink references
606 # testing DBLINK BioProject: PRJNA15288
607 is($dblinks[0]->database, 'BioProject', 'bug3375 database is BioProject');
608 is($dblinks[0]->primary_id, 'PRJNA15288', 'bug3375 primary_id is PRJNA15288');
609 # testing DBLINK Project:100,200,300
610 is($dblinks[3]->database, 'Project');
611 is($dblinks[3]->primary_id, '300');
612 # testing DBLINK NC_002058.3
613 is($dblinks[4]->database, 'GenBank');
614 is($dblinks[4]->primary_id, 'NC_002058');
615 is($dblinks[4]->version, '3');
617 # long labels handled
619 # Create sequence with feature with a long label qualifier
620 my $seq=Bio::Seq->new(-seq => 'actg',
622 my $feature=Bio::SeqFeature::Generic->new(-primary=>'CDS', -start=>1, -end=>4);
623 my $label='1 2 3 4 5 6 7 8 9 a b c d e f g h i j k l m n o p q r';
624 $feature->add_tag_value(label => $label);
625 $seq->add_SeqFeature($feature);
629 open my $str_fh, '>', \$string or skip("Could not write string, skipping", 2);
630 my $out = Bio::SeqIO->new(-format => 'genbank',
632 $out->write_seq($seq);
635 my $in = Bio::SeqIO->new(-format => 'genbank',
637 my $genbank = $in->next_seq;
638 my ($read_feature) = $genbank->get_SeqFeatures;
639 my ($read_label) = $read_feature->get_tag_values('label');
640 is($read_label, $label, 'Label is the same');
644 $in = Bio::SeqIO->new(-format => 'genbank',
645 -file => test_input_file('YP_007988852.gp'),
646 -verbose => $verbose);
647 $seq = $in->next_seq; # should not throw a warning now
648 is($seq->length, 205);
650 my @anns = $seq->annotation->get_Annotations('contig');
652 isa_ok($anns[0], 'Bio::Annotation::SimpleValue');
653 is($anns[0]->value, 'join(WP_015639704.1:1..205)');
655 is($seq->seq, 'MENRKFGYIRVSSKDQNEGRQLEAMRKIGITERDIYLDKQSGKNFERANYQLLKRIIRKGDI'
656 . 'LYIHSLDRFGRNKEEILQEWNDLTKNIEADIVVLDMPLLDTTQYKDSMGTFIADLVLQILSWMAEEERERIRK'
657 . 'RQREGIDLALQNGIQFGRSPVVVSDEFKEVYRKWKAKELTAVEAMQEAGVKKTSFYKLVKAHENSIKVNS');
659 # Genbank files with CONTIG and sequence should print the sequence with write_seq()
660 $testfile = test_output_file;
661 $out = Bio::SeqIO->new(-file => ">$testfile",
662 -format => 'genbank');
663 $out->write_seq($seq);
666 $in = Bio::SeqIO->new(-file => $testfile,
667 -format => 'genbank',
668 -verbose => $verbose);
669 $seq = $in->next_seq;
670 is($seq->length, 205);
672 @anns = $seq->annotation->get_Annotations('contig');
674 isa_ok($anns[0], 'Bio::Annotation::SimpleValue');
675 is($anns[0]->value, 'join(WP_015639704.1:1..205)');
677 is($seq->seq, 'MENRKFGYIRVSSKDQNEGRQLEAMRKIGITERDIYLDKQSGKNFERANYQLLKRIIRKGDI'
678 . 'LYIHSLDRFGRNKEEILQEWNDLTKNIEADIVVLDMPLLDTTQYKDSMGTFIADLVLQILSWMAEEERERIRK'
679 . 'RQREGIDLALQNGIQFGRSPVVVSDEFKEVYRKWKAKELTAVEAMQEAGVKKTSFYKLVKAHENSIKVNS');
681 $seq = Bio::SeqIO->new(-format => 'genbank',
682 -file => test_input_file('YP_007988852.gp') )->next_seq;
683 @features = $seq->remove_SeqFeatures;
684 is $#features, 10, 'Got 11 features';
686 $seq = Bio::SeqIO->new(-format => 'genbank',
687 -file => test_input_file('YP_007988852.gp') )->next_seq;
688 @features = $seq->remove_SeqFeatures('CDS');
689 is $#features, 0, 'Got 1 feature';
690 is $features[0]->primary_tag, 'CDS', 'Correct primary tag for feature';
691 @features = $seq->remove_SeqFeatures;
692 is $#features, 9, 'Got 10 features';
694 # Handle Structured Comments in COMMENT section
695 $seq = Bio::SeqIO->new(-format => 'genbank',
696 -file => test_input_file('KF527485.gbk') )->next_seq;
697 my $comment = ($seq->get_Annotations('comment') )[0];
698 is($comment->as_text, "Comment:
699 ##Assembly-Data-START##
700 Assembly Method :: Lasergene v. 10
701 Sequencing Technology :: ABI37XL; Sanger dideoxy sequencing
702 ##Assembly-Data-END##",
703 "Got correct Structured Comment");
705 $seq = Bio::SeqIO->new(-format => 'genbank',
706 -file => test_input_file('HM138502.gbk') )->next_seq;
707 $comment = ($seq->get_Annotations('comment') )[0];
708 ok( $comment->as_text
709 =~ /^Comment: Swine influenza A \(H1N1\) virus isolated during human swine flu outbreak of 2009/,
710 "Got correct Structured Comment"
712 ok( $comment->as_text =~ /^##GISAID_EpiFlu\(TM\)Data-START##/m,
713 "Got correct Structured Comment" );
714 ok( $comment->as_text =~ /^Subtype :: H1N1/m,
715 "Got correct Structured Comment"
717 ok( $comment->as_text =~ /^##GISAID_EpiFlu\(TM\)Data-END##/m,
718 "Got correct Structured Comment" );