t/SeqIO/Handler.t

   1 # -*-Perl-*- Test Harness script for Bioperl
   2 # $Id$
   3
   4 use strict;
   5
   6 BEGIN {
   7     use lib '.';
   8     use Bio::Root::Test;
   9
  10     test_begin(-tests           => 561,
  11                -requires_module => 'Data::Stag');
  12
  13     use_ok('Bio::SeqIO');
  14 }
  15
  16
  17 my $verbose = test_debug();
  18
  19 ################################## GenBank ##################################
  20
  21 my $ast = Bio::SeqIO->new(-format => 'gbdriver' ,
  22                         -verbose => $verbose,
  23                         -file => test_input_file("roa1.genbank"));
  24 $ast->verbose($verbose);
  25 my $as = $ast->next_seq();
  26 is $as->molecule, 'mRNA',$as->accession_number;
  27 is $as->alphabet, 'dna';
  28 is($as->primary_id, 3598416);
  29 my @class = $as->species->classification;
  30 is $class[$#class],'Eukaryota';
  31
  32 $ast = Bio::SeqIO->new(-format => 'gbdriver',
  33                               -verbose => $verbose,
  34                        -file => test_input_file("NT_021877.gbk"));
  35 $ast->verbose($verbose);
  36 $as = $ast->next_seq();
  37 is $as->molecule, 'DNA',$as->accession_number;
  38 is $as->alphabet, 'dna';
  39 is($as->primary_id, 37539616);
  40 is($as->accession_number, 'NT_021877');
  41
  42 my ($cds) = grep { $_->primary_tag eq 'CDS' } $as->get_SeqFeatures();
  43 is(($cds->get_tag_values('transl_except'))[1],
  44    '(pos:complement(4224..4226),aa:OTHER)');
  45
  46 # test for a DBSOURCE line
  47 $ast = Bio::SeqIO->new(-format => 'gbdriver',
  48                               -verbose => $verbose,
  49                        -file => test_input_file("BAB68554.gb"));
  50 $ast->verbose($verbose);
  51 $as = $ast->next_seq();
  52 is $as->molecule, 'linear',$as->accession_number;;
  53 is $as->alphabet, 'protein';
  54 # Though older GenBank releases indicate SOURCE contains only the common name,
  55 # this is no longer true.  In general, this line will contain an abbreviated
  56 # form of the full organism name (but may contain the full length name),
  57 # as well as the optional common name and organelle.  There is no get/set
  58 # for the abbreviated name but it is accessible via name()
  59 ok defined($as->species->name('abbreviated')->[0]);
  60 is $as->species->name('abbreviated')->[0], 'Aldabra giant tortoise';
  61 is($as->primary_id, 15824047);
  62 my $ac = $as->annotation;
  63 ok defined $ac;
  64 my @dblinks = $ac->get_Annotations('dblink');
  65 is(scalar @dblinks,1);
  66 is($dblinks[0]->database, 'GenBank');
  67 is($dblinks[0]->primary_id, 'AB072353');
  68 is($dblinks[0]->version, '1');
  69 is($dblinks[0]->display_text, 'GenBank:AB072353.1','operator overloading in AnnotationI is deprecated');
  70
  71 # test for multi-line SOURCE
  72 $ast = Bio::SeqIO->new(-format => 'gbdriver',
  73                               -verbose => $verbose,
  74                        -file => test_input_file("NC_006346.gb"));
  75 $as = $ast->next_seq;
  76 is $as->species->binomial('FULL'), 'Bolitoglossa n. sp. RLM-2004',$as->accession_number;;
  77 @class = $as->species->classification;
  78 is($class[$#class],'Eukaryota');
  79 is($as->species->common_name,'mushroomtongue salamander');
  80
  81 $ast = Bio::SeqIO->new(-format => 'gbdriver',
  82                               -verbose => $verbose,
  83                        -file => test_input_file("U71225.gb"));
  84 $as = $ast->next_seq;
  85 @class = $as->species->classification;
  86 is($class[$#class],'Eukaryota',$as->accession_number);
  87 is $as->species->common_name,'black-bellied salamander';
  88
  89 # test for unusual common name
  90 $ast = Bio::SeqIO->new(-format => 'gbdriver',
  91                               -verbose => $verbose,
  92                        -file => test_input_file("AB077698.gb"));
  93 $as = $ast->next_seq;
  94 # again, this is not a common name but is in name('abbreviated')
  95 ok defined($as->species->name('abbreviated')->[0]),$as->accession_number;
  96 is $as->species->name('abbreviated')->[0],'Homo sapiens cDNA to mRNA';
  97
  98 # test for common name with parentheses
  99 $ast = Bio::SeqIO->new(-format => 'gbdriver',
 100                               -verbose => $verbose,
 101                        -file => test_input_file("DQ018368.gb"));
 102 $as = $ast->next_seq;
 103 is $as->species->scientific_name,'(Populus tomentosa x P. bolleana) x P. tomentosa var. truncata',
 104 $as->accession_number;;
 105
 106 # test secondary accessions
 107 my $seqio = Bio::SeqIO->new(-format => 'gbdriver',
 108                                     -verbose => $verbose,
 109                                     -file => test_input_file('D10483.gbk'));
 110 my $seq = $seqio->next_seq;
 111 my @kw =  $seq->get_keywords;
 112 is(scalar @kw, 118, $seq->accession_number);
 113 is($kw[-1], 'yabO');
 114 my @sec_acc = $seq->get_secondary_accessions();
 115 is(scalar @sec_acc,14);
 116 is($sec_acc[-1], 'X56742');
 117
 118 # bug #1487
 119 my $str = Bio::SeqIO->new(-verbose => $verbose,
 120                                  -file    => test_input_file('D12555.gbk'));
 121 eval {
 122     $seq = $str->next_seq;
 123 };
 124
 125 ok(! $@, 'bug 1487');
 126
 127 # bug 1647 rpt_unit sub-feature with multiple parens
 128 $str = Bio::SeqIO->new(-format => 'gbdriver',
 129                               -verbose => $verbose,
 130                        -file => test_input_file('mini-AE001405.gb'));
 131 ok($seq = $str->next_seq);
 132 my @rpts = grep { $_->primary_tag eq 'repeat_region' }
 133   $seq->get_SeqFeatures;
 134 is $#rpts, 2, 'bug 1647';
 135 my @rpt_units = grep {$_->has_tag('rpt_unit')} @rpts;
 136 is $#rpt_units, 0;
 137 is(($rpt_units[0]->get_tag_values('rpt_unit'))[0],'(TG)10;A;(TG)7');
 138
 139 # test bug #1673 , RDB-II genbank files
 140 $str = Bio::SeqIO->new(-format => 'gbdriver',
 141                               -verbose => $verbose,
 142                        -file => test_input_file('Mcjanrna_rdbII.gbk')
 143               );
 144 ok($seq = $str->next_seq, 'bug 1673');
 145 my @refs = $seq->annotation->get_Annotations('reference');
 146 is(@refs, 1);
 147 is($seq->display_id,'Mc.janrrnA');
 148 is($seq->molecule ,'RNA');
 149
 150 $str  = Bio::SeqIO->new(-format => 'gbdriver',
 151                               -file   => test_input_file("AF165282.gb"),
 152                               -verbose => $verbose);
 153 $seq = $str->next_seq;
 154 my @features = $seq->all_SeqFeatures();
 155 is(@features, 5, $seq->accession_number);
 156 is($features[0]->start, 1);
 157 is($features[0]->end, 226);
 158 my $location = $features[1]->location;
 159 ok($location->isa('Bio::Location::SplitLocationI'));
 160 my @sublocs = $location->sub_Location();
 161 is(@sublocs, 29);
 162
 163 # version and primary ID - believe it or not, this wasn't working
 164 is ($seq->version, 1);
 165 is ($seq->seq_version, 1);
 166 is ($seq->primary_id, "5734104");
 167
 168 # streaming and Bio::RichSeq creation
 169 my $stream = Bio::SeqIO->new(-file => test_input_file("test.genbank"),
 170                                       -verbose => $verbose,
 171                              -format => 'gbdriver');
 172 $stream->verbose($verbose);
 173 my $seqnum = 0;
 174 my $species;
 175 my @cl;
 176 my $lasts;
 177 my @ids = qw(DDU63596 DDU63595 HUMBDNF);
 178 my @tids = (44689, 44689, 9606);
 179 my @tnames = ("Dictyostelium discoideum","Dictyostelium discoideum",
 180                   "Homo sapiens");
 181 while($seq = $stream->next_seq()) {
 182     if($seqnum < 3) {
 183         is $seq->display_id(), $ids[$seqnum];
 184         $species = $seq->species();
 185         @cl = $species->classification();
 186         is( $species->binomial(), $tnames[$seqnum],
 187              'species parsing incorrect for genbank');
 188         is( $cl[3] ne $species->genus(), 1,
 189              'genus duplicated in genbank parsing');
 190         is( $species->ncbi_taxid, $tids[$seqnum] );
 191     }
 192     $seqnum++;
 193     $lasts = $seq;
 194 }
 195 is($seqnum, 5,'streaming');
 196 is $lasts->display_id(), "HUMBETGLOA";
 197 my ($ref) = $lasts->annotation->get_Annotations('reference');
 198 is($ref->medline, 94173918);
 199 $stream->close();
 200
 201 $stream = Bio::SeqIO->new(-file => test_input_file("test.genbank.noseq"),
 202                                   -verbose => $verbose,
 203                                   -format => 'gbdriver' );
 204 $seqnum = 0;
 205 while($seq = $stream->next_seq()) {
 206     if($seqnum < 3) {
 207         is $seq->display_id(), $ids[$seqnum];
 208     } elsif( $seq->display_id eq 'M37762') {
 209         is( ($seq->get_keywords())[0], 'neurotrophic factor');
 210     }
 211     $seqnum++;
 212 }
 213 is $seqnum, 5, "Total number of sequences in test file";
 214
 215 # fuzzy
 216 $seq = Bio::SeqIO->new( -format => 'gbdriver',
 217                                 -verbose => $verbose,
 218                         -file =>test_input_file("testfuzzy.genbank"));
 219 $seq->verbose($verbose);
 220 ok(defined($as = $seq->next_seq()));
 221
 222 @features = $as->all_SeqFeatures();
 223 is(@features,21,'Fuzzy in');
 224 my $lastfeature = pop @features;
 225 # this is a split location; the root doesn't have strand
 226 is($lastfeature->strand, undef);
 227 $location = $lastfeature->location;
 228 #$location->verbose(-1); # silence the warning of undef seq_id()
 229 # see above; splitlocs roots do not have a strand really
 230 is($location->strand, undef);
 231 is($location->start, 83202);
 232 is($location->end, 84996);
 233
 234 @sublocs = $location->sub_Location();
 235
 236 is(@sublocs, 2);
 237 my $loc = shift @sublocs;
 238 is($loc->start, 83202);
 239 is($loc->end, 83329);
 240 is($loc->strand, -1);
 241
 242 $loc = shift @sublocs;
 243 is($loc->start, 84248);
 244 is($loc->end, 84996);
 245 is($loc->strand,1);
 246
 247 my $outfile = test_output_file();
 248 $seq = Bio::SeqIO->new(-format => 'genbank',
 249                               -verbose => $verbose,
 250                        -file=> ">$outfile");
 251 $seq->verbose($verbose);
 252 ok($seq->write_seq($as),'Fuzzy out');
 253
 254 ## now genbank ##
 255 $str = Bio::SeqIO->new(-format =>'gbdriver',
 256                              -verbose => $verbose,
 257                              -file => test_input_file('BK000016-tpa.gbk'));
 258 $seq = $str->next_seq;
 259 ok(defined $seq, $seq->accession_number);
 260 ok(defined $seq->seq);
 261 is($seq->accession_number, 'BK000016',$seq->accession_number);
 262 is($seq->alphabet, 'dna');
 263 is($seq->display_id, 'BK000016');
 264 is($seq->length, 1162);
 265 is($seq->division, 'ROD');
 266 is($seq->get_dates, 1);
 267 is($seq->keywords, 'Third Party Annotation; TPA');
 268 is($seq->desc, 'TPA: Mus musculus pantothenate kinase 4 mRNA, partial cds.');
 269 is($seq->seq_version, 1);
 270 is($seq->feature_count, 2);
 271 my $spec_obj = $seq->species;
 272 is ($spec_obj->common_name, 'house mouse');
 273 is ($spec_obj->species, 'musculus');
 274 is ($spec_obj->genus, 'Mus');
 275 is ($spec_obj->binomial, 'Mus musculus');
 276 $ac = $seq->annotation;
 277 my $reference =  ($ac->get_Annotations('reference') )[0];
 278 is ($reference->pubmed, '11479594');
 279 is ($reference->medline, '21372465',$seq->accession_number);
 280
 281 # validate that what is written is what is read
 282 my $testfile = test_output_file();
 283 my $out = Bio::SeqIO->new(-file => ">$testfile",
 284                              -format => 'genbank');
 285 $out->write_seq($seq);
 286 $out->close();
 287
 288 $str = Bio::SeqIO->new(-format =>'gbdriver',
 289                              -file => $testfile);
 290 $seq = $str->next_seq;
 291 ok(defined $seq,'roundtrip');
 292 ok(defined $seq->seq);
 293 is($seq->accession_number, 'BK000016');
 294 is($seq->alphabet, 'dna');
 295 is($seq->display_id, 'BK000016');
 296 is($seq->length, 1162);
 297 is($seq->division, 'ROD');
 298 is($seq->get_dates, 1);
 299 is($seq->keywords, 'Third Party Annotation; TPA');
 300 is($seq->desc, 'TPA: Mus musculus pantothenate kinase 4 mRNA, partial cds.');
 301 is($seq->seq_version, 1);
 302 is($seq->feature_count, 2);
 303 $spec_obj = $seq->species;
 304 is ($spec_obj->common_name, 'house mouse');
 305 is ($spec_obj->species, 'musculus');
 306 is ($spec_obj->genus, 'Mus');
 307 is ($spec_obj->binomial, 'Mus musculus');
 308 $ac = $seq->annotation;
 309 $reference =  ($ac->get_Annotations('reference') )[0];
 310 is ($reference->pubmed, '11479594');
 311 is ($reference->medline, '21372465');
 312
 313 # write revcomp split location
 314 my $gb = Bio::SeqIO->new(-format => 'gbdriver',
 315                         -verbose => $verbose,
 316                         -file   => test_input_file('revcomp_mrna.gb'));
 317 $seq = $gb->next_seq();
 318
 319 $gb = Bio::SeqIO->new(-format => 'genbank',
 320                      -file   => ">$testfile");
 321
 322 $gb->write_seq($seq);
 323 undef $gb;
 324 ok(! -z $testfile, 'revcomp split location');
 325
 326 # bug 1925, continuation of long ORGANISM line ends up in @classification:
 327 # ORGANISM  Salmonella enterica subsp. enterica serovar Paratyphi A str. ATCC
 328 #           9150
 329 #           Bacteria; Proteobacteria; Gammaproteobacteria; Enterobacteriales;
 330 #           Enterobacteriaceae; Salmonella.
 331 $gb = Bio::SeqIO->new(-format => 'gbdriver',
 332                      -verbose => $verbose,
 333                         -file   => test_input_file('NC_006511-short.gbk'));
 334 $seq = $gb->next_seq;
 335 is $seq->species->common_name, undef, "Bug 1925";
 336 is $seq->species->scientific_name, "Salmonella enterica subsp. enterica serovar Paratyphi A str. ATCC 9150";
 337 @class = $seq->species->classification;
 338 is $class[$#class], "Bacteria";
 339
 340 # WGS tests
 341 $gb = Bio::SeqIO->new(-format => 'gbdriver',
 342                       -verbose => $verbose,
 343                     -file   => test_input_file('O_sat.wgs'));
 344 $seq = $gb->next_seq;
 345
 346 my @tests = ('wgs'        => 'AAAA02000001-AAAA02050231',
 347             'wgs_scafld' => 'CM000126-CM000137',
 348             'wgs_scafld' => 'CH398081-CH401163');
 349
 350 my @wgs = map {$seq->annotation->get_Annotations(lc($_))} (qw(WGS WGS_SCAFLD));
 351
 352 my $ct=0;
 353
 354 for my $wgs (@wgs) {
 355     my ($tagname, $value) = (shift @tests, shift @tests);
 356     is($wgs->tagname, $tagname, $tagname);
 357     is($wgs->value, $value);
 358     $ct++;
 359 }
 360
 361 is ($ct, 3);
 362
 363 # make sure we can retrieve a feature with a primary tag of 'misc_difference'
 364 $gb = Bio::SeqIO->new(-format => 'gbdriver',
 365                      -verbose => $verbose,
 366                     -file   => test_input_file('BC000007.gbk'));
 367 $seq = $gb->next_seq;
 368 ($cds) = grep { $_->primary_tag eq 'misc_difference' } $seq->get_SeqFeatures;
 369 my @vals = $cds->get_tag_values('gene');
 370 is $vals[0], 'PX19', $seq->accession_number;
 371
 372 # Check that the source,organism section is identical between input and output.
 373 # - test an easy one where organism is species, then two different formats of
 374 # subspecies, then a species with a format that used to be mistaken for
 375 # subspecies, then a bacteria with no genus, and finally a virus with a genus.
 376
 377 # These tests are now somewhat out-of-date since we are moving to a Bio::Taxon-
 378 # based system for verifying taxonomic information.  Right now they just verify
 379 # changes so are really useless; I will change them to verify common name,
 380 # organelle, scientific name, etc.
 381
 382
 383 # output always adds a period (GenBank std), but two of these files do not use them.
 384
 385 foreach my $in ('BK000016-tpa.gbk', 'ay116458.gb', 'ay149291.gb', 'NC_006346.gb', 'ay007676.gb', 'dq519393.gb') {
 386     my $infile =  test_input_file($in);
 387         $outfile = test_output_file();
 388
 389     $str = Bio::SeqIO->new(-format =>'genbank',
 390                           -verbose => $verbose,
 391                           -file => $infile);
 392     $seq = $str->next_seq;
 393
 394     $out = Bio::SeqIO->new(-file => $outfile, -format => 'genbank');
 395     $out->write_seq($seq);
 396     $out->close();
 397
 398     open my $IN, '<', $infile or die "Could not read file '$infile': $!\n";
 399     my @in = <$IN>;
 400     close $IN;
 401     open my $RESULT, '<', $outfile or die "Could not read file '$outfile': $!\n";
 402     my $line = 0;
 403     my $check = 0;
 404     my $is = 1;
 405
 406     FILECHECK:
 407     while (my $result = <$RESULT>) {
 408         if ($result =~ /^KEYWORDS/) {
 409             $check = 1;
 410             next;
 411         }
 412
 413         if ($result =~ /^REFERENCE/) {
 414             last FILECHECK;
 415         }
 416
 417         if ($check) {
 418
 419             # end periods don't count (not all input files have them)
 420             $result =~ s{\.$}{};
 421             $in[$line] =~ s{\.$}{};
 422
 423             if ($result ne $in[$line]) {
 424                 $is = 0;
 425                 last;
 426             }
 427         }
 428     } continue { $line++ }
 429     close $RESULT;
 430
 431     ok $is, $in;
 432 }
 433
 434 # NB: there should probably be full testing on all lines to ensure that output
 435 # matches input.
 436
 437 # 20061117: problem with *double* colon in some annotation-dblink values
 438 $ct = 0;
 439
 440 foreach my $in ('P35527.gb') {
 441     my $infile =  test_input_file($in);
 442     $str = Bio::SeqIO->new(-format =>'genbank',
 443                          -verbose => $verbose,
 444                          -file => $infile);
 445     $seq = $str->next_seq;
 446     my $ac      = $seq->annotation();      # Bio::AnnotationCollection
 447     foreach my $key ($ac->get_all_annotation_keys() ) {
 448         my @values = $ac->get_Annotations($key);
 449         foreach my $ann (@values) {
 450             my $value = $ann->display_text;
 451             $ct++;
 452             if ($key eq 'dblink') {
 453
 454                 ok (index($value,'::') < 0);   # this should never be true
 455
 456                 ok ($value, $value);   # check value is not empty
 457
 458                 #  print "  ann/", sprintf('%12s  ',$key), '>>>', $value , '<<<', "\n";
 459                 #  print "        index double colon: ",index($value   ,'::'), "\n";
 460
 461                 #  check db name:
 462                 my @parts = split(/:/,$value);
 463                 if ( $parts[0] =~ /^(?:
 464                         #  not an exhaustive list of databases;
 465                         #  just the db's referenced in P35527.gb:
 466                         swissprot | GenBank | GenPept  | HSSP| IntAct | Ensembl | KEGG | HGNC | MIM | ArrayExpress
 467                                   | GO      | InterPro | Pfam| PRINTS | PROSITE
 468                                      )$/x )
 469                 {
 470                     ok 1;
 471                 }
 472                 else {
 473                     ok 0;
 474                 }
 475                     ok ( $parts[1], "$parts[0]" );
 476             }
 477                 # elsif ($key eq 'reference') { }
 478         }
 479     }
 480 }
 481
 482 is($ct, 46);
 483
 484 # bug 2195
 485
 486 $str = Bio::SeqIO->new(-format =>'gbdriver',
 487                       -verbose => $verbose,
 488                       -file => test_input_file('AF305198.gb')
 489                      );
 490
 491 $species = $str->next_seq->species;
 492
 493 is($species->scientific_name, 'Virginia creeper phytoplasma', 'Bug 2195');
 494 is(join(', ',$species->classification), 'Virginia creeper phytoplasma, '.
 495    '16SrV (Elm yellows group), Candidatus Phytoplasma, '.
 496    'Acholeplasmataceae, Acholeplasmatales, Mollicutes, '.
 497    'Firmicutes, Bacteria', 'Bug 2195');
 498
 499 # bug 2569, PROJECT line support, read and write, round-tripping
 500
 501 $str = Bio::SeqIO->new(-format =>'gbdriver',
 502                       -verbose => $verbose,
 503                       -file => test_input_file('NC_008536.gb'));
 504
 505 $seq = $str->next_seq;
 506
 507 my $project = ($seq->annotation->get_Annotations('project'))[0];
 508
 509 isa_ok($project, 'Bio::Annotation::SimpleValue');
 510
 511 if ($project) {
 512         is($project->value, 'GenomeProject:12638');
 513 } else {
 514         ok(0, "PROJECT not parsed");
 515 }
 516
 517 $outfile = test_output_file();
 518
 519 $gb = Bio::SeqIO->new(-format => 'genbank',
 520                               -verbose => $verbose,
 521                        -file=> ">$outfile");
 522
 523 $gb->write_seq($seq);
 524
 525 $str = Bio::SeqIO->new(-format =>'gbdriver',
 526                       -verbose => $verbose,
 527                       -file => $outfile);
 528
 529 $seq = $str->next_seq;
 530
 531 $project = ($seq->annotation->get_Annotations('project'))[0];
 532
 533 isa_ok($project, 'Bio::Annotation::SimpleValue');
 534
 535 if ($project) {
 536         is($project->value, 'GenomeProject:12638');
 537 } else {
 538         ok(0, "Roundtrip test failed");
 539 }
 540
 541 ################################## EMBL ##################################
 542
 543 # Set to -1 for release version, so warnings aren't printed
 544
 545 $ast = Bio::SeqIO->new( -format => 'embldriver',
 546                            -verbose => $verbose,
 547                            -file => test_input_file("roa1.dat"));
 548 $ast->verbose($verbose);
 549 $as = $ast->next_seq();
 550 ok defined $as->seq;
 551 is($as->display_id, 'HSHNCPA1');
 552 is($as->accession_number, 'X79536');
 553 is($as->seq_version, 1);
 554 is($as->version, 1);
 555 is($as->desc, 'H.sapiens mRNA for hnRNPcore protein A1');
 556 is($as->molecule, 'RNA');
 557 is($as->alphabet, 'rna');
 558 is(scalar $as->all_SeqFeatures(), 4);
 559 is($as->length, 1198);
 560 is($as->species->binomial(), 'Homo sapiens');
 561 is($as->get_dates, 2);
 562
 563 # EMBL Release 87 changes (8-17-06)
 564
 565 $ast = Bio::SeqIO->new( -format => 'embldriver',
 566                            -verbose => $verbose,
 567                            -file => test_input_file("roa1_v2.dat"));
 568 $ast->verbose($verbose);
 569 $as = $ast->next_seq();
 570 ok defined $as->seq;
 571 # accession # same as display name now
 572 is($as->display_id, 'X79536');
 573 is($as->get_dates, 2);
 574 is($as->accession_number, 'X79536');
 575 is($as->seq_version, 1);
 576 is($as->version, 1);
 577 is($as->desc, 'H.sapiens mRNA for hnRNPcore protein A1');
 578 # mRNA instead of RNA
 579 is($as->molecule, 'mRNA');
 580 is($as->alphabet, 'rna');
 581 is(scalar $as->all_SeqFeatures(), 4);
 582 is($as->length, 1198);
 583 is($as->species->binomial(), 'Homo sapiens');
 584
 585 my $ent = Bio::SeqIO->new( -file => test_input_file("test.embl"),
 586                            -format => 'embldriver');
 587 $seq = $ent->next_seq();
 588
 589 is(defined $seq->seq(), 1,
 590    'success reading Embl with ^ location and badly split double quotes');
 591 is(scalar $seq->annotation->get_Annotations('reference'), 3);
 592 is($seq->get_dates, 0);
 593
 594 $out = Bio::SeqIO->new(-file=> ">$outfile",
 595                           -format => 'embl');
 596 is($out->write_seq($seq),1,
 597    'success writing Embl format with ^ < and > locations');
 598
 599 # embl with no FT
 600 $ent = Bio::SeqIO->new( -file => test_input_file("test.embl"),
 601                         -format => 'embldriver');
 602 $seq = $ent->next_seq();
 603
 604 ok($seq);
 605 is(lc($seq->subseq(1,10)),'gatcagtaga');
 606 is($seq->length, 4870);
 607
 608 # embl with no FH
 609 my $noFH = Bio::SeqIO->new(-file => test_input_file("no_FH.embl"),
 610                         -format => 'embldriver');
 611 $seq = $noFH->next_seq;
 612 is(scalar($seq->get_SeqFeatures), 4);
 613 is($seq->display_id, 'AE000001');
 614 is($seq->get_dates, 0);
 615
 616 # bug 1571
 617 $ent = Bio::SeqIO->new(-format => 'embldriver',
 618                        -file   => test_input_file('test.embl2sq'));
 619 $seq = $ent->next_seq;
 620 is($seq->length,4870);
 621 is($seq->get_dates, 0);
 622
 623 # embl repbase
 624 $ent = Bio::SeqIO->new(-file => test_input_file("BEL16-LTR_AG.embl"), -format => 'embldriver');
 625 $seq = $ent->next_seq;
 626 is($seq->display_id,'BEL16-LTR_AG');
 627 is($seq->get_dates, 2);
 628
 629 # test secondary accessions in EMBL (bug #1332)
 630 $seqio = Bio::SeqIO->new(-format => 'embldriver',
 631                            -file => test_input_file('ECAPAH02.embl'));
 632 $seq = $seqio->next_seq;
 633 is($seq->accession_number, 'D10483');
 634 is($seq->seq_version, 2);
 635 my @accs = $seq->get_secondary_accessions();
 636 is($accs[0], 'J01597');
 637 is($accs[-1], 'X56742');
 638 is($seq->get_dates, 2);
 639
 640 ### TPA TESTS - Thanks to Richard Adams ###
 641 # test Third Party Annotation entries in EMBL/Gb format
 642 # to ensure compatability with parsers.
 643 $str = Bio::SeqIO->new(-verbose => $verbose,
 644                          -format =>'embldriver',
 645                          -file => test_input_file('BN000066-tpa.embl'));
 646 $seq = $str->next_seq;
 647 ok(defined $seq);
 648 is($seq->accession_number, 'BN000066');
 649 is($seq->alphabet, 'dna');
 650 is($seq->display_id, 'AGA000066');
 651 is($seq->length, 5195);
 652 is($seq->division, 'INV');
 653 is($seq->keywords, 'acetylcholinesterase; achE1 gene; Third Party Annotation; TPA');
 654 is($seq->seq_version, 1);
 655 is($seq->feature_count, 15);
 656 is($seq->get_dates, 2);
 657
 658 $spec_obj = $seq->species;
 659 is ($spec_obj->common_name, 'African malaria mosquito');
 660 is ($spec_obj->species, 'gambiae');
 661 is ($spec_obj->genus, 'Anopheles');
 662 is ($spec_obj->binomial, 'Anopheles gambiae');
 663
 664 $ac = $seq->annotation;
 665 $reference =  ($ac->get_Annotations('reference') )[1];
 666 is ($reference->title,'"A novel acetylcholinesterase gene in mosquitoes codes for the insecticide target and is non-homologous to the ace gene in Drosophila"');
 667 is ($reference->authors,'Weill M., Fort P., Berthomi eu A., Dubois M.P., Pasteur N., Raymond M.');
 668 my $cmmnt =  ($ac->get_Annotations('comment') )[0];
 669 is($cmmnt->text, 'see also AJ488492 for achE-1 from Kisumu strain Third Party Annotation Database: This TPA record uses Anopheles gambiae trace archive data (http://trace.ensembl.org)');
 670
 671
 672 $ent = Bio::SeqIO->new( -file => test_input_file("test.embl"),
 673                         -format => 'embldriver');
 674 $ent->verbose($verbose);
 675 $seq = $ent->next_seq();
 676 $species = $seq->species();
 677 @cl = $species->classification();
 678 is( $cl[3] ne $species->genus(), 1, 'genus duplication test');
 679 $ent->close();
 680
 681 #
 682 ## read-write - test embl writing of a PrimarySeq
 683 #
 684 my $primaryseq = Bio::PrimarySeq->new( -seq => 'AGAGAGAGATA',
 685                                       -id  => 'myid',
 686                                       -desc => 'mydescr',
 687                                       -alphabet => 'DNA',
 688                                       -accession_number => 'myaccession');
 689
 690 $verbose = -1 unless $ENV{'BIOPERLDEBUG'};  # silence warnings unless we are debuggin
 691
 692 my $embl = Bio::SeqIO->new(-format => 'embl',
 693                           -verbose => $verbose,
 694                           -file => ">$outfile");
 695
 696 ok($embl->write_seq($primaryseq));
 697
 698 # this should generate a warning
 699 my $scalar = "test";
 700 eval {
 701         $embl->write_seq($scalar);
 702 };
 703 ok ($@);
 704
 705 ############################## Swiss/UniProt ##############################
 706
 707 $seqio = Bio::SeqIO->new( -verbose => $verbose,
 708                                      -format => 'swissdriver',
 709                                      -file   => test_input_file('test.swiss'));
 710
 711 isa_ok($seqio, 'Bio::SeqIO');
 712 $seq = $seqio->next_seq;
 713 my @gns = $seq->annotation->get_Annotations('gene_name');
 714
 715 $outfile = test_output_file();
 716 $seqio = Bio::SeqIO->new( -verbose => $verbose,
 717                                  -format => 'swiss',
 718                                  -file   => ">$outfile");
 719
 720 $seqio->write_seq($seq);
 721
 722 # reads it in once again
 723 $seqio = Bio::SeqIO->new( -verbose => $verbose,
 724                                  -format => 'swissdriver',
 725                                  -file => $outfile);
 726
 727 $seq = $seqio->next_seq;
 728 isa_ok($seq->species, 'Bio::Species');
 729 is($seq->species->ncbi_taxid, 6239);
 730
 731 # version, seq_update, dates (5 tests)
 732 is($seq->version, 40);
 733 my ($ann) = $seq->annotation->get_Annotations('seq_update');
 734 eval {is($ann->display_text, 35,'operator overloading in AnnotationI is deprecated')};
 735 ok(!$@);
 736
 737 my @dates = $seq->get_dates;
 738 my @date_check = qw(01-NOV-1997 01-NOV-1997 16-OCT-2001);
 739
 740 for my $date (@dates) {
 741     my $expdate = shift @date_check;
 742     is($date, $expdate,'dates');
 743 }
 744
 745 my @gns2 = $seq->annotation->get_Annotations('gene_name');
 746 # check gene name is preserved (was losing suffix in worm gene names)
 747 ok($#gns2 == 0 && $gns[0]->value eq $gns2[0]->value);
 748
 749 # test swissprot multiple RP lines
 750 $str = Bio::SeqIO->new(-file => test_input_file('P33897'));
 751 $seq = $str->next_seq;
 752 isa_ok($seq, 'Bio::Seq::RichSeqI');
 753 @refs = $seq->annotation->get_Annotations('reference');
 754 is( @refs, 23);
 755 is($refs[20]->rp, 'VARIANTS X-ALD LEU-98; ASP-99; GLU-217; GLN-518; ASP-608; ILE-633 AND PRO-660, AND VARIANT THR-13.');
 756
 757 # version, seq_update, dates (5 tests)
 758 is($seq->version, 44);
 759 ($ann) = $seq->annotation->get_Annotations('seq_update');
 760 is($ann->display_text, 28,'operator overloading in AnnotationI is deprecated');
 761 @dates = $seq->get_dates;
 762 @date_check = qw(01-FEB-1994 01-FEB-1994 15-JUN-2004);
 763 for my $date (@dates) {
 764     is($date, shift @date_check);
 765 }
 766
 767 $ast = Bio::SeqIO->new(-verbose => $verbose,
 768                                   -format => 'swissdriver' ,
 769                                   -file => test_input_file("roa1.swiss"));
 770 $as = $ast->next_seq();
 771
 772 ok defined $as->seq;
 773 is($as->id, 'ROA1_HUMAN', "id is ".$as->id);
 774 like($as->primary_id, qr(Bio::PrimarySeq));
 775 is($as->length, 371);
 776 is($as->alphabet, 'protein');
 777 is($as->division, 'HUMAN');
 778 is(scalar $as->all_SeqFeatures(), 16);
 779 is(scalar $as->annotation->get_Annotations('reference'), 11);
 780
 781 # version, seq_update, dates (5 tests)
 782 is($as->version, 35);
 783 ($ann) = $as->annotation->get_Annotations('seq_update');
 784 is($ann->display_text, 15,'operator overloading in AnnotationI is deprecated');
 785 @dates = $as->get_dates;
 786 @date_check = qw(01-MAR-1989 01-AUG-1990 01-NOV-1997);
 787 for my $date (@dates) {
 788     is($date, shift @date_check);
 789 }
 790
 791 ($ent,$out) = undef;
 792 ($as,$seq) = undef;
 793
 794 $seqio = Bio::SeqIO->new(-format => 'swissdriver' ,
 795                                  -verbose => $verbose,
 796                                  -file => test_input_file("swiss.dat"));
 797 $seq = $seqio->next_seq;
 798 isa_ok($seq, 'Bio::Seq::RichSeqI');
 799
 800 # more tests to verify we are actually parsing correctly
 801 like($seq->primary_id, qr(Bio::PrimarySeq));
 802 is($seq->display_id, 'MA32_HUMAN');
 803 is($seq->length, 282);
 804 is($seq->division, 'HUMAN');
 805 is($seq->alphabet, 'protein');
 806 my @f = $seq->all_SeqFeatures();
 807 is(@f, 2);
 808 is($f[1]->primary_tag, 'CHAIN');
 809 is(($f[1]->get_tag_values('description'))[0], 'COMPLEMENT COMPONENT 1, Q SUBCOMPONENT BINDING PROTEIN');
 810
 811 # version, seq_update, dates (5 tests)
 812 is($seq->version, 40);
 813 ($ann) = $seq->annotation->get_Annotations('seq_update');
 814 is($ann->display_text, 31,'operator overloading in AnnotationI is deprecated');
 815 @dates = $seq->get_dates;
 816 @date_check = qw(01-FEB-1995 01-FEB-1995 01-OCT-2000);
 817 for my $date (@dates) {
 818     is($date, shift @date_check);
 819 }
 820
 821 my @genenames = qw(GC1QBP HABP1 SF2P32 C1QBP);
 822 ($ann) = $seq->annotation->get_Annotations('gene_name');
 823 # use Data::Stag findval and element name to get values/nodes
 824 foreach my $gn ( $ann->findval('Name') ) {
 825     ok ($gn, shift(@genenames));
 826 }
 827 foreach my $gn ( $ann->findval('Synonyms') ) {
 828     ok ($gn, shift(@genenames));
 829 }
 830 like($ann->value, qr/Name: GC1QBP/);
 831
 832 # test for feature locations like ?..N
 833 $seq = $seqio->next_seq();
 834 isa_ok($seq, 'Bio::Seq::RichSeqI');
 835 like($seq->primary_id, qr(Bio::PrimarySeq));
 836 is($seq->display_id, 'ACON_CAEEL');
 837 is($seq->length, 788);
 838 is($seq->division, 'CAEEL');
 839 is($seq->alphabet, 'protein');
 840 is(scalar $seq->all_SeqFeatures(), 5);
 841
 842 foreach my $gn ( $seq->annotation->get_Annotations('gene_name') ) {
 843     ok ($gn->value, 'F54H12.1');
 844 }
 845
 846 # test species in swissprot -- this can be a n:n nightmare
 847 $seq = $seqio->next_seq();
 848 isa_ok($seq, 'Bio::Seq::RichSeqI');
 849 like($seq->primary_id, qr(Bio::PrimarySeq));
 850 @sec_acc = $seq->get_secondary_accessions();
 851 is($sec_acc[0], 'P29360');
 852 is($sec_acc[1], 'Q63631');
 853 is($seq->accession_number, 'P42655');
 854 @kw = $seq->get_keywords;
 855 is( $kw[0], 'Brain');
 856 is( $kw[1], 'Neurone');
 857 is($kw[3], 'Multigene family');
 858 is($seq->display_id, '143E_HUMAN');
 859
 860 # hybrid names from old sequences are no longer valid, these are chopped
 861 # off at the first organism
 862 is($seq->species->binomial, "Homo sapiens");
 863 is($seq->species->common_name, "Human");
 864 is($seq->species->ncbi_taxid, 9606);
 865
 866 $seq = $seqio->next_seq();
 867 isa_ok($seq, 'Bio::Seq::RichSeqI');
 868 like($seq->primary_id, qr(Bio::PrimarySeq));
 869 is($seq->species->binomial, "Bos taurus");
 870 is($seq->species->common_name, "Bovine");
 871 is($seq->species->ncbi_taxid, 9913);
 872
 873 # multiple genes in swissprot
 874 $seq = $seqio->next_seq();
 875 isa_ok($seq, 'Bio::Seq::RichSeqI');
 876 like($seq->primary_id, qr(Bio::PrimarySeq));
 877
 878 ($ann) = $seq->annotation->get_Annotations("gene_name");
 879 @genenames = qw(CALM1 CAM1 CALM CAM CALM2 CAM2 CAMB CALM3 CAM3 CAMC);
 880 my $flatnames = "(CALM1 OR CAM1 OR CALM OR CAM) AND (CALM2 OR CAM2 OR CAMB) AND (CALM3 OR CAM3 OR CAMC)";
 881
 882 my @names = @genenames; # copy array
 883
 884 my @ann_names = $ann->get_all_values();
 885 is(scalar(@ann_names), scalar(@names));
 886
 887 # do this in a layered way (nested tags)
 888 for my $node ($ann->findnode('gene_name')) {
 889     for my $name ($node->findval('Name')) {
 890         is($name, shift(@names));
 891     }
 892     for my $name ($node->findval('Synonyms')) {
 893         is($name, shift(@names));
 894     }
 895 }
 896
 897 is(scalar(@names),0);
 898
 899 # same entry as before, but with the new gene names format
 900 $seqio = Bio::SeqIO->new(-format => 'swissdriver',
 901                                  -verbose => $verbose,
 902                          -file => test_input_file("calm.swiss"));
 903 $seq = $seqio->next_seq();
 904 isa_ok($seq, 'Bio::Seq::RichSeqI');
 905 like($seq->primary_id, qr(Bio::PrimarySeq));
 906
 907 ($ann) = $seq->annotation->get_Annotations("gene_name");
 908 @names = @genenames; # copy array
 909
 910 my @ann_names2 = $ann->get_all_values(); #emulate StructuredValue's flattened array
 911 is(scalar(@ann_names2), scalar(@names));
 912
 913 for my $node ($ann->findnode('gene_name')) {
 914     for my $name ($node->findval('Name')) {
 915         is($name, shift(@names));
 916     }
 917     for my $name ($node->findval('Synonyms')) {
 918         is($name, shift(@names));
 919     }
 920 }
 921
 922 is(scalar(@names),0);
 923
 924 # test proper parsing of references
 925 my @litrefs = $seq->annotation->get_Annotations('reference');
 926 is(scalar(@litrefs), 17);
 927
 928 my @titles = (
 929     '"Complete amino acid sequence of human brain calmodulin."',
 930     '"Multiple divergent mRNAs code for a single human calmodulin."',
 931     '"Molecular analysis of human and rat calmodulin complementary DNA clones. Evidence for additional active genes in these species."',
 932     '"Isolation and nucleotide sequence of a cDNA encoding human calmodulin."',
 933     '"Structure of the human CALM1 calmodulin gene and identification of two CALM1-related pseudogenes CALM1P1 and CALM1P2."',
 934     undef,
 935     '"Characterization of the human CALM2 calmodulin gene and comparison of the transcriptional activity of CALM1, CALM2 and CALM3."',
 936     '"Cloning of human full-length CDSs in BD Creator(TM) system donor vector."',
 937     '"The DNA sequence and analysis of human chromosome 14."',
 938     '"Generation and initial analysis of more than 15,000 full-length human and mouse cDNA sequences."',
 939     '"Alpha-helix nucleation by a calcium-binding peptide loop."',
 940     '"Solution structure of Ca(2+)-calmodulin reveals flexible hand-like properties of its domains."',
 941     '"Calmodulin structure refined at 1.7 A resolution."',
 942     '"Drug binding by calmodulin: crystal structure of a calmodulin-trifluoperazine complex."',
 943     '"Structural basis for the activation of anthrax adenylyl cyclase exotoxin by calmodulin."',
 944     '"Physiological calcium concentrations regulate calmodulin binding and catalysis of adenylyl cyclase exotoxins."',
 945     '"Crystal structure of a MARCKS peptide containing the calmodulin-binding domain in complex with Ca2+-calmodulin."',
 946 );
 947
 948 my @locs = (
 949     "Biochemistry 21:2565-2569(1982).",
 950     "J. Biol. Chem. 263:17055-17062(1988).",
 951     "J. Biol. Chem. 262:16663-16670(1987).",
 952     "Biochem. Int. 9:177-185(1984).",
 953     "Eur. J. Biochem. 225:71-82(1994).",
 954     "Submitted (FEB-1995) to the EMBL/GenBank/DDBJ databases.",
 955     "Cell Calcium 23:323-338(1998).",
 956     "Submitted (MAY-2003) to the EMBL/GenBank/DDBJ databases.",
 957     "Nature 421:601-607(2003).",
 958     "Proc. Natl. Acad. Sci. U.S.A. 99:16899-16903(2002).",
 959     "Proc. Natl. Acad. Sci. U.S.A. 96:903-908(1999).",
 960     "Nat. Struct. Biol. 8:990-997(2001).",
 961     "J. Mol. Biol. 228:1177-1192(1992).",
 962     "Biochemistry 33:15259-15265(1994).",
 963     "Nature 415:396-402(2002).",
 964     "EMBO J. 21:6721-6732(2002).",
 965     "Nat. Struct. Biol. 10:226-231(2003).",
 966 );
 967
 968 my @positions = (
 969      undef, undef,
 970     undef, undef,
 971     undef, undef,
 972     undef, undef,
 973     undef, undef,
 974     undef, undef,
 975     undef, undef,
 976     undef, undef,
 977     undef, undef,
 978     undef, undef,
 979     94, 103,
 980     1, 76,
 981     undef, undef,
 982     undef, undef,
 983     5, 148,
 984     1, 148,
 985     undef, undef,
 986 );
 987
 988 foreach my $litref (@litrefs) {
 989     is($litref->title, shift(@titles));
 990     is($litref->location, shift(@locs));
 991     is($litref->start, shift(@positions));
 992     is($litref->end, shift(@positions));
 993 }
 994
 995 # format parsing changes (pre-rel 9.0)
 996
 997 $seqio = Bio::SeqIO->new( -verbose => $verbose,
 998                          -format => 'swissdriver',
 999                          -file   => test_input_file('pre_rel9.swiss'));
1000
1001 ok($seqio);
1002 $seq = $seqio->next_seq;
1003 isa_ok($seq->species, 'Bio::Species');
1004 is($seq->species->ncbi_taxid, "6239");
1005
1006 # version, seq_update, dates (5 tests)
1007 is($seq->version, 44);
1008 ($ann) = $seq->annotation->get_Annotations('seq_update');
1009 is($ann->display_text, 1);
1010 @dates = $seq->get_dates;
1011 @date_check = qw(01-NOV-1997 01-NOV-1996 30-MAY-2006 );
1012 for my $date (@dates) {
1013     is($date, shift @date_check);
1014 }
1015
1016 my @idcheck = qw(Z66513 T22647 Cel.30446 Q06319 Q20772 F54D5.7 WBGene00010052
1017                  F54D5.7 GO:0005515 IPR006089 IPR006091 IPR006090
1018                  IPR006092 IPR009075 IPR009100 IPR013764 PF00441
1019                  PF02770 PF02771 PS00072 PS00073);
1020
1021 for my $dblink ( $seq->annotation->get_Annotations('dblink') ) {
1022     is($dblink->primary_id, shift @idcheck);
1023 }
1024
1025 $seqio = Bio::SeqIO->new( -verbose => $verbose,
1026                          -format => 'swissdriver',
1027                          -file   => test_input_file('pre_rel9.swiss'));
1028
1029 my @namespaces = qw(Swiss-Prot TrEMBL TrEMBL);
1030
1031 while (my $seq = $seqio->next_seq) {
1032     is($seq->namespace, shift @namespaces);
1033 }
1034
1035 # format parsing changes (rel 9.0, Oct 2006)
1036
1037 $seqio = Bio::SeqIO->new( -verbose => $verbose,
1038                          -format => 'swissdriver',
1039                          -file   => test_input_file('rel9.swiss'));
1040
1041 ok($seqio);
1042 $seq = $seqio->next_seq;
1043 isa_ok($seq->species, 'Bio::Species');
1044 is($seq->species->ncbi_taxid, 6239);
1045
1046 is($seq->version, 47);
1047 ($ann) = $seq->annotation->get_Annotations('seq_update');
1048 is($ann->display_text, 1,'operator overloading in AnnotationI is deprecated');
1049 @dates = $seq->get_dates;
1050 @date_check = qw(01-NOV-1997 01-NOV-1996 31-OCT-2006 );
1051 for my $date (@dates) {
1052     is($date, shift @date_check);
1053 }
1054
1055 @idcheck = qw(Z66513 T22647 Cel.30446 Q06319 Q20772 F54D5.7 cel:F54D5.7
1056          WBGene00010052 F54D5.7 GO:0005515 IPR006089 IPR006091 IPR006090
1057          IPR006092 IPR009075 IPR013786 IPR009100 IPR013764 PF00441 PF02770
1058          PF02771 PS00072 PS00073 );
1059
1060 for my $dblink ( $seq->annotation->get_Annotations('dblink') ) {
1061     is($dblink->primary_id, shift @idcheck);
1062 }
1063
1064 $seqio = Bio::SeqIO->new( -verbose => $verbose,
1065                          -format => 'swissdriver',
1066                          -file   => test_input_file('rel9.swiss'));
1067
1068 @namespaces = qw(Swiss-Prot TrEMBL TrEMBL);
1069
1070 while (my $seq = $seqio->next_seq) {
1071     is($seq->namespace, shift @namespaces);
1072 }
1073
1074 # bug 2288
1075 # Q8GBD3.swiss
1076 $seqio = Bio::SeqIO->new( -verbose => $verbose,
1077                          -format => 'swiss',
1078                          -file   => test_input_file('Q8GBD3.swiss'));
1079
1080 while (my $seq = $seqio->next_seq) {
1081     my $lineage = join(';', $seq->species->classification);
1082         is ($lineage, 'Acetobacter aceti;Acetobacter subgen. Acetobacter;'.
1083                 'Acetobacter;Acetobacteraceae;Rhodospirillales;Alphaproteobacteria;'.
1084                 'Proteobacteria;Bacteria');
1085 }
1086
1087 # test for GenBank swissprot/UniProt/UniProtKB DBSOURCE line (Bug : RT 44536)
1088 $ast = Bio::SeqIO->new(-format => 'genbank',
1089                               -verbose => $verbose,
1090                        -file => test_input_file('P39765.gb'));
1091 $ast->verbose($verbose);
1092 $as = $ast->next_seq();
1093 is $as->molecule, 'PRT',$as->accession_number;;
1094 is $as->alphabet, 'protein';
1095 # Though older GenBank releases indicate SOURCE contains only the common name,
1096 # this is no longer true.  In general, this line will contain an abbreviated
1097 # form of the full organism name (but may contain the full length name),
1098 # as well as the optional common name and organelle.  There is no get/set
1099 # for the abbreviated name but it is accessible via name()
1100 ok defined($as->species->name('abbreviated')->[0]);
1101 is $as->species->name('abbreviated')->[0], 'Bacillus subtilis';
1102 is($as->primary_id, 20141743);
1103 $ac = $as->annotation;
1104 ok defined $ac;
1105 @dblinks = $ac->get_Annotations('dblink');
1106 is(scalar @dblinks,31);
1107 is($dblinks[0]->database, 'UniProtKB');
1108 is($dblinks[0]->primary_id, 'PYRR_BACSU');
1109 is($dblinks[0]->version, undef);
1110 is($dblinks[0]->display_text, 'UniProtKB:PYRR_BACSU','operator overloading in AnnotationI is deprecated');