Bio/SeqIO/entrezgene.pm

   1 # $Id$
   2 # BioPerl module for Bio::SeqIO::entrezgene
   3 #
   4 # You may distribute this module under the same terms as perl itself
   5 #
   6 # POD documentation - main docs before the code
   7
   8 =head1 NAME
   9
  10 Bio::SeqIO::entrezgene - Entrez Gene ASN1 parser
  11
  12 =head1 SYNOPSIS
  13
  14    use Bio::SeqIO;
  15
  16    # don't instantiate directly - instead do
  17    my $seqio = Bio::SeqIO->new(-format => 'entrezgene',
  18                                -file => $file);
  19    my $gene = $seqio->next_seq;
  20
  21 =head1 DESCRIPTION
  22
  23 This is EntrezGene ASN bioperl parser. It is built on top of
  24 L<Bio::ASN1::EntrezGene>, a low level ASN parser built by Mingyi Liu
  25 (L<http://sourceforge.net/projects/egparser>). The easiest way to
  26 use it is shown above.
  27
  28 You will get most of the Entrez Gene annotation such as gene symbol,
  29 gene name and description, accession numbers associated
  30 with the gene, etc. Almost all of these are given as  L<Bio::AnnotationI> objects.
  31
  32 If you need all the data do:
  33
  34    my $seqio = Bio::SeqIO->new(-format => 'entrezgene',
  35                                -file => $file,
  36                                -debug => 'on' );
  37    my ($gene,$genestructure,$uncaptured) = $seqio->next_seq;
  38
  39 The second variable returned, C<$genestructure>, is a L<Bio::Cluster::SequenceFamily>
  40 object. It contains all Refseqs and the genomic contigs that are associated
  41 with the particular gene. The third variable, C<$uncaptured>, is a reference
  42 to a plain array.
  43
  44 You can also modify the output to allow back compatibility with the old
  45 LocusLink parser:
  46
  47    my $seqio = Bio::SeqIO->new(-format => 'entrezgene',
  48                                -file => $file,
  49                                -locuslink => 'convert');
  50
  51 The C<-debug> and C<-locuslink> options slow down the parser.
  52
  53 Example code which looks for ontology terms:
  54
  55   my $eio = new Bio::SeqIO(-file => $file,
  56                            -format => 'entrezgene',
  57                            -service_record => 'yes');
  58
  59   while (my $seq = $eio->next_seq) {
  60     my $gid = $seq->accession_number;
  61     foreach my $ot ($ann->get_Annotations('OntologyTerm')) {
  62       next if ($ot->term->authority eq 'STS marker'); # No STS markers
  63       my $evid = $ot->comment;
  64       $evid =~ s/evidence: //i;
  65       my @ref = $ot->term->get_references;
  66       my $id = $ot->identifier;
  67       my $fid = 'GO:' . sprintf("%07u",$id);
  68       print join("\t",$gid, $ot->ontology->name, $ot->name, $evid,
  69         $fid, @ref?$ref[0]->medline:''), "\n";
  70     }
  71   }
  72
  73 =head1 FEEDBACK
  74
  75 =head2 Mailing Lists
  76
  77 User feedback is an integral part of the evolution of this and other
  78 Bioperl modules. Send your comments and suggestions preferably to
  79 the Bioperl mailing list.  Your participation is much appreciated.
  80
  81   bioperl-l@bioperl.org                  - General discussion
  82   http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
  83
  84 =head2 Support
  85
  86 Please direct usage questions or support issues to the mailing list:
  87
  88 L<bioperl-l@bioperl.org>
  89
  90 rather than to the module maintainer directly. Many experienced and
  91 reponsive experts will be able look at the problem and quickly
  92 address it. Please include a thorough description of the problem
  93 with code and data examples if at all possible.
  94
  95 =head2 Reporting Bugs
  96
  97 Report bugs to the Bioperl bug tracking system to help us keep track
  98 of the bugs and their resolution. Bug reports can be submitted via
  99 the web:
 100
 101   http://bugzilla.open-bio.org/
 102
 103 =head1 AUTHOR - Stefan Kirov
 104
 105 Email skirov at utk.edu
 106
 107 =head1 CONTRIBUTORS
 108
 109 Hilmar Lapp, hlapp at gmx.net
 110
 111 =head1 APPENDIX
 112
 113 This parser is based on Bio::ASN1::EntrezGene module.
 114
 115 The rest of the documentation details each of the object methods.
 116 Internal methods are usually preceded with a _
 117
 118 =cut
 119
 120 package Bio::SeqIO::entrezgene;
 121
 122 use strict;
 123 use Bio::ASN1::EntrezGene;
 124 use Bio::Seq;
 125 use Bio::Species;
 126 use Bio::Annotation::SimpleValue;
 127 use Bio::Annotation::DBLink;
 128 use Bio::Annotation::Comment;
 129 use Bio::SeqFeature::Generic;
 130 use Bio::Annotation::Reference;
 131 use Bio::SeqFeature::Gene::Exon;
 132 use Bio::SeqFeature::Gene::Transcript;
 133 use Bio::SeqFeature::Gene::GeneStructure;
 134 use Bio::Cluster::SequenceFamily;
 135
 136 #use Bio::Ontology::Ontology; Relationships.... later
 137 use Bio::Ontology::Term;
 138 use Bio::Annotation::OntologyTerm;
 139 use Data::Dumper;
 140 use base qw(Bio::SeqIO);
 141
 142 %main::eg_to_ll = (
 143     'Official Full Name' => 'OFFICIAL_GENE_NAME',
 144     'chromosome'         => 'CHR',
 145     'cyto'               => 'MAP',
 146     'Official Symbol'    => 'OFFICIAL_SYMBOL'
 147 );
 148 @main::egonly = keys %main::eg_to_ll;
 149
 150 # We define $xval and some other variables so we don't have
 151 # to pass them as arguments
 152 my ( $seq, $ann, $xval, %seqcollection, $buf );
 153
 154 sub _initialize {
 155     my ( $self, @args ) = @_;
 156     $self->SUPER::_initialize(@args);
 157     my %param = @args;
 158     @param{ map { lc $_ } keys %param } = values %param;    # lowercase keys
 159     $self->{_debug}          = $param{-debug}          || 'off';
 160     $self->{_locuslink}      = $param{-locuslink}      || 'no';
 161     $self->{_service_record} = $param{-service_record} || 'no';
 162     $self->{_parser} = Bio::ASN1::EntrezGene->new( file => $param{-file} );
 163
 164     #Instantiate the low level parser here (it is -file in Bioperl
 165     #-should tell M.)
 166     #$self->{_parser}->next_seq; #First empty record- bug in Bio::ASN::Parser
 167 }
 168
 169 sub next_seq {
 170     my $self  = shift;
 171     my $value = $self->{_parser}->next_seq(1);
 172
 173     # $value contains data structure for the
 174     # record being parsed. 2 indicates the recommended
 175     # trimming mode of the data structure
 176     #I use 1 as I prefer not to descend into size 0 arrays
 177     return unless ($value);
 178     my $debug = $self->{_debug};
 179     $self->{_ann}        = Bio::Annotation::Collection->new();
 180     $self->{_currentann} = Bio::Annotation::Collection->new();
 181     my @alluncaptured;
 182
 183     # parse the entry
 184     #my @keys=keys %{$value}; obsolete
 185     $xval = $value->[0];
 186
 187         #return unless ($xval->{gene}->{desc} eq 'albumin');
 188         #return new Bio::Seq (-id=>'Generif service record', -seq=>'')
 189         #    unless ($xval->{'track-info'}{geneid}== 283);
 190     return Bio::Seq->new( -id => 'Generif service record', -seq => '' )
 191       if (
 192         ( $self->{_service_record} ne 'yes' )
 193         && ( $xval->{gene}->{desc} =~
 194             /record to support submission of generifs for a gene not in entrez/i
 195         )
 196       );
 197
 198     #Basic data
 199     #$xval->{summary}=~s/\n//g;
 200     my $seq = Bio::Seq->new(
 201         -display_id       => $xval->{gene}{locus},
 202         -accession_number => $xval->{'track-info'}{geneid},
 203         -desc             => $xval->{summary}
 204     );
 205
 206     #Source data here
 207     $self->_add_to_ann( $xval->{'track-info'}->{status}, 'Entrez Gene Status' );
 208     my $lineage = $xval->{source}{org}{orgname}{lineage};
 209     $lineage =~ s/[\s\n]//g;
 210     my ( $comp, @lineage );
 211     while ($lineage) {
 212         ( $comp, $lineage ) = split( /;/, $lineage, 2 );
 213         unshift @lineage, $comp;
 214     }
 215     unless ( exists( $xval->{source}{org}{orgname}{name}{binomial} ) ) {
 216         shift @lineage;
 217         my ( $gen, $sp ) = split( /\s/, $xval->{source}{org}{taxname} );
 218         if ( ($sp) && ( $sp ne '' ) ) {
 219             if ( $gen =~ /plasmid/i ) {
 220                 $sp = $gen . $sp;
 221             }
 222             unshift @lineage, $sp;
 223         }
 224         else {
 225             unshift @lineage, 'unknown';
 226         }
 227     }
 228     else {
 229         my $sp = $xval->{source}{org}{orgname}{name}{binomial}{species};
 230         if ( ($sp) && ( $sp ne '' ) ) {
 231             my ( $spc, $strain ) = split( 'sp.', $sp );    #Do we need strain?
 232             $spc =~ s/\s//g;
 233             if ( ($spc) && ( $spc ne '' ) ) {
 234                 unshift @lineage, $spc;
 235             }
 236             else {
 237                 unshift @lineage, 'unknown';
 238             }
 239         }
 240         else {
 241             unshift @lineage, 'unknown';
 242         }
 243     }
 244
 245     #print Dumper($xval->{source}{org});
 246     my $ncbiid;
 247     if ( ref( $xval->{source}{org}{db} ) eq 'ARRAY' ) {
 248         foreach my $taxonomy ( @{ $xval->{source}{org}{db} } ) {
 249             if ( lc( $taxonomy->{db} ) eq 'taxon' ) {
 250                 $ncbiid = $taxonomy->{tag}{id};
 251             }
 252             else {
 253                 push @alluncaptured, $taxonomy;
 254             }
 255             delete $xval->{source}{org}{db};
 256         }
 257     }
 258     $ncbiid = $ncbiid || $xval->{source}{org}{db}{tag}{id};
 259     my $s1     = shift @lineage;
 260     my $s2     = shift @lineage;
 261     my $specie = Bio::Species->new(
 262         -classification => [ $s1, $s2 ],
 263         -ncbi_taxid     => $ncbiid
 264     );
 265     $specie->common_name( $xval->{source}{org}{common} );
 266     if ( exists( $xval->{source}->{subtype} )
 267         && ( $xval->{source}->{subtype} ) )
 268     {
 269         if ( ref( $xval->{source}->{subtype} ) eq 'ARRAY' ) {
 270             foreach my $subtype ( @{ $xval->{source}->{subtype} } ) {
 271                 $self->_add_to_ann( $subtype->{name}, $subtype->{subtype} );
 272             }
 273         }
 274         else {
 275             $self->_add_to_ann(
 276                 $xval->{source}->{subtype}->{name},
 277                 $xval->{source}->{subtype}->{subtype}
 278             );
 279         }
 280     }
 281
 282     #Synonyms
 283     if ( ref( $xval->{gene}->{syn} ) eq 'ARRAY' ) {
 284         foreach my $symsyn ( @{ $xval->{gene}->{syn} } ) {
 285             $self->_add_to_ann( $symsyn, 'ALIAS_SYMBOL' );
 286         }
 287     }
 288     else {
 289         $self->_add_to_ann( $xval->{gene}->{syn}, 'ALIAS_SYMBOL' )
 290           if ( $xval->{gene}->{syn} );
 291     }
 292
 293     #COMMENTS (STS not dealt with yet)
 294     if ( exists( $xval->{comments} ) ) {
 295         if ( ref( $xval->{comments} ) eq 'ARRAY' ) {
 296             for my $i ( 0 .. $#{ $xval->{comments} } ) {
 297                 $self->{_current} = $xval->{comments}->[$i];
 298                 push @alluncaptured, $self->_process_all_comments();
 299             }
 300         }
 301         else {
 302             $self->{_current} = $xval->{comments};
 303             push @alluncaptured, $self->_process_all_comments();
 304         }
 305     }
 306
 307     #Gene
 308     if ( exists( $xval->{gene}->{db} ) ) {
 309         if ( ref( $xval->{gene}->{db} ) eq 'ARRAY' ) {
 310             foreach my $genedb ( @{ $xval->{gene}->{db} } ) {
 311                 my $id =
 312                   exists( $genedb->{tag}->{id} )
 313                   ? $genedb->{tag}->{id}
 314                   : $genedb->{tag}->{str};
 315                 $self->_add_to_ann( $id, $genedb->{db} );
 316             }
 317         }
 318         else {
 319             my $id =
 320               ( $xval->{gene}->{db}->{tag}->{id} )
 321               ? $xval->{gene}->{db}->{tag}->{id}
 322               : $xval->{gene}->{db}->{tag}->{str};
 323             $self->_add_to_ann( $id, $xval->{gene}->{db}->{db} );
 324         }
 325         $self->_add_to_ann( $xval->{gene}->{'locus-tag'}, 'LOCUS_SYNONYM' );
 326         delete $xval->{gene}->{db} unless ( $debug eq 'off' );
 327     }
 328
 329     #LOCATION To do: uncaptured stuff
 330     if ( exists( $xval->{location} ) ) {
 331         if ( ref( $xval->{location} ) eq 'ARRAY' ) {
 332             foreach my $loc ( @{ $xval->{location} } ) {
 333                 $self->_add_to_ann( $loc->{'display-str'},
 334                     $loc->{method}->{'map-type'} );
 335             }
 336         }
 337         else {
 338             $self->_add_to_ann(
 339                 $xval->{location}->{'display-str'},
 340                 $xval->{location}->{method}->{'map-type'}
 341             );
 342         }
 343         delete $xval->{location} unless ( $debug eq 'off' );
 344     }
 345
 346     #LOCUS
 347     if ( ref( $xval->{locus} ) eq 'ARRAY' ) {
 348         foreach my $locus ( @{ $xval->{locus} } ) {
 349             $self->{_current} = $locus;
 350             push @alluncaptured, $self->_process_locus();
 351         }
 352     }
 353     else {
 354         push @alluncaptured, $self->_process_locus( $xval->{locus} );
 355     }
 356
 357     #Homology
 358     my ( $uncapt, $hom, $anchor ) = _process_src( $xval->{homology}->{source} );
 359     foreach my $homann (@$hom) {
 360         $self->{_ann}->add_Annotation( 'dblink', $homann );
 361     }
 362     push @alluncaptured, $uncapt;
 363
 364     #Index terms
 365     if (   ( exists( $xval->{'xtra-index-terms'} ) )
 366         && ( $xval->{'xtra-index-terms'} ) )
 367     {
 368         if ( ref( $xval->{'xtra-index-terms'} ) eq 'ARRAY' ) {
 369             foreach my $term ( @{ $xval->{'xtra-index-terms'} } ) {
 370                 $self->_add_to_ann( $term, 'Index terms' );
 371             }
 372         }
 373         else {
 374             $self->_add_to_ann( $xval->{'xtra-index-terms'}, 'Index terms' );
 375         }
 376     }
 377
 378     #PROPERTIES
 379     my @prop;
 380     if ( exists( $xval->{properties} ) ) {
 381         if ( ref( $xval->{properties} ) eq 'ARRAY' ) {
 382             foreach my $property ( @{ $xval->{properties} } ) {
 383                 push @alluncaptured, $self->_process_prop($property);
 384             }
 385         }
 386         else {
 387             push @alluncaptured, $self->_process_prop( $xval->{properties} );
 388         }
 389     }
 390     $seq->annotation( $self->{_ann} );
 391     $seq->species($specie);
 392     my @seqs;
 393     foreach my $key ( keys %seqcollection )
 394     {    #Optimize this, no need to go through hash?
 395         push @seqs, @{ $seqcollection{$key} };
 396     }
 397     my $cluster = Bio::Cluster::SequenceFamily->new(
 398         -family_id   => $seq->accession_number,
 399         -description => "Entrez Gene " . $seq->accession_number,
 400         -members     => \@seqs
 401     );    #Our EntrezGene object
 402           #clean
 403     unless ( $debug eq 'off' ) {
 404         delete $xval->{homology}->{source};
 405         delete( $xval->{summary} );
 406         delete( $xval->{'track-info'} );
 407         delete( $xval->{gene}{locus} );
 408         delete( $xval->{source}{org}{orgname}{lineage} );
 409         delete $xval->{source}{org}{orgname}{name}{binomial}{species};
 410         delete $xval->{gene}{syn};
 411         delete $xval->{source}->{subtype};
 412         delete $xval->{comments};
 413         delete $xval->{properties};
 414         delete $xval->{'xtra-index-terms'};
 415         delete $xval->{status};
 416     }
 417     push @alluncaptured, $xval;
 418     undef %seqcollection;
 419     $seq->annotation( _backcomp_ll( $self->{_ann} ) )
 420       if ( $self->{_locuslink} eq 'convert' );    #Fix this!
 421     return
 422       wantarray
 423       ? ( $seq, $cluster, \@alluncaptured )
 424       : $seq;                                     #Hilmar's suggestion
 425 }
 426
 427 sub _process_refseq {
 428     my $self     = shift;
 429     my $products = shift;
 430     my $ns       = shift;
 431     my $iter     = shift;
 432     $iter++;
 433     my $pid;
 434     my ( @uncaptured, @products );
 435     if ( ref($products) eq 'ARRAY' ) { @products = @{$products}; }
 436     else                             { push @products, $products; }
 437
 438     foreach my $product (@products) {
 439         if ( ( ref($product) eq 'ARRAY' ) && ( $#{$product} > -1 ) ) {
 440             $self->_process_refseq( $product, $ns, $iter );
 441             next;
 442         }
 443         if (
 444             (
 445                 exists( $product->{products} )
 446                 && ( !exists( $product->{accession} ) )
 447             )
 448           )
 449         {
 450             $self->_process_refseq( $product->{products}, $ns );
 451             next;
 452         }
 453
 454         #if ((exists($product->{products})&&($product->{products}))) {
 455         #       $self->_process_refseq($product->{products},$ns,$iter);
 456         #}
 457         if (   ( exists( $product->{seqs}->{whole}->{gi} ) )
 458             && ( ref( $product->{seqs}->{whole}->{gi} ) eq 'ARRAY' ) )
 459         {
 460             $product->{seqs}->{whole}->{gi} =
 461               $product->{seqs}->{whole}->{gi}->[0];
 462         }    #Lose some data
 463         if (   ( exists( $product->{seqs}->{whole}->{gi} ) )
 464             || ( exists( $product->{accession} ) ) )
 465         {    #Minimal data required
 466             my $cann = Bio::Annotation::Collection->new();
 467             $pid = $product->{accession};
 468             my $authority =
 469               exists( $product->{type} )
 470               ? $product->{type}
 471               : $product->{heading};
 472             my $nseq = Bio::Seq->new(
 473                 -accession_number => $product->{seqs}->{whole}->{gi},
 474                 -display_id       => $product->{accession},
 475                 -authority        => $authority,
 476                 -namespace        => $ns
 477             );
 478             if ( exists( $product->{source} ) && ( $product->{source} ) ) {
 479                 if (   ( !defined( $nseq->authority ) )
 480                     && ( exists( $product->{source}->{src} ) )
 481                     && ( exists( $product->{source}->{src}->{db} ) ) )
 482                 {
 483                     $nseq->authority( $product->{source}->{src}->{db} );
 484                 }
 485                 my ( $uncapt, $allann ) = _process_src( $product->{source} );
 486                 push @uncaptured, $uncapt;
 487                 delete $product->{source};
 488                 foreach my $annotation ( @{$allann} ) {
 489                     $cann->add_Annotation( 'dblink', $annotation );
 490                 }
 491             }
 492
 493             delete $product->{seqs}->{whole}->{gi};
 494             delete $product->{accession};
 495             delete $product->{source};
 496             delete $product->{heading};
 497             my ( $uncapt, $ann, $cfeat ) =
 498               $self->_process_comments( $product->{comment} )
 499               if ( exists( $product->{comment} ) );
 500             push @uncaptured, $uncapt;
 501             foreach my $feat ( @{$cfeat} ) {
 502                 $nseq->add_SeqFeature($feat);
 503             }
 504             if ( exists( $product->{products} ) && ( $product->{products} ) ) {
 505                 my ( $uncapt, $prodid ) =
 506                   $self->_process_refseq( $product->{products} );
 507                 push @uncaptured, $uncapt;
 508                 my $simann = Bio::Annotation::SimpleValue->new(
 509                     -value   => $prodid,
 510                     -tagname => 'product'
 511                 );
 512                 $cann->add_Annotation($simann);
 513             }
 514             foreach my $key ( keys %$ann ) {
 515                 foreach my $val ( @{ $ann->{$key} } ) {
 516                     $cann->add_Annotation( $key, $val );
 517                 }
 518             }
 519             $nseq->annotation($cann);
 520             push @{ $seqcollection{seq} }, $nseq;
 521         }
 522     }
 523     undef @products;
 524     undef $products;
 525
 526     #my $ti2=new Benchmark;
 527     # my $td= timediff($ti2, $ti1);
 528     #        print "\tITER $iter:",timestr($td),"\n";
 529     return \@uncaptured, $pid, $seqcollection{seq}, $iter;
 530 }
 531
 532 sub _process_links {
 533     my $self  = shift;
 534     my $links = shift;
 535     my ( @annot, @uncapt );
 536     if ( ref($links) eq 'ARRAY' ) {
 537         foreach my $link (@$links) {
 538             my ( $uncapt, $annot ) = _process_src( $link->{source} )
 539               if ( exists( $link->{source} ) );
 540             push @uncapt, $uncapt;
 541             foreach my $annotation (@$annot) {
 542                 $self->{_ann}->add_Annotation( 'dblink', $annotation );
 543             }
 544         }
 545     }
 546     else {
 547         my ( $uncapt, $annot ) = _process_src( $links->{source} )
 548           if ( exists( $links->{source} ) );
 549         push @uncapt, $uncapt;
 550         foreach my $annotation (@$annot) {
 551             $self->{_ann}->add_Annotation( 'dblink', $annotation );
 552         }
 553     }
 554     return @uncapt;
 555 }
 556
 557 sub _add_to_ann {    #Highest level only
 558     my ( $self, $val, $tag ) = @_;
 559
 560   #  $val=~s/\n//g;#Low level EG parser leaves this so we take care of them here
 561     unless ($tag) {
 562         $self->warn( "No tagname for value $val, tag $tag " . $seq->id . "\n" );
 563         return;
 564     }
 565     my $simann =
 566       Bio::Annotation::SimpleValue->new( -value => $val, -tagname => $tag );
 567     $self->{_ann}->add_Annotation($simann);
 568 }
 569
 570 sub _process_comments {
 571     my $self = shift;
 572     my $prod = shift;
 573     my ( %cann, @feat, @uncaptured, @comments, @sfann );
 574     if ( ( ref($prod) eq 'HASH' ) && ( exists( $prod->{comment} ) ) ) {
 575         $prod = $prod->{comment};
 576     }
 577     if ( ref($prod) eq 'ARRAY' ) {
 578         @comments = @{$prod};
 579     }
 580     else {
 581         push @comments, $prod;
 582     }
 583     my $i = 0;
 584     for my $comm (@comments) {    # Each comments is a hash reference
 585         $self->throw("Comment not a hash reference")
 586           unless ref($comm) eq 'HASH';
 587         my ( $desc, $nfeat, $add, @ann, @comm );
 588
 589 # next unless (exists($comm->{comment}));#Should be more careful when calling _process_comment:To do
 590         my $heading = $comm->{heading} || 'description';
 591         if ( !exists( $comm->{comment} ) ) {
 592             if (   ( exists( $comm->{type} ) )
 593                 && ( $comm->{type} )
 594                 && ( $self->{_current_heading} ) )
 595             {
 596                 $comm->{type} = $self->{_current_heading};
 597             }
 598             if (   ( exists( $comm->{source} ) )
 599                 && ( exists( $comm->{type} ) )
 600                 && ( exists( $comm->{text} ) )
 601                 && ( $comm->{type} ne 'comment' ) )
 602             {
 603                 my ( $uncapt, $annot, $anchor ) =
 604                   _process_src( $comm->{source} );
 605                 my $cann = shift(@$annot);
 606                 if ( defined $cann ) {
 607                     $cann->optional_id( $comm->{text} );
 608                     $cann->authority( $comm->{type} );
 609                     $cann->version( $comm->{version} );
 610                     push @sfann, $cann;
 611                 }
 612             }
 613         }
 614
 615         while (ref($comm) eq 'HASH'
 616             && ( exists( $comm->{comment} ) )
 617             && ( $comm->{comment} ) )
 618         {
 619             if ( ( exists( $comm->{source} ) ) && ( $comm->{source} ) ) {
 620                 my ( $uncapt, $allann, $anchor ) =
 621                   _process_src( $comm->{source} );
 622                 if ($allann) {
 623                     delete $comm->{source};
 624                     push @uncaptured, $uncapt;
 625                     foreach my $annotation ( @{$allann} ) {
 626                         if ( $annotation->{_anchor} ) {
 627                             $desc .= $annotation->{_anchor} . ' ';
 628                         }
 629                         $annotation->optional_id($heading);
 630                         push @sfann, $annotation;
 631                         push @{ $cann{'dblink'} }, $annotation;
 632                     }
 633                 }
 634             }
 635
 636             $comm = $comm->{comment};
 637
 638             if ( ref($comm) eq 'ARRAY' ) {
 639                 @comm = @{$comm};
 640             }
 641             else {
 642                 push @comm, $comm if ($comm);
 643             }
 644
 645             foreach my $ccomm (@comm) {
 646                 next unless ($ccomm);
 647                 if ( exists( $ccomm->{source} ) ) {
 648                     my ( $uncapt, $allann, $anchor ) =
 649                       _process_src( $ccomm->{source} );
 650                     if ($allann) {
 651                         @sfann = @{$allann};
 652                         delete $ccomm->{source};
 653                         push @uncaptured, $uncapt;
 654                     }
 655                 }
 656                 $ccomm = $ccomm->{comment}
 657                   if ( exists( $ccomm->{comment} ) );    #Alice in Wonderland???
 658                 my @loc;
 659                 if ($ccomm) {
 660                     if ( ref($ccomm) eq 'ARRAY' ) {
 661                         @loc = @{$ccomm};
 662                     }
 663                     else {
 664                         push @loc, $ccomm;
 665                     }
 666                 }
 667                 foreach my $loc (@loc) {
 668                     if (   ( exists( $loc->{text} ) )
 669                         && ( $loc->{text} =~ /Location/i ) )
 670                     {
 671                         my ( $l1, $rest ) = split( /-/, $loc->{text} );
 672                         $l1   =~ s/\D//g;
 673                         $rest =~ s/^\s//;
 674                         my ( $l2, $scorestr ) = split( /\s/, $rest, 2 );
 675                         my ( $scoresrc, $score ) = split( /:/, $scorestr );
 676                         $score =~ s/\D//g;
 677                         my ( %tags, $tag );
 678                         unless ($l1) {
 679                             next;
 680                         }
 681                         $nfeat = Bio::SeqFeature::Generic->new(
 682                             -start   => $l1,
 683                             -end     => $l2,
 684                             -strand  => $tags{strand},
 685                             -source  => $loc->{type},
 686                             -seq_id  => $desc,
 687                             -primary => $heading,
 688                             -score   => $score,
 689                             -tag     => { score_src => $scoresrc }
 690                         );
 691                         my $sfeatann = Bio::Annotation::Collection->new();
 692                         foreach my $sfann (@sfann) {
 693                             $sfeatann->add_Annotation( 'dblink', $sfann );
 694                         }
 695                         undef @sfann;
 696                         $nfeat->annotation($sfeatann)
 697                           ; #Thus the annotation will be available both in the seq and seqfeat?
 698                         push @feat, $nfeat;
 699                         delete $loc->{text};
 700                         delete $loc->{type};
 701                     }
 702                     elsif ( exists( $loc->{label} ) ) {
 703                         my $simann = Bio::Annotation::SimpleValue->new(
 704                             -value   => $loc->{text},
 705                             -tagname => $loc->{label}
 706                         );
 707                         delete $loc->{text};
 708                         delete $loc->{label};
 709                         push @{ $cann{'simple'} }, $simann;
 710                         push @uncaptured, $loc;
 711                     }
 712                     elsif ( exists( $loc->{text} ) ) {
 713                         my $simann = Bio::Annotation::SimpleValue->new(
 714                             -value   => $loc->{text},
 715                             -tagname => $heading
 716                         );
 717                         delete $loc->{text};
 718                         push @{ $cann{'simple'} }, $simann;
 719                         push @uncaptured, $loc;
 720                     }
 721                 }
 722             }    #Bit clumsy but that's what we get from the low level parser
 723         }
 724         $i++;
 725     }
 726     if (@sfann) {
 727         push @{ $cann{'dblink'} }, @sfann;
 728     }    #Annotation that is not location specific, for example phenotype
 729          #undef $self->{_current_heading};
 730     return \@uncaptured, \%cann, \@feat;
 731 }
 732
 733 sub _process_src {
 734     my $src = shift;
 735
 736     #Trick we do because sometimes we have an array ref
 737     my ( @ann, $anch, @uncapt );
 738     if ( ref($src) eq 'ARRAY' ) {
 739         foreach my $msrc (@$src) {
 740             my ( $uncapt, $ann, $anchor ) = _process_src($msrc);
 741             push @ann,    @$ann;
 742             push @uncapt, $uncapt;
 743             $anch = $anchor;
 744         }
 745         return \@uncapt, \@ann, $anch;
 746     }
 747     return unless ( exists( $src->{src}->{tag} ) );
 748
 749     #my $t0=new Benchmark
 750     my $db = $src->{src}->{db};
 751     delete $src->{src}->{db};
 752     my $anchor = $src->{anchor} || '';
 753     delete $src->{anchor};
 754     my $url;
 755     if ( exists( $src->{url} ) && ( $src->{url} ) ) {
 756         $url = $src->{url};
 757         $url =~ s/\n//g;
 758         delete $src->{url};
 759     }
 760     if (   ( exists( $src->{src}->{tag}->{str} ) )
 761         && ( $src->{src}->{tag}->{str} ) )
 762     {
 763         my @sq = split( /[,;]/, $src->{src}->{tag}->{str} );
 764         delete $src->{src}->{tag};
 765         foreach my $id (@sq) {
 766             $id =~ s/\n//g;
 767             undef $anchor if ( $anchor eq 'id' );
 768             my $simann = Bio::Annotation::DBLink->new(
 769                 -database   => $db,
 770                 -primary_id => $id,
 771                 -authority  => $src->{heading}
 772             );
 773             $simann->url($url) if ($url);    #DBLink should have URL!
 774             push @ann, $simann;
 775         }
 776     }
 777     else {
 778         my $id = $src->{src}->{tag}->{id} || '';
 779         delete $src->{src}->{tag};
 780         undef $anchor if ( $anchor eq 'id' );
 781         $id =~ s/\n//g;
 782         my $simann = Bio::Annotation::DBLink->new(
 783             -database   => $db,
 784             -primary_id => $id,
 785             -authority  => $src->{heading}
 786         );
 787         if ($anchor) {
 788             $simann->{_anchor} = $anchor;
 789             $simann->optional_id($anchor);
 790         }
 791         $simann->url($url) if ($url);    #DBLink should have URL!
 792         push @ann, $simann;
 793     }
 794
 795     #my $t1=new Benchmark;
 796     #my $td= timediff($t1, $t0);
 797     #print "\t\tSRC:",timestr($td),"\n";
 798     return $src, \@ann, $anchor;
 799 }
 800
 801 sub _add_references {
 802     my $self = shift;
 803     my $refs = shift;
 804     if ( ref($refs) eq 'ARRAY' ) {
 805         foreach my $ref (@$refs) {
 806             my $refan = Bio::Annotation::Reference->new(
 807                 -database   => 'Pubmed',
 808                 -primary_id => $ref
 809             );
 810             $self->{_ann}->add_Annotation( 'Reference', $refan );
 811         }
 812     }
 813     else {
 814         my $refan = Bio::Annotation::Reference->new(
 815             -database   => 'Pubmed',
 816             -primary_id => $refs
 817         );
 818         $self->{_ann}->add_Annotation( 'Reference', $refan );
 819     }
 820 }
 821
 822 #Should we do this at all if no seq coord are present?
 823 sub _process_locus {
 824     my $self = shift;
 825     my @uncapt;
 826     return $self
 827       unless ( exists( $self->{_current}->{accession} )
 828         && ( $self->{_current}->{accession} ) );
 829     my $gseq = Bio::Seq->new(
 830         -display_id       => $self->{_current}->{accession},
 831         -version          => $self->{_current}->{version},
 832         -accession_number => $self->{_current}->{seqs}->{'int'}->{id}->{gi},
 833         -authority        => $self->{_current}->{type},
 834         -namespace        => $self->{_current}->{heading}
 835     );
 836     delete $self->{_current}->{accession};
 837     delete $self->{_current}->{version};
 838     delete $self->{_current}->{'int'}->{id}->{gi};
 839     my ( $start, $end, $strand );
 840
 841     if ( exists( $self->{_current}->{seqs}->{'int'}->{from} ) ) {
 842         $start = $self->{_current}->{seqs}->{'int'}->{from};
 843         delete $self->{_current}->{seqs}->{'int'}->{from};
 844
 845       #unless ($start) {print $locus->{seqs}->{'int'}->{from},"\n",$locus,"\n";}
 846         $end = $self->{_current}->{seqs}->{'int'}->{to};
 847         delete $self->{_current}->{seqs}->{'int'}->{to};
 848         delete $self->{_current}->{seqs}->{'int'}->{strand};
 849         $strand =
 850           $self->{_current}->{seqs}->{'int'}->{strand} eq 'minus' ? -1 : 1
 851           if ( exists( $self->{_current}->{seqs}->{'int'}->{strand} ) )
 852           ;    #1 being default
 853         my $nfeat = Bio::SeqFeature::Generic->new(
 854             -start  => $start,
 855             -end    => $end,
 856             -strand => $strand,
 857             primary => 'gene location'
 858         );
 859         $gseq->add_SeqFeature($nfeat);
 860     }
 861     my @products;
 862     if ( ref( $self->{_current}->{products} ) eq 'ARRAY' ) {
 863         @products = @{ $self->{_current}->{products} };
 864     }
 865     else {
 866         push @products, $self->{_current}->{products};
 867     }
 868     delete $self->{_current}->{products};
 869     my $gstruct = Bio::SeqFeature::Gene::GeneStructure->new();
 870     foreach my $product (@products) {
 871         my ( $tr, $uncapt ) =
 872           _process_products_coordinates( $product, $start, $end, $strand );
 873         $gstruct->add_transcript($tr) if ($tr);
 874         undef $tr->{parent};    #Because of a cycleG
 875         push @uncapt, $uncapt;
 876     }
 877     $gseq->add_SeqFeature($gstruct);
 878     push @{ $seqcollection{genestructure} }, $gseq;
 879     return @uncapt;
 880 }
 881
 882 =head1 _process_products_coordinates
 883
 884 To do:
 885
 886 =cut
 887
 888 sub _process_products_coordinates {
 889     my $coord = shift;
 890     my $start = shift
 891       || 0;    #In case it is not known: should there be an entry at all?
 892     my $end    = shift || 1;
 893     my $strand = shift || 1;
 894     my ( @coords, @uncapt );
 895     return unless ( exists( $coord->{accession} ) );
 896     my $transcript = Bio::SeqFeature::Gene::Transcript->new(
 897         -primary => $coord->{accession},    #Desc is actually non functional...
 898         -start   => $start,
 899         -end     => $end,
 900         -strand  => $strand,
 901         -desc    => $coord->{type}
 902     );
 903
 904     if (   ( exists( $coord->{'genomic-coords'}->{mix}->{'int'} ) )
 905         || ( exists( $coord->{'genomic-coords'}->{'packed-int'} ) ) )
 906     {
 907         @coords =
 908           exists( $coord->{'genomic-coords'}->{mix}->{'int'} )
 909           ? @{ $coord->{'genomic-coords'}->{mix}->{'int'} }
 910           : @{ $coord->{'genomic-coords'}->{'packed-int'} };
 911         foreach my $exon (@coords) {
 912             next unless ( exists( $exon->{from} ) );
 913             my $exonobj = Bio::SeqFeature::Gene::Exon->new(
 914                 -start  => $exon->{from},
 915                 -end    => $exon->{to},
 916                 -strand => $strand
 917             );
 918             $transcript->add_exon($exonobj);
 919             delete $exon->{from};
 920             delete $exon->{to};
 921             delete $exon->{strand};
 922             push @uncapt, $exon;
 923         }
 924     }
 925     my ( $prot, $uncapt );
 926     if ( exists( $coord->{products} ) ) {
 927         my ( $prot, $uncapt ) =
 928           _process_products_coordinates( $coord->{products},
 929             $start, $end, $strand );
 930         $transcript->add_SeqFeature($prot);
 931         push @uncapt, $uncapt;
 932     }
 933     return $transcript, \@uncapt;
 934 }
 935
 936 =head1 _process_prop
 937
 938 To do: process GO
 939
 940 =cut
 941
 942 sub _process_prop {
 943     my $self = shift;
 944     my $prop = shift;
 945     my @uncapt;
 946     if ( exists( $prop->{properties} ) ) {    #Iterate
 947         if ( ref( $prop->{properties} ) eq 'ARRAY' ) {
 948             foreach my $propn ( @{ $prop->{properties} } ) {
 949                 push @uncapt, $self->_process_prop($propn);
 950             }
 951         }
 952         else {
 953             push @uncapt, $self->_process_prop( $prop->{properties} );
 954         }
 955     }
 956     unless ( ( exists( $prop->{heading} ) )
 957         && ( $prop->{heading} eq 'GeneOntology' ) )
 958     {
 959         $self->_add_to_ann( $prop->{text}, $prop->{label} )
 960           if ( exists( $prop->{text} ) );
 961         delete $prop->{text};
 962         delete $prop->{label};
 963         push @uncapt, $prop;
 964         return \@uncapt;
 965     }
 966
 967     #Will do GO later
 968     if ( exists( $prop->{comment} ) ) {
 969         push @uncapt, $self->_process_go( $prop->{comment} );
 970     }
 971 }
 972
 973 sub _process_all_comments {
 974     my $self    = shift;
 975     my $product = $self->{_current};    #Better without copying
 976     my @alluncaptured;
 977     my $heading = $product->{heading} if ( exists( $product->{heading} ) );
 978     if ($heading) {
 979
 980         #my $tx1=new Benchmark;
 981         delete $product->{heading};
 982       CLASS: {
 983             if ( $heading =~ 'RefSeq Status' )
 984             {    #IN case NCBI changes slightly the spacing:-)
 985                 $self->_add_to_ann( $product->{label}, 'RefSeq status' );
 986                 last CLASS;
 987             }
 988             if ( $heading =~ 'NCBI Reference Sequences' )
 989             {    #IN case NCBI changes slightly the spacing:-)
 990                 if (   ( exists( $product->{comment} ) )
 991                     && ( !exists( $product->{products} ) ) )
 992                 {
 993                     $product->{products} = $product->{comment};
 994                 }
 995
 996 #unless (($product->{products})&&(exists($product->{comment}))) {
 997 #if (ref ($product->{comment}) eq 'ARRAY') {
 998 #       foreach my $pc (@{$product->{comment}}) {
 999 #               push @{$product->{products}},$pc->{products};
1000 #       }
1001 #}
1002 #else {
1003 #       $product->{products}=exists($product->{comments}->{products})?$product->{comments}->{products}:$product->{comment};
1004 #}
1005 #}
1006                 my @uncaptured =
1007                   $self->_process_refseq( $product->{products}, 'refseq' );
1008                 push @alluncaptured, @uncaptured;
1009                 last CLASS;
1010             }
1011             if (   ( $heading =~ 'Related Sequences' )
1012                 && ( exists( $product->{products} ) ) )
1013             {    #IN case NCBI changes slightly the spacing:-)
1014                 my @uncaptured = $self->_process_refseq( $product->{products} );
1015                 push @alluncaptured, @uncaptured;
1016                 last CLASS;
1017             }
1018             if (   ( $heading =~ 'Additional Links' )
1019                 && ( exists( $product->{comment} ) ) )
1020             {    #IN case NCBI changes slightly the spacing:-)
1021                 push @alluncaptured,
1022                   $self->_process_links( $product->{comment} );
1023                 last CLASS;
1024             }
1025             if ( $heading =~ 'LocusTagLink' )
1026             {    #IN case NCBI changes slightly the spacing:-)
1027                 $self->_add_to_ann(
1028                     $product->{source}->{src}->{tag}->{id},
1029                     $product->{source}->{src}->{db}
1030                 );
1031                 last CLASS;
1032             }
1033             if (   ( $heading =~ 'Sequence Tagged Sites' )
1034                 && ( exists( $product->{comment} ) ) )
1035             {    #IN case NCBI changes slightly the spacing:-)
1036                 push @alluncaptured, $self->_process_STS( $product->{comment} );
1037                 delete $product->{comment};
1038                 last CLASS;
1039             }
1040             if ( $heading =~ 'Pathways' ) {
1041                 $self->{_current_heading} = 'Pathways';
1042                 last CLASS;
1043             }
1044         }
1045
1046         # my $tx2=new Benchmark;
1047         # my $td= timediff($tx2, $tx1);
1048         #print "\t\t$heading:",timestr($td),"\n";
1049
1050     }
1051     if ( exists( $product->{type} ) && ( $product->{type} eq 'generif' ) ) {
1052         push @alluncaptured, $self->_process_grif($product);
1053         return @alluncaptured;    #Maybe still process the comments?
1054     }
1055     if ( exists( $product->{refs} ) ) {
1056         $self->_add_references( $product->{refs}->{pmid} );
1057         delete $product->{refs}->{pmid};
1058         push @alluncaptured, $product;
1059     }
1060     if ( exists( $product->{comment} ) ) {
1061         my ( $uncapt, $allan, $allfeat ) =
1062           $self->_process_comments( $product->{comment} );
1063         foreach my $key ( keys %$allan ) {
1064             foreach my $val ( @{ $allan->{$key} } ) {
1065                 $self->{_ann}->add_Annotation( $key, $val );
1066             }
1067         }
1068         delete $product->{refs}->{comment};
1069         push @alluncaptured, $uncapt;
1070     }
1071
1072     #if (exists($product->{source})) {
1073     #    my ($uncapt,$ann,$anchor)=_process_src($product->{source});
1074     #    foreach my $dbl (@$ann) {
1075     #        $self->{_ann}->add_Annotation('dblink',$dbl);
1076     #    }
1077     #}
1078     return @alluncaptured;
1079 }
1080
1081 sub _process_STS {
1082     my $self    = shift;
1083     my $comment = shift;
1084     my @comm;
1085     push @comm, ( ref($comment) eq 'ARRAY' ) ? @{$comment} : $comment;
1086     foreach my $product (@comm) {
1087         my $sts = Bio::Ontology::Term->new(
1088             -identifier => $product->{source}->{src}->{tag}->{id},
1089             -name       => $product->{source}->{anchor},
1090             -comment    => $product->{source}->{'post-text'}
1091         );
1092         $sts->namespace( $product->{source}->{src}->{db} );
1093         $sts->authority('STS marker');
1094         my @alt;
1095         if ( exists( $product->{comment} ) ) {
1096             push @alt,
1097               ( ref( $product->{comment} ) eq 'ARRAY' )
1098               ? @{ $product->{comment} }
1099               : $product->{comment};
1100             foreach my $alt (@alt) {
1101                 $sts->add_synonym( $alt->{text} );
1102             }
1103         }
1104         my $annterm = Bio::Annotation::OntologyTerm->new();
1105         $annterm->term($sts);
1106         $self->{_ann}->add_Annotation( 'OntologyTerm', $annterm );
1107     }
1108 }
1109
1110 sub _process_go {
1111     my $self = shift;
1112     my $comm = shift;
1113     my @comm;
1114     push @comm, ( ref($comm) eq 'ARRAY' ) ? @{$comm} : $comm;
1115     foreach my $comp (@comm) {
1116         my $category = $comp->{label};
1117         if ( ref( $comp->{comment} ) eq 'ARRAY' ) {
1118             foreach my $go ( @{ $comp->{comment} } ) {
1119                 my $term = _get_go_term( $go, $category );
1120                 my $annterm = Bio::Annotation::OntologyTerm->new(
1121                     -tagname => 'Gene Ontology' );
1122                 $annterm->term($term);
1123                 $self->{_ann}->add_Annotation( 'OntologyTerm', $annterm );
1124             }
1125         }
1126         else {
1127             my $term = _get_go_term( $comp->{comment}, $category );
1128             my $annterm =
1129               Bio::Annotation::OntologyTerm->new( -tagname => 'Gene Ontology' );
1130             $annterm->term($term);
1131             $self->{_ann}->add_Annotation( 'OntologyTerm', $annterm );
1132         }
1133     }
1134 }
1135
1136 sub _process_grif {
1137     my $self = shift;
1138     my $grif = shift;
1139     if (   ( exists( $grif->{comment} ) )
1140         && ( ref( $grif->{comment} ) eq 'ARRAY' ) )
1141     {
1142         my @uncapt;
1143         foreach my $product ( @{ $grif->{comment} } ) {
1144             next unless ( exists( $product->{text} ) );
1145             my $uproduct = $self->_process_grif($product);
1146
1147             #$self->{_ann->add_Annotation($type,$grifobj);
1148             push @uncapt, $uproduct;
1149         }
1150         return \@uncapt;
1151     }
1152     if ( exists( $grif->{comment}->{comment} ) ) {
1153         $grif = $grif->{comment};
1154     }
1155     my $ref =
1156       ( ref( $grif->{refs} ) eq 'ARRAY' )
1157       ? shift @{ $grif->{refs} }
1158       : $grif->{refs};
1159     my $refergene = '';
1160     my $refdb     = '';
1161     my ( $obj, $type );
1162     if ( $ref->{pmid} ) {
1163         if ( exists( $grif->{source} ) )
1164         {    #unfortunatrely we cannot put yet everything in
1165             $refergene = $grif->{source}->{src}->{tag}->{id};
1166             $refdb     = $grif->{source}->{src}->{db};
1167         }
1168         my $grifobj = new Bio::Annotation::Comment( -text => $grif->{text} );
1169         $obj = Bio::Annotation::DBLink->new(
1170             -database   => 'generif',
1171             -primary_id => $ref->{pmid}
1172             , #The pubmed id (at least the first one) which is a base for the conclusion
1173             -version     => $grif->{version},
1174             -optional_id => $refergene,
1175             -authority   => $refdb
1176         );
1177         $obj->comment($grifobj);
1178         $type = 'dblink';
1179     }
1180     else {
1181         $obj = new Bio::Annotation::SimpleValue( $grif->{text}, 'generif' );
1182         $type = 'generif';
1183     }
1184     delete $grif->{text};
1185     delete $grif->{version};
1186     delete $grif->{type};
1187     delete $grif->{refs};
1188     $self->{_ann}->add_Annotation( $type, $obj );
1189     return $grif;
1190 }
1191
1192 sub _get_go_term {
1193     my $go       = shift;
1194     my $category = shift;
1195     my $refan    = Bio::Annotation::Reference->new(    #We expect one ref per GO
1196         -medline => $go->{refs}->{pmid},
1197         -title   => 'no title'
1198     );
1199     my $term = Bio::Ontology::Term->new(
1200         -identifier => $go->{source}->{src}->{tag}->{id},
1201         -name       => $go->{source}->{anchor},
1202         -definition => $go->{source}->{anchor},
1203         -comment    => $go->{source}->{'post-text'},
1204         -version    => $go->{version}
1205     );
1206     $term->add_reference($refan);
1207     $term->namespace($category);
1208     return $term;
1209 }
1210
1211 sub _backcomp_ll {
1212     my $ann    = shift;
1213     my $newann = Bio::Annotation::Collection->new();
1214
1215     #$newann->{_annotation}->{ALIAS_SYMBOL}=$ann->{_annotation}->{ALIAS_SYMBOL};
1216     # $newann->{_annotation}->{CHR}=$ann->{_annotation}->{chromosome};
1217     # $newann->{_annotation}->{MAP}=$ann->{_annotation}->{cyto};
1218     foreach my $tagmap ( keys %{ $ann->{_typemap}->{_type} } ) {
1219         next if ( grep( /$tagmap/, @main::egonly ) );
1220         $newann->{_annotation}->{$tagmap} = $ann->{_annotation}->{$tagmap};
1221     }
1222
1223 #$newann->{_annotation}->{Reference}=$ann->{_annotation}->{Reference};
1224 #$newann->{_annotation}->{generif}=$ann->{_annotation}->{generif};
1225 #$newann->{_annotation}->{comment}=$ann->{_annotation}->{comment};
1226 # $newann->{_annotation}->{OFFICIAL_GENE_NAME}=$ann->{_annotation}->{'Official Full Name'};
1227     $newann->{_typemap}->{_type} = $ann->{_typemap}->{_type};
1228     foreach my $ftype ( keys %main::eg_to_ll ) {
1229         my $newkey = $main::eg_to_ll{$ftype};
1230         $newann->{_annotation}->{$newkey} = $ann->{_annotation}->{$ftype};
1231         $newann->{_typemap}->{_type}->{$newkey} =
1232           'Bio::Annotation::SimpleValue';
1233         delete $newann->{_typemap}->{_type}->{$ftype};
1234         $newann->{_annotation}->{$newkey}->[0]->{tagname} = $newkey;
1235     }
1236     foreach my $dblink ( @{ $newann->{_annotation}->{dblink} } ) {
1237         next unless ( $dblink->{_url} );
1238         my $simann = Bio::Annotation::SimpleValue->new(
1239             -value   => $dblink->{_url},
1240             -tagname => 'URL'
1241         );
1242         $newann->add_Annotation($simann);
1243     }
1244
1245 #        my $simann=Bio::Annotation::SimpleValue->new(-value=>$seq->desc,-tagname=>'comment');
1246 #        $newann->add_Annotation($simann);
1247
1248     return $newann;
1249 }
1250
1251 1;