1 # $Id: phyloxml.pm 11507 2007-06-23 01:37:45Z jason $
3 # BioPerl module for Bio::TreeIO::phyloxml
5 # Cared for by Mira Han <mirhan@indiana.edu>
9 # You may distribute this module under the same terms as perl itself
11 # POD documentation - main docs before the code
15 Bio::TreeIO::phyloxml - TreeIO implementation for parsing PhyloXML format.
19 # do not use this module directly
21 my $treeio = Bio::TreeIO->new(-format => 'phyloxml',
23 my $tree = $treeio->next_tree;
27 This module handles parsing and writing of phyloXML format.
33 User feedback is an integral part of the evolution of this and other
34 Bioperl modules. Send your comments and suggestions preferably to the
35 Bioperl mailing list. Your participation is much appreciated.
37 bioperl-l@bioperl.org - General discussion
38 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
42 Report bugs to the Bioperl bug tracking system to help us keep track
43 of the bugs and their resolution. Bug reports can be submitted viax the
46 http://bugzilla.open-bio.org/
48 =head1 AUTHOR - Mira Han
50 Email mirhan@indiana.edu
54 The rest of the documentation details each of the object methods.
55 Internal methods are usually preceded with a _
60 # Let the code begin...
63 package Bio
::TreeIO
::phyloxml
;
66 # Object preamble - inherits from Bio::Root::Root
69 use Bio
::Tree
::AnnotatableNode
;
70 use Bio
::Annotation
::SimpleValue
;
72 use XML
::LibXML
::Reader
;
73 use base
qw(Bio::TreeIO);
77 my($self, %args) = @_;
78 $args{-treetype
} ||= 'Bio::Tree::Tree';
79 $args{-nodetype
} ||= 'Bio::Tree::AnnotatableNode';
80 $self->SUPER::_initialize
(%args);
82 # phyloxml TreeIO does not use SAX,
83 # therefore no need to attach EventHandler
84 # instead we will define a reader that is a pull-parser of libXML
85 if ($self->{'_file'}) {
86 $self->{'_reader'} = XML
::LibXML
::Reader
->new(
87 location
=> $self->{'_file'},
92 $self->treetype($args{-treetype
});
93 $self->nodetype($args{-nodetype
});
94 $self->{'_lastitem'} = {}; # holds open items and the attribute hash
101 my %start_elements = (
102 'phylogeny' => \
&element_phylogeny
,
103 'clade' => \
&element_clade
,
105 $self->{'_start_elements'} = \
%start_elements;
107 'phylogeny' => \
&end_element_phylogeny
,
108 'clade' => \
&end_element_clade
,
110 $self->{'_end_elements'} = \
%end_elements;
117 Usage : my $tree = $treeio->next_tree
118 Function: Gets the next tree in the stream
119 Returns : Bio::Tree::TreeI
128 my $reader = $self->{'_reader'};
130 while ($reader->read)
132 if ($reader->nodeType == XML_READER_TYPE_END_ELEMENT
)
134 if ($reader->name eq 'phylogeny')
136 $tree = $self->end_element_phylogeny();
149 Usage : $treeio->write_tree($tree);
150 Function: Write a tree out to data stream in phyloxml format
152 Args : Bio::Tree::TreeI object
157 my ($self, @trees) = @_;
158 foreach my $tree (@trees) {
159 my $root = $tree->get_root_node;
160 $self->_print("<phylogeny>");
161 $self->_print($self->_write_tree_Helper($root));
162 $self->_print("</phylogeny>");
165 $self->flush if $self->_flush_on_write && defined $self->_fh;
170 sub _write_tree_Helper
172 my ($self, $node, $str) = @_; # this self is a Bio::Tree::phyloxml
173 if (ref($node) ne 'Bio::Tree::AnnotatableNode') {
174 $self->throw( "node must be a Bio::Tree::AnnotatableNode" );
176 my $ac = $node->annotation;
177 my $seq = $node->sequence;
181 my @attr = $ac->get_Annotations('_attr'); # check id_source
183 my @id_source = $attr[0]->get_Annotations('id_source');
185 $str .= " id_source=\"".$id_source[0]->value."\"";
190 # print all descendent nodes
191 foreach my $child ( $node->each_Descendent() ) {
192 $str = $self->_write_tree_Helper($child, $str);
195 # print all annotations
196 $str = print_annotation
( $node, $str, $ac );
197 # print all sequences
199 $str = print_seq_annotation
( $self, $str, $seq );
220 my $reader = $self->{'_reader'};
221 if ($reader->nodeType == XML_READER_TYPE_ELEMENT
)
223 $self->{'_lastitem'}->{$reader->name}++;
224 push @
{$self->{'_lastitem'}->{'current'}}, { $reader->name=>{}}; # current holds current element and empty hash for its attributes
226 if (exists $self->{'_start_elements'}->{$reader->name}) {
227 my $method = $self->{'_start_elements'}->{$reader->name};
231 $self->element_annotation();
233 if ($reader->isEmptyElement) {
234 # do procedures for XML_READER_TYPE_END_ELEMENT since element is complete
236 if (exists $self->{'_end_elements'}->{$reader->name}) {
237 my $method = $self->{'_end_elements'}->{$reader->name};
241 $self->end_element_annotation();
243 $self->{'_lastitem'}->{ $reader->name }--;
244 pop @
{$self->{'_lastitem'}->{'current'}};
247 if ($reader->nodeType == XML_READER_TYPE_TEXT
)
249 $self->{'_currenttext'} = $reader->value;
251 if ($reader->nodeType == XML_READER_TYPE_END_ELEMENT
)
253 if (exists $self->{'_end_elements'}->{$reader->name}) {
254 my $method = $self->{'_end_elements'}->{$reader->name};
258 $self->end_element_annotation();
260 $self->{'_lastitem'}->{ $reader->name }--;
261 pop @
{$self->{'_lastitem'}->{'current'}};
262 $self->{'_currenttext'} = '';
267 =head2 processAttribute
269 Title : processAttribute
280 my ($self, $data) = @_;
281 my $reader = $self->{'_reader'};
283 # several ways of reading attributes:
284 # read all attributes:
285 if ($reader-> moveToFirstAttribute
) {
287 $data->{$reader->name()} = $reader->value;
288 } while ($reader-> moveToNextAttribute
);
289 $reader-> moveToElement
;
291 # back at the element
294 # read a specific attribute:
295 #print "Attribute b: ",$reader-> getAttribute('b'),"\n";
299 =head2 element_phylogeny
301 Title : element_phylogeny
302 Usage : $handler->element_phylogeny
303 Function: Begins a Tree event cycle
309 sub element_phylogeny
312 $self->{'_currentitems'} = []; # holds nodes while parsing current level
313 $self->{'_currentnodes'} = []; # holds nodes while constructing tree
314 $self->{'_currentannotation'} = []; # holds annotationcollection
315 $self->{'_currenttext'} = '';
316 $self->{'_levelcnt'} = [];
317 $self->{'_id_link'} = {};
319 $self->processAttribute($self->current_attr);
323 sub end_element_phylogeny
328 # if there is more than one node in _currentnodes
329 # aggregate the nodes into trees basically ad-hoc.
330 if ( @
{$self->{'_currentnodes'}} > 1)
332 $root = $self->nodetype->new( -verbose
=> $self->verbose,
334 tostring
=> \
&node_to_string
,
336 while ( @
{$self->{'_currentnodes'}} ) {
337 my ($node) = ( shift @
{$self->{'_currentnodes'}});
338 $root->add_Descendent($node);
341 # if there is only one node in _currentnodes
343 elsif ( @
{$self->{'_currentnodes'}} == 1)
345 $root = shift @
{$self->{'_currentnodes'}};
348 my $tree = $self->treetype->new(
349 -verbose
=> $self->verbose,
351 -id
=> $self->current_attr->{'name'},
352 %{$self->current_attr}
354 foreach my $tag ( keys %{$self->current_attr} ) {
355 $tree->add_tag_value( $tag, $self->current_attr->{$tag} );
363 Title : element_clade
364 Usage : $->element_clade
365 Function: Begins a clade cycle
374 my $reader = $self->{'_reader'};
375 my %data = (); # doesn't use current attribute in order to save memory
376 $self->processAttribute(\
%data);
377 # create a node (Annotatable Node)
378 my $tnode = $self->nodetype->new( -verbose
=> $self->verbose,
380 tostring
=> \
&node_to_string
,
383 # add all attributes as tags (Annotation::SimpleValue)
384 foreach my $tag ( keys %data ) {
385 $tnode->add_tag_value( $tag, $data{$tag} );
387 # if there is id_source add clade to _id_link
388 if (exists $data{'id_source'}) {
389 $self->{'_id_link'}->{$data{'id_source'}} = $tnode;
391 # push into temporary list
392 push @
{$self->{'_currentitems'}}, $tnode;
395 sub end_element_clade
398 my $reader = $self->{'_reader'};
400 my $curcount = scalar @
{$self->{'_currentnodes'}};
401 my $level = $reader->depth() - 2;
402 my $childcnt = $self->{'_levelcnt'}->[$level+1] || 0;
404 # pop from temporary list
405 my $tnode = pop @
{$self->{'_currentitems'}};
406 if ( $childcnt > 0) {
407 if( $childcnt > $curcount)
409 $self->throw("something wrong with event construction treelevel ".
410 "$level is recorded as having $childcnt nodes ".
411 "but current nodes at this level is $curcount\n");
413 my @childnodes = splice( @
{$self->{'_currentnodes'}}, - $childcnt);
414 for ( @childnodes ) {
415 $tnode->add_Descendent($_);
417 $self->{'_levelcnt'}->[$level+1] = 0;
419 push @
{$self->{'_currentnodes'}}, $tnode;
420 $self->{'_levelcnt'}->[$level]++;
425 =head2 element_annotation
427 Title : element_annotation
428 Usage : $->element_annotation
435 sub element_annotation
438 my $reader = $self->{'_reader'};
439 my $current = $self->current_element();
440 my $prev = $self->prev_element();
442 # read attributes of element
443 $self->processAttribute($self->current_attr);
447 map { if ($_ =~ /^id_ref/) {push @idrefs, $self->current_attr->{$_};} } keys %{$self->current_attr};
449 my @srcbyidrefs = ();
450 foreach my $idref (@idrefs) { push @srcbyidrefs, $self->{'_id_link'}->{$idref}; }
452 # exception when id_ref is defined but id_src is not, or vice versa.
453 if (@idrefs xor @srcbyidrefs) {
454 $self->throw("id_ref and id_src incompatible: @idrefs, @srcbyidrefs");
457 # we are annotating a Node
458 # set _currentannotation
459 if ( (@srcbyidrefs && $srcbyidrefs[0]->isa($self->nodetype)) || ((@srcbyidrefs == 0) && $prev eq 'clade') ) {
460 # find node to annotate
463 $tnode = $srcbyidrefs[0];
466 $tnode = $self->{'_currentitems'}->[-1];
468 my $ac = $tnode->annotation();
469 # add the new anncollection with the current element as key
470 my $newann = Bio
::Annotation
::Collection
->new();
471 $ac->add_Annotation($current, $newann);
472 # push to current annotation
473 push (@
{$self->{'_currentannotation'}}, $newann);
476 # we are already within an annotation
478 my $ac = $self->{'_currentannotation'}->[-1];
480 # add the new anncollection with the current element as key
481 my $newann = Bio
::Annotation
::Collection
->new();
482 $ac->add_Annotation($current, $newann);
483 push (@
{$self->{'_currentannotation'}}, $newann);
489 =head2 end_element_annotation
491 Title : end_element_annotation
492 Usage : $->end_element_annotation
499 sub end_element_annotation
502 my $reader = $self->{'_reader'};
503 my $current = $self->current_element();
504 my $prev = $self->prev_element();
507 my $idsrc = $self->current_attr->{'id_source'};
511 map { if ($_ =~ /^id_ref/) {
512 push @idrefs, $self->current_attr->{$_};
513 delete $self->current_attr->{$_};
514 } } keys %{$self->current_attr};
516 my @srcbyidrefs = ();
517 foreach my $idref (@idrefs) { push @srcbyidrefs, $self->{'_id_link'}->{$idref}; }
519 # exception when id_src is defined but id_ref is not, or vice versa.
520 if (@idrefs xor @srcbyidrefs) {
521 $self->throw("id_ref and id_src incompatible: @idrefs, @srcbyidrefs");
524 # we are annotating a Tree
525 if ( $prev eq 'phylogeny') {
527 # if annotation regards nodes
528 if ($srcbyidrefs[0]->isa($self->nodetype)) {
531 # if annotation regards sequences
532 elsif ($srcbyidrefs[0]->isa("Bio::SeqI")) {
533 # add code to implement sequence_relation among Bio::SeqI's
537 # annotate Tree via tree attribute
538 $self->prev_attr->{$current} = $self->{'_currenttext'};
541 # we are annotating a Node
542 if (( @srcbyidrefs && $srcbyidrefs[0]->isa($self->nodetype)) || ((@srcbyidrefs == 0) && $prev eq 'clade'))
544 # pop from current annotation
545 my $ac = pop (@
{$self->{'_currentannotation'}});
546 $self->annotateNode( $current, $ac);
547 # additional setups for compatibility with NodeI
550 $tnode = $srcbyidrefs[0];
553 $tnode = $self->{'_currentitems'}->[-1];
555 if ($current eq 'name') {
556 $tnode->id($self->{'_currenttext'});
558 elsif ($current eq 'branch_length') {
559 $tnode->branch_length($self->{'_currenttext'});
561 elsif ($current eq 'confidence') {
562 if ((exists $self->current_attr->{'type'}) && ($self->current_attr->{'type'} eq 'bootstrap')) {
563 $tnode->bootstrap($self->{'_currenttext'}); # this needs to change (adds 'B' annotation)
566 elsif ($current eq 'sequence') {
568 if (my @molseq = $ac->get_Annotations('mol_seq')) {
569 my @strac = $molseq[0]->get_Annotations('_text');
570 $str = $strac[0]->value();
572 my $newseq = Bio
::Seq
->new(-seq
=> $str, -annotation
=>$ac);
573 $tnode->sequence($newseq);
574 $ac->remove_Annotations('mol_seq');
575 $tnode->annotation->remove_Annotations($current);
576 # if there is id_source add sequence to _id_link
578 $self->{'_id_link'}->{$idsrc} = $newseq;
581 elsif ($idsrc && $current eq 'taxonomy') {
582 # if there is id_source add sequence to _id_link
583 $self->{'_id_link'}->{$idsrc} = $ac;
586 elsif ($prev eq 'clade_relation') {
588 # we are within an Annotation
590 my $ac = pop (@
{$self->{'_currentannotation'}});
592 $self->annotateNode( $current, $ac);
601 Usage : $->annotateNode( $element, $ac)
610 my ($self, $element, $newac) = @_;
611 # if attribute exists then add Annotation::Collection
612 if ( scalar keys %{$self->current_attr} ) {
613 my $newattr = Bio
::Annotation
::Collection
->new();
614 foreach my $tag (keys %{$self->current_attr}) {
615 my $sv = new Bio
::Annotation
::SimpleValue
(
616 -value
=> $self->current_attr->{$tag}
618 $newattr->add_Annotation($tag, $sv);
620 $newac->add_Annotation('_attr', $newattr);
622 # if text exists add text as SimpleValue
623 if ( $self->{'_currenttext'} ) {
624 my $newvalue = new Bio
::Annotation
::SimpleValue
( -value
=> $self->{'_currenttext'} );
625 $newac->add_Annotation('_text', $newvalue);
633 Usage : $->element_id
634 Function: identifier element used by phylogeny, clade, taxonomy
643 my $reader = $self->{'_reader'};
652 Function: returns the attribute hash for current item
662 return 0 if ! defined $self->{'_lastitem'} ||
663 ! defined $self->{'_lastitem'}->{'current'}->[-1];
664 my @keys = keys %{$self->{'_lastitem'}->{'current'}->[-1]};
665 (@keys == 1) || die "there should be only one key for each hash";
666 return $self->{'_lastitem'}->{'current'}->[-1]->{$keys[0]};
673 Function: returns the attribute hash for previous item
683 return 0 if ! defined $self->{'_lastitem'} ||
684 ! defined $self->{'_lastitem'}->{'current'}->[-2];
685 my @keys = keys %{$self->{'_lastitem'}->{'current'}->[-2]};
686 (@keys == 1) || die "there should be only one key for each hash";
687 return $self->{'_lastitem'}->{'current'}->[-2]->{$keys[0]};
690 =head2 current_element
692 Title : current_element
701 sub current_element
{
704 return 0 if ! defined $self->{'_lastitem'} ||
705 ! defined $self->{'_lastitem'}->{'current'}->[-1];
706 my @keys = keys %{$self->{'_lastitem'}->{'current'}->[-1]};
707 (@keys == 1) || die "there should be only one key for each hash";
725 return 0 if ! defined $self->{'_lastitem'} ||
726 ! defined $self->{'_lastitem'}->{'current'}->[-2];
727 my @keys = keys %{$self->{'_lastitem'}->{'current'}->[-2]};
728 (@keys == 1) || die "there should be only one key for each hash";
747 return 0 if ! defined $self->{'_lastitem'} ||
748 ! defined $self->{'_lastitem'}->{'current'}->[-1];
749 my @keys = keys %{$self->{'_lastitem'}->{'current'}->[-1]};
750 (@keys == 1) || die "there should be only one key for each hash";
751 return ($e eq $keys[0]);
754 =head2 within_element
756 Title : within_element
768 return $self->{'_lastitem'}->{$e};
774 Usage : $obj->treetype($newval)
776 Returns : value of treetype
777 Args : newvalue (optional)
783 my ($self,$value) = @_;
784 if( defined $value) {
785 $self->{'treetype'} = $value;
787 return $self->{'treetype'};
793 Usage : $obj->nodetype($newval)
795 Returns : value of nodetype
796 Args : newvalue (optional)
802 my ($self,$value) = @_;
803 if( defined $value) {
804 $self->{'nodetype'} = $value;
806 return $self->{'nodetype'};
810 =head1 Methods for implementing to_string callback for AnnotatableNode
814 =head2 node_to_string
816 Title : node_to_string
817 Usage : $annotatablenode->to_string_callback(\&node_to_string)
818 Function: set as callback in AnnotatableNode, prints the node information in string
819 Returns : string of node information
827 my ($self) = @_; # this self is a Bio::Tree::AnnotatableNode
828 # not a Bio::TreeIO::phyloxml
830 my $ac = $self->annotation;
831 my $seq = $self->sequence;
835 my @attr = $ac->get_Annotations('_attr'); # check id_source
837 my @id_source = $attr[0]->get_Annotations('id_source');
839 $str .= " id_source=\"".$id_source[0]->value."\"";
844 # print all annotations
845 $str = print_annotation
( $self, $str, $ac );
846 # print all sequences
848 $str = print_seq_annotation
( $self, $str, $seq );
857 my ($self, $str, $ac) = @_;
859 my @all_anns = $ac->get_Annotations();
860 foreach my $ann (@all_anns) {
861 my $key = $ann->tagname;
862 if ($key eq '_attr') { next; } # attributes are already printed in the previous level
863 if (ref($ann) eq 'Bio::Annotation::SimpleValue')
865 if ($key eq '_text') {
874 elsif (ref($ann) eq 'Bio::Annotation::Collection')
876 my @attrs = $ann->get_Annotations('_attr');
877 if (@attrs) { # if there is a attribute collection
879 $str = print_attr
($self, $str, $attrs[0]);
885 $str = print_annotation
($self, $str, $ann);
894 my ($self, $str, $ac) = @_;
895 my @all_attrs = $ac->get_Annotations();
896 foreach my $attr (@all_attrs) {
897 if (ref($attr) ne 'Bio::Annotation::SimpleValue') {
898 $self->throw("attribute should be a SimpleValue");
901 $str .= $attr->tagname;
903 $str .= $attr->value;
908 sub print_seq_annotation
910 my ($self, $str, $seq) = @_;
912 $str .= "<sequence>";
913 my @all_anns = $seq->annotation->get_Annotations();
914 foreach my $ann (@all_anns) {
915 my $key = $ann->tagname;
916 if ($key eq '_attr') { next; } # attributes are already printed in the previous level
917 if (ref($ann) eq 'Bio::Annotation::SimpleValue')
919 if ($key eq '_text') {
928 elsif (ref($ann) eq 'Bio::Annotation::Collection')
930 my @attrs = $ann->get_Annotations('_attr');
931 if (@attrs) { # if there is a attribute collection
933 $str = print_attr
($self, $str, $attrs[0]);
939 $str = print_annotation
($self, $str, $ann);
947 $str .= "</mol_seq>";
950 $str .= "</sequence>";