1 # $Id: phyloxml.pm 11507 2007-06-23 01:37:45Z jason $
3 # BioPerl module for Bio::TreeIO::phyloxml
5 # Please direct questions and support issues to <bioperl-l@bioperl.org>
7 # Cared for by Mira Han <mirhan@indiana.edu>
11 # You may distribute this module under the same terms as perl itself
13 # POD documentation - main docs before the code
17 Bio::TreeIO::phyloxml - TreeIO implementation for parsing PhyloXML format.
21 # do not use this module directly
23 my $treeio = Bio::TreeIO->new(-format => 'phyloxml',
25 my $tree = $treeio->next_tree;
29 This module handles parsing and writing of phyloXML format.
35 User feedback is an integral part of the evolution of this and other
36 Bioperl modules. Send your comments and suggestions preferably to the
37 Bioperl mailing list. Your participation is much appreciated.
39 bioperl-l@bioperl.org - General discussion
40 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
44 Please direct usage questions or support issues to the mailing list:
46 I<bioperl-l@bioperl.org>
48 rather than to the module maintainer directly. Many experienced and
49 reponsive experts will be able look at the problem and quickly
50 address it. Please include a thorough description of the problem
51 with code and data examples if at all possible.
55 Report bugs to the Bioperl bug tracking system to help us keep track
56 of the bugs and their resolution. Bug reports can be submitted viax the
59 https://redmine.open-bio.org/projects/bioperl/
61 =head1 AUTHOR - Mira Han
63 Email mirhan@indiana.edu
67 The rest of the documentation details each of the object methods.
68 Internal methods are usually preceded with a _
73 # Let the code begin...
76 package Bio
::TreeIO
::phyloxml
;
79 # Object preamble - inherits from Bio::Root::Root
82 use Bio
::Tree
::AnnotatableNode
;
83 use Bio
::Annotation
::SimpleValue
;
84 use Bio
::Annotation
::Relation
;
86 use XML
::LibXML
::Reader
;
87 use base
qw(Bio::TreeIO);
92 my($self, %args) = @_;
93 $args{-treetype
} ||= 'Bio::Tree::Tree';
94 $args{-nodetype
} ||= 'Bio::Tree::AnnotatableNode';
95 $self->SUPER::_initialize
(%args);
97 # phyloxml TreeIO does not use SAX,
98 # therefore no need to attach EventHandler
99 # instead we will define a reader that is a pull-parser of libXML
100 if ($self->mode eq 'r') {
102 $self->{'_reader'} = XML
::LibXML
::Reader
->new(
107 if (!$self->{'_reader'}) {
108 $self->throw("XML::LibXML::Reader not initialized");
111 elsif ($self->mode eq 'w') {
112 # print default lines
113 $self->_print('<?xml version="1.0" encoding="UTF-8"?>',"\n");
114 $self->_print('<phyloxml xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://www.phyloxml.org" xsi:schemaLocation="http://www.phyloxml.org http://www.phyloxml.org/1.10/phyloxml.xsd">');
117 $self->treetype($args{-treetype
});
118 $self->nodetype($args{-nodetype
});
119 $self->{'_lastitem'} = {}; # holds open items and the attribute hash
126 my %start_elements = (
127 'phylogeny' => \
&element_phylogeny
,
128 'clade' => \
&element_clade
,
129 'sequence_relation' => \
&element_relation
,
130 'clade_relation' => \
&element_relation
,
132 $self->{'_start_elements'} = \
%start_elements;
134 'phylogeny' => \
&end_element_phylogeny
,
135 'clade' => \
&end_element_clade
,
136 'sequence_relation' => \
&end_element_relation
,
137 'clade_relation' => \
&end_element_relation
,
139 $self->{'_end_elements'} = \
%end_elements;
144 if ($self->mode eq 'w') {
145 $self->_print('</phyloxml>');
146 $self->flush if $self->_flush_on_write && defined $self->_fh;
148 $self->SUPER::DESTROY
;
154 Usage : my $tree = $treeio->next_tree
155 Function: Gets the next tree in the stream
156 Returns : Bio::Tree::TreeI
164 my $reader = $self->{'_reader'};
166 while ($reader->read)
168 if ($reader->nodeType == XML_READER_TYPE_END_ELEMENT
)
170 if ($reader->name eq 'phylogeny')
172 $tree = $self->end_element_phylogeny();
176 $self->processXMLNode;
183 Title : add_phyloXML_annotation
184 Usage : my $node = $treeio->add_phyloXML_annotation(-obj=>$node, -attr=>"id_source = \"A\"")
185 Function: add attributes to an object
186 Returns : the node that we added annotations to
187 Args : -obj => object that will have the Annotation. (Bio::Tree::AnnotatableNode)
188 -attr => string in the form "A = B", where A is the attribute name and B is the attribute value
194 my ($self, @args) = @_;
195 my ($obj, $attr) = $self->_rearrange([qw(OBJ ATTR)], @args);
198 $attr = '<dummy '.$attr.'/>';
201 my $oldreader = $self->{'_reader'}; # save reader
202 $self->{'_reader'} = XML
::LibXML
::Reader
->new(
206 my $reader = $self->{'_reader'};
207 $self->{'_currentannotation'} = []; # holds annotationcollection
208 $self->{'_currenttext'} = '';
209 #$self->{'_id_link'} = {};
211 # pretend we saw a <clade> element
212 $self->{'_lastitem'}->{'dummy'}++;
213 push @
{$self->{'_lastitem'}->{'current'}}, { 'dummy'=>{}}; # current holds current element and empty hash for its attributes
215 # push object to annotate
216 push @
{$self->{'_currentitems'}}, $obj;
218 # read attributes of element
219 while ($reader->read)
221 #$self->processXMLNode;
222 $self->processAttribute($self->current_attr);
225 # if there is id_source add sequence to _id_link
226 if (exists $self->current_attr->{'id_source'}) {
227 my $idsrc = $self->current_attr->{'id_source'};
228 $self->{'_id_link'}->{$idsrc} = $obj;
233 if (exists $self->current_attr->{'id_ref'}) {
234 $idref = $self->current_attr->{'id_ref'};
238 $srcbyidref = $self->{'_id_link'}->{$idref};
240 # exception when id_ref is defined but id_src is not, or vice versa.
241 if ($idref xor $srcbyidref) {
242 $self->throw("id_ref and id_src incompatible: $idref, $srcbyidref");
245 # if attribute exists then add Annotation::Collection with tag '_attr'
246 my $newac = $obj->annotation;
247 if ( scalar keys %{$self->current_attr} ) {
248 my $newattr = Bio
::Annotation
::Collection
->new();
249 foreach my $tag (keys %{$self->current_attr}) {
250 my $sv = Bio
::Annotation
::SimpleValue
->new(
251 -value
=> $self->current_attr->{$tag}
253 $newattr->add_Annotation($tag, $sv);
255 $newac->add_Annotation('_attr', $newattr);
258 # pop from temporary list
259 pop @
{$self->{'_currentitems'}};
260 $self->{'_lastitem'}->{ $reader->name }-- if $reader->name;
261 pop @
{$self->{'_lastitem'}->{'current'}};
263 $self->{'_reader'} = $oldreader; # restore reader
268 =head2 add_phyloXML_annotation
270 Title : add_phyloXML_annotation
271 Usage : my $node = $treeio->add_phyloXML_annotation(-obj=>$node, -xml=>$xmlstring)
272 my $tree = $treeio->add_phyloXML_annotation('-obj'=>$tree, '-xml'=>'<sequence_relation id_ref_0="A" id_ref_1="B" type="orthology"/>')
274 Function: add annotations to a node in the phyloXML format string
275 Returns : the node that we added annotations to
276 Args : -obj => object that will have the Annotation. (Bio::Tree::AnnotatableNode)
277 -xml => string in phyloXML format that describes the annotation for the node
281 sub add_phyloXML_annotation
283 my ($self, @args) = @_;
284 my ($obj, $xml_string) = $self->_rearrange([qw(OBJ XML)], @args);
286 $xml_string = '<phyloxml>'.$xml_string.'</phyloxml>';
287 $self->debug( $xml_string );
289 my $oldreader = $self->{'_reader'}; # save reader
290 $self->{'_reader'} = XML
::LibXML
::Reader
->new(
291 string
=> $xml_string,
294 my $reader = $self->{'_reader'};
295 #$self->{'_currentannotation'} = []; # holds annotationcollection
296 #$self->{'_currenttext'} = '';
297 #$self->{'_id_link'} = {};
299 # pretend we saw a <clade> element
300 $self->{'_lastitem'}->{'clade'}++;
301 push @
{$self->{'_lastitem'}->{'current'}}, { 'clade'=>{}}; # current holds current element and empty hash for its attributes
302 # our object to annotate (nodeI)
303 # push into temporary list
304 push @
{$self->{'_currentitems'}}, $obj;
306 $reader->read; #read away the first element 'phyloxml'
307 while ($reader->read)
309 $self->processXMLNode;
312 # pop from temporary list
313 pop @
{$self->{'_currentitems'}};
314 $self->{'_lastitem'}->{ $reader->name }-- if $reader->name;
315 pop @
{$self->{'_lastitem'}->{'current'}};
317 $self->{'_reader'} = $oldreader; # restore reader
325 Usage : $treeio->write_tree($tree);
326 Function: Write a tree out to data stream in phyloxml format
328 Args : Bio::Tree::TreeI object
334 my ($self, @trees) = @_;
335 foreach my $tree (@trees) {
336 my $root = $tree->get_root_node;
337 $self->_print("<phylogeny");
338 my @tags = $tree->get_all_tags();
340 foreach my $tag (@tags) {
341 my @values = $tree->get_tag_values($tag);
343 $attr_str .= " ".$tag."=\"".$_."\"";
347 my ($b_rooted) = $tree->get_tag_values('rooted');
349 $attr_str .= " rooted=\"true\"";
352 if($tree->is_binary($tree->get_root_node)) {
353 $attr_str .= " rooted=\"true\"";
356 $attr_str .= " rooted=\"false\"";
359 $self->_print($attr_str);
361 if ($root->isa('Bio::Tree::AnnotatableNode')) {
362 $self->_print($self->_write_tree_Helper_annotatableNode($root));
365 $self->_print($self->_write_tree_Helper_generic($root));
368 # print clade relations
369 while (my $str = pop (@
{$self->{'_tree_attr'}->{'clade_relation'}})) {
372 # print sequence relations
373 while (my $str = pop (@
{$self->{'_tree_attr'}->{'sequence_relation'}})) {
376 $self->_print("</phylogeny>");
378 $self->flush if $self->_flush_on_write && defined $self->_fh;
382 =head2 _write_tree_Helper_annotatableNode
384 Title : _write_tree_Helper_annotatableNode
385 Usage : internal method used by write_tree, not to be used directly
386 Function: recursive helper function of write_tree for the annotatableNodes.
387 translates annotations into xml elements.
388 Returns : string describing the node
389 Args : Bio::Node::AnnotatableNode object, string
393 sub _write_tree_Helper_annotatableNode
395 my ($self, $node, $str) = @_; # this self is a Bio::Tree::phyloxml
397 my $ac = $node->annotation;
399 # if clade_relation exists
400 my @relations = $ac->get_Annotations('clade_relation');
401 foreach (@relations) {
402 my $clade_rel = $self->_relation_to_string($node, $_, '');
404 push (@
{$self->{'_tree_attr'}->{'clade_relation'}}, $clade_rel);
409 my ($attr) = $ac->get_Annotations('_attr'); # check id_source
411 my ($id_source) = $attr->get_Annotations('id_source');
413 $str .= " id_source=\"".$id_source->value."\"";
418 # print all descendent nodes
419 foreach my $child ( $node->each_Descendent() ) {
420 $str = $self->_write_tree_Helper_annotatableNode($child, $str);
423 # print all annotations
424 $str = print_annotation
( $node, $str, $ac );
426 # print all sequences
427 if ($node->has_sequence) {
428 foreach my $seq (@
{$node->sequence}) {
429 # if sequence_relation exists
430 my @relations = $seq->annotation->get_Annotations('sequence_relation');
431 foreach (@relations) {
432 my $sequence_rel = $self->_relation_to_string($seq, $_, '');
434 push (@
{$self->{'_tree_attr'}->{'sequence_relation'}}, $sequence_rel);
436 $str = print_seq_annotation
( $node, $str, $seq );
445 =head2 _write_tree_Helper_generic
447 Title : _write_tree_Helper_generic
448 Usage : internal method used by write_tree, not to be used directly
449 Function: recursive helper function of write_tree for generic NodesI.
450 all tags are translated into property elements.
451 Returns : string describing the node
452 Args : Bio::Node::NodeI object, string
456 sub _write_tree_Helper_generic
458 my ($self, $node, $str) = @_; # this self is a Bio::Tree::phyloxml
463 # print all descendent nodes
464 foreach my $child ( $node->each_Descendent() ) {
465 $str = $self->_write_tree_Helper_generic($child, $str);
469 my @tags = $node->get_all_tags();
470 foreach my $tag (@tags) {
471 my @values = $node->get_tag_values($tag);
472 foreach my $val (@values) {
473 $str .= "<property datatype=\"xsd:string\" ref=\"tag:$tag\" applies_to=\"clade\">";
475 $str .= "</property>";
479 # print NodeI features
485 if ($node->branch_length) {
486 $str .= "<branch_length>";
487 $str .= $node->branch_length;
488 $str .= "</branch_length>";
490 if ($node->bootstrap) {
491 $str .= "<confidence type = \"bootstrap\">";
492 $str .= $node->bootstrap;
493 $str .= "</confidence>";
500 =head2 _relation_to_string
502 Title : _relation_to_string
503 Usage : internal method used by write_tree, not to be used directly
504 Function: internal function used by write_tree to translate Annotation::Relation objects into xml elements.
505 Returns : string describing the node
506 Args : Bio::Node::AnnotatableNode (or Bio::SeqI) object that contains the Annotation::Relation,
507 the Annotation::Relation object,
512 # It may be more appropriate to make Annotation::Relation have
513 # a to_string callback function,
514 # and have this subroutine set as the callback when we are in
516 # I've put it here for now, since write_tree is the only place it is used.
518 sub _relation_to_string
{
519 my ($self, $obj, $rel, $str) = @_;
521 my @attr = $obj->annotation->get_Annotations('_attr'); # check id_source
523 my @id_source = $attr[0]->get_Annotations('id_source');
525 my ($id_ref_0) = $obj->annotation->get_nested_Annotations(
526 '-keys' => ['id_source'],
528 my ($id_ref_1) = $rel->to->annotation->get_nested_Annotations(
529 '-keys' => ['id_source'],
532 my $confidence = $rel->confidence();
533 my $confidence_type = $rel->confidence_type();
535 $str .= $rel->tagname;
536 $str .= " id_ref_0=\"".$id_ref_0->value."\"";
537 $str .= " id_ref_1=\"".$id_ref_1->value."\"";
538 $str .= " type=\"".$rel->type."\"";
540 $str .= " ><confidence";
541 if ($confidence_type) {
542 $str .= " type=\"".$confidence_type."\"";
546 $str .= "</confidence>";
548 $str .= $rel->tagname;
557 =head2 read_annotation
559 Title : read_annotation
560 Usage : $treeio->read_annotation(-obj=>$node, -path=>$path, -attr=>1);
561 Function: read text value (or attribute value) of the annotations corresponding to the element path
562 Returns : list of text values of the annotations matching the path
563 Args : -obj => object that contains the Annotation. (Bio::Tree::AnnotatableNode or Bio::SeqI)
564 -path => path of the nested elements
565 -attr => Boolean value to indicate whether to get the attribute of the element or the text value.
566 (default is 0, meaning text value is returned)
570 # It may be more appropriate to make a separate Annotation::phyloXML object
571 # and have this subroutine within that object so it can handle the
572 # reading and writing of the values and attributes.
573 # but since tagTree is a temporary stub and I didn't want to make
574 # a redundant object I've put it here for now.
578 my ($self, @args) = @_;
579 my ($obj, $path, $attr) = $self->_rearrange([qw(OBJ PATH ATTR)], @args);
580 my $ac = $obj->annotation;
582 my @elements = split ('/', $path);
583 my $final = pop @elements;
584 push (@elements, '_attr');
585 push (@elements, $final);
586 $path = join ('/', @elements);
587 return $self->_read_annotation_attr_Helper( [$ac], $path);
590 return $self->_read_annotation_text_Helper( [$ac], $path);
594 sub _read_annotation_text_Helper
596 my ($self, $acs, $path) = @_;
597 my @elements = split ('/', $path);
598 my $key = shift @elements;
600 foreach my $ac (@
$acs) {
601 foreach my $ann ($ac->get_Annotations($key)) {
602 if ($ann->isa('Bio::AnnotationCollectionI')) {push (@nextacs, $ann)}
605 if (@elements == 0) {
607 my @texts = map {$_->get_Annotations('_text')} @nextacs;
609 $_ && push (@values, $_->value);
614 $path = join ('/', @elements);
615 return $self->_read_annotation_text_Helper( \
@nextacs, $path);
619 sub _read_annotation_attr_Helper
621 my ($self, $acs, $path) = @_;
622 my @elements = split ('/', $path);
623 my $key = shift @elements;
625 foreach my $ac (@
$acs) {
626 foreach my $ann ($ac->get_Annotations($key)) {
627 if ($ann->isa('Bio::AnnotationCollectionI')) {push (@nextacs, $ann)}
630 if (@elements == 1) {
631 my $attrname = $elements[0];
632 my @sv = map {$_->get_Annotations($attrname)} @nextacs;
633 return map {$_->value} @sv;
636 $path = join ('/', @elements);
637 return $self->_read_annotation_attr_Helper( \
@nextacs, $path);
641 =head1 Methods for parsing the XML document
645 =head2 processXMLNode
647 Title : processXMLNode
648 Usage : $treeio->processXMLNode
649 Function: read the XML node and process according to the node type
658 my $reader = $self->{'_reader'};
659 my $nodetype = $reader->nodeType;
660 if ( $nodetype == XML_READER_TYPE_ELEMENT
)
662 $self->{'_lastitem'}->{$reader->name}++;
663 push @
{$self->{'_lastitem'}->{'current'}}, { $reader->name=>{}}; # current holds current element and empty hash for its attributes
665 if (exists $self->{'_start_elements'}->{$reader->name}) {
666 my $method = $self->{'_start_elements'}->{$reader->name};
670 $self->element_default();
672 if ($reader->isEmptyElement) {
673 # element is complete
674 # set nodetype so it can jump and
675 # do procedures for XML_READER_TYPE_END_ELEMENT
676 $nodetype = XML_READER_TYPE_END_ELEMENT
;
679 if ($nodetype == XML_READER_TYPE_TEXT
)
681 $self->{'_currenttext'} = $reader->value;
683 if ($nodetype == XML_READER_TYPE_END_ELEMENT
)
685 if (exists $self->{'_end_elements'}->{$reader->name}) {
686 my $method = $self->{'_end_elements'}->{$reader->name};
690 $self->end_element_default();
692 $self->{'_lastitem'}->{ $reader->name }--;
693 pop @
{$self->{'_lastitem'}->{'current'}};
694 $self->{'_currenttext'} = '';
699 =head2 processAttribute
701 Title : processAttribute
702 Usage : $treeio->processAttribute(\%hash_for_attribute);
703 Function: reads the attributes of the current element into a hash
705 Args : hash reference where the attributes will be stored.
711 my ($self, $data) = @_;
712 my $reader = $self->{'_reader'};
714 # several ways of reading attributes:
715 # read all attributes:
716 if ($reader-> moveToFirstAttribute
) {
718 $data->{$reader->name()} = $reader->value;
719 } while ($reader-> moveToNextAttribute
);
720 $reader-> moveToElement
;
725 =head2 element_phylogeny
727 Title : element_phylogeny
728 Usage : $treeio->element_phylogeny
729 Function: initialize the parsing of a tree
735 sub element_phylogeny
738 $self->{'_currentitems'} = []; # holds nodes while parsing current level
739 $self->{'_currentnodes'} = []; # holds nodes while constructing tree
740 $self->{'_currentannotation'} = []; # holds annotationcollection
741 $self->{'_currenttext'} = '';
742 $self->{'_levelcnt'} = [];
743 $self->{'_id_link'} = {};
744 $self->{'_tree_attr'} = $self->current_attr;
745 $self->processAttribute($self->current_attr);
749 =head2 end_element_phylogeny
751 Title : end_element_phylogeny
752 Usage : $treeio->end_element_phylogeny
753 Function: ends the parsing of a tree building a Tree::TreeI object.
754 Returns : Tree::TreeI
759 sub end_element_phylogeny
764 # if there is more than one node in _currentnodes
765 # aggregate the nodes into trees basically ad-hoc.
766 if ( @
{$self->{'_currentnodes'}} > 1)
768 $root = $self->nodetype->new(
770 tostring
=> \
&node_to_string
,
772 while ( @
{$self->{'_currentnodes'}} ) {
773 my ($node) = ( shift @
{$self->{'_currentnodes'}});
774 $root->add_Descendent($node);
777 # if there is only one node in _currentnodes
779 elsif ( @
{$self->{'_currentnodes'}} == 1)
781 $root = shift @
{$self->{'_currentnodes'}};
784 my $tree = $self->treetype->new(
786 -id
=> $self->current_attr->{'name'},
787 %{$self->current_attr}
789 foreach my $tag ( keys %{$self->current_attr} ) {
790 $tree->add_tag_value( $tag, $self->current_attr->{$tag} );
797 Title : element_clade
798 Usage : $treeio->element_clade
799 Function: initialize the parsing of a node
800 creates a new AnnotatableNode with annotations
809 my $reader = $self->{'_reader'};
810 my %clade_attr = (); # doesn't use current attribute in order to save memory
811 $self->processAttribute(\
%clade_attr);
812 # create a node (Annotatable Node)
813 my $tnode = $self->nodetype->new(
815 tostring
=> \
&node_to_string
,
818 # add all attributes as annotation collection with tag '_attr'
819 my $ac = $tnode->annotation;
820 my $newattr = Bio
::Annotation
::Collection
->new();
821 foreach my $tag (keys %clade_attr) {
822 my $sv = Bio
::Annotation
::SimpleValue
->new(
823 -value
=> $clade_attr{$tag}
825 $newattr->add_Annotation($tag, $sv);
827 $ac->add_Annotation('_attr', $newattr);
829 # if there is id_source add clade to _id_link
830 if (exists $clade_attr{'id_source'}) {
831 $self->{'_id_link'}->{$clade_attr{'id_source'}} = $tnode;
833 # push into temporary list
834 push @
{$self->{'_currentitems'}}, $tnode;
837 =head2 end_element_clade
839 Title : end_element_clade
840 Usage : $treeio->end_element_clade
841 Function: ends the parsing of a node
847 sub end_element_clade
850 my $reader = $self->{'_reader'};
852 my $curcount = scalar @
{$self->{'_currentnodes'}};
853 my $level = $reader->depth() - 2;
854 my $childcnt = $self->{'_levelcnt'}->[$level+1] || 0;
856 # pop from temporary list
857 my $tnode = pop @
{$self->{'_currentitems'}};
858 if ( $childcnt > 0) {
859 if( $childcnt > $curcount)
861 $self->throw("something wrong with event construction treelevel ".
862 "$level is recorded as having $childcnt nodes ".
863 "but current nodes at this level is $curcount\n");
865 my @childnodes = splice( @
{$self->{'_currentnodes'}}, - $childcnt);
866 for ( @childnodes ) {
867 $tnode->add_Descendent($_);
869 $self->{'_levelcnt'}->[$level+1] = 0;
871 push @
{$self->{'_currentnodes'}}, $tnode;
872 $self->{'_levelcnt'}->[$level]++;
876 =head2 element_relation
878 Title : element_relation
879 Usage : $treeio->element_relation
880 Function: starts the parsing of clade relation & sequence relation
889 $self->processAttribute($self->current_attr);
890 my $relationtype = $self->current_attr->{'type'};
891 my $id_ref_0 = $self->current_attr->{'id_ref_0'};
892 my $id_ref_1 = $self->current_attr->{'id_ref_1'};
895 $srcbyidref[0] = $self->{'_id_link'}->{$id_ref_0};
896 $srcbyidref[1] = $self->{'_id_link'}->{$id_ref_1};
898 # exception when id_ref is defined but id_src is not, or vice versa.
899 if ( ($id_ref_0 xor $srcbyidref[0])||($id_ref_1 xor $srcbyidref[1]) ) {
900 $self->throw("id_ref and id_src incompatible: $id_ref_0, $id_ref_1, ", $srcbyidref[0], $srcbyidref[1]);
904 my $ac0 = $srcbyidref[0]->annotation;
905 my $newann = Bio
::Annotation
::Relation
->new(
906 '-type' => $relationtype,
907 '-to' => $srcbyidref[1],
908 '-tagname' => $self->current_element
910 $ac0->add_Annotation($self->current_element, $newann);
912 my $ac1 = $srcbyidref[1]->annotation;
913 $newann = Bio
::Annotation
::Relation
->new(
914 '-type' => $relationtype,
915 '-to' => $srcbyidref[0],
916 '-tagname' => $self->current_element
918 $ac1->add_Annotation($self->current_element, $newann);
919 push (@
{$self->{'_currentannotation'}}, $newann);
922 =head2 end_element_relation
924 Title : end_element_relation
925 Usage : $treeio->end_element_relation
926 Function: ends the parsing of clade relation & sequence relation
932 sub end_element_relation
935 my $ac = pop (@
{$self->{'_currentannotation'}});
939 =head2 element_default
941 Title : element_default
942 Usage : $treeio->element_default
943 Function: starts the parsing of all other elements
952 my $reader = $self->{'_reader'};
953 my $current = $self->current_element();
954 my $prev = $self->prev_element();
956 # read attributes of element
957 $self->processAttribute($self->current_attr);
961 if (exists $self->current_attr->{'id_ref'}) {
962 $idref = $self->current_attr->{'id_ref'};
966 $srcbyidref = $self->{'_id_link'}->{$idref};
968 # exception when id_ref is defined but id_src is not, or vice versa.
969 if ($idref xor $srcbyidref) {
970 $self->throw("id_ref and id_src incompatible: $idref, $srcbyidref");
973 # we are annotating a Node
974 # set _currentannotation
975 if ( ($srcbyidref && $srcbyidref->isa($self->nodetype)) || ((!$srcbyidref) && $prev eq 'clade') ) {
976 # find node to annotate
979 $tnode = $srcbyidref;
982 $tnode = $self->{'_currentitems'}->[-1];
984 my $ac = $tnode->annotation();
985 # add the new anncollection with the current element as key
986 my $newann = Bio
::Annotation
::Collection
->new();
987 $ac->add_Annotation($current, $newann);
988 # push to current annotation
989 push (@
{$self->{'_currentannotation'}}, $newann);
991 # we are within sequence_relation or clade_relation
992 elsif ($prev eq 'clade_relation' || $prev eq 'sequence_relation') {
995 # we are already within an annotation
997 my $ac = $self->{'_currentannotation'}->[-1];
999 # add the new anncollection with the current element as key
1000 my $newann = Bio
::Annotation
::Collection
->new();
1001 $ac->add_Annotation($current, $newann);
1002 push (@
{$self->{'_currentannotation'}}, $newann);
1008 =head2 end_element_default
1010 Title : end_element_default
1011 Usage : $treeio->end_element_default
1012 Function: ends the parsing of all other elements
1018 sub end_element_default
1021 my $reader = $self->{'_reader'};
1022 my $current = $self->current_element();
1023 my $prev = $self->prev_element();
1026 my $idsrc = $self->current_attr->{'id_source'};
1030 if (exists $self->current_attr->{'id_ref'}) {
1031 $idref = $self->current_attr->{'id_ref'};
1032 delete $self->current_attr->{'id_ref'};
1035 my $srcbyidref = '';
1036 $srcbyidref = $self->{'_id_link'}->{$idref};
1038 # exception when id_ref is defined but id_src is not, or vice versa.
1039 if ($idref xor $srcbyidref) {
1040 $self->throw("id_ref and id_src incompatible: $idref, $srcbyidref");
1043 # we are annotating a Tree
1044 if ((!$srcbyidref) && $prev eq 'phylogeny') {
1045 # annotate Tree via tree attribute
1046 $self->prev_attr->{$current} = $self->{'_currenttext'};
1048 # we are within sequence_relation or clade_relation
1049 elsif ($prev eq 'clade_relation' || $prev eq 'sequence_relation') {
1050 my $ann_relation = $self->{'_currentannotation'}->[-1];
1051 # we are here only with <confidence>
1052 if ($current eq 'confidence') {
1053 if (exists $self->current_attr->{'type'}) {
1054 $ann_relation->confidence_type($self->current_attr->{'type'});
1056 $ann_relation->confidence($self->{'_currenttext'});
1059 $self->throw($current, " is not allowed within <*_relation>");
1062 # we are annotating a Node
1063 elsif (( $srcbyidref && $srcbyidref->isa($self->nodetype)) || ((!$srcbyidref) && $prev eq 'clade'))
1065 # pop from current annotation
1066 my $ac = pop (@
{$self->{'_currentannotation'}});
1067 $self->annotateNode( $current, $ac);
1069 # additional setups for compatibility with NodeI
1072 $tnode = $srcbyidref;
1075 $tnode = $self->{'_currentitems'}->[-1];
1077 if ($current eq 'name') {
1078 $tnode->id($self->{'_currenttext'});
1080 elsif ($current eq 'branch_length') {
1081 $tnode->branch_length($self->{'_currenttext'});
1083 elsif ($current eq 'confidence') {
1084 if ((exists $self->current_attr->{'type'}) && ($self->current_attr->{'type'} eq 'bootstrap')) {
1085 $tnode->bootstrap($self->{'_currenttext'}); # this needs to change (adds 'B' annotation)
1088 elsif ($current eq 'sequence') {
1089 # if annotation is <sequence>
1090 # transform the Bio::Annotation object into a Bio::Seq object
1092 # retrieve the sequence
1093 if (my ($molseq) = $ac->get_Annotations('mol_seq')) {
1094 my ($strac) = $molseq->get_Annotations('_text');
1095 $str = $strac->value();
1097 # create Seq object with sequence
1098 my $newseq = Bio
::Seq
->new( -seq
=> $str,
1101 $tnode->sequence($newseq);
1102 $ac->remove_Annotations('mol_seq');
1103 $tnode->annotation->remove_Annotations($current);
1104 # if there is id_source add sequence to _id_link
1106 $self->{'_id_link'}->{$idsrc} = $newseq;
1109 elsif ($idsrc && $current eq 'taxonomy') {
1110 # if there is id_source add sequence to _id_link
1111 $self->{'_id_link'}->{$idsrc} = $ac;
1114 # we are within a default Annotation
1116 my $ac = pop (@
{$self->{'_currentannotation'}});
1118 $self->annotateNode( $current, $ac);
1126 Title : annotateNode
1127 Usage : $treeio->annotateNode($element, $ac)
1128 Function: adds text value and attributes to the AnnotationCollection
1129 that has element name as key. If there are nested elements,
1130 optional AnnotationCollections are added recursively,
1131 with the nested element name as key.
1132 The structure of each AnnotationCollection is
1133 'element' => AnnotationCollection {
1134 '_text' => SimpleValue (text value)
1135 '_attr' => AnnotationCollection {
1136 attribute1 => SimpleValue (attribute value 1)
1137 attribute2 => SimpleValue (attribute value 2)
1140 ['nested element' => AnnotationCollection ]
1149 my ($self, $element, $newac) = @_;
1150 # if attribute exists then add Annotation::Collection with tag '_attr'
1151 if ( scalar keys %{$self->current_attr} ) {
1152 my $newattr = Bio
::Annotation
::Collection
->new();
1153 foreach my $tag (keys %{$self->current_attr}) {
1154 my $sv = Bio
::Annotation
::SimpleValue
->new(
1155 -value
=> $self->current_attr->{$tag}
1157 $newattr->add_Annotation($tag, $sv);
1159 $newac->add_Annotation('_attr', $newattr);
1161 # if text exists add text as SimpleValue with tag '_text'
1162 if ( $self->{'_currenttext'} ) {
1163 my $newvalue = Bio
::Annotation
::SimpleValue
->new( -value
=> $self->{'_currenttext'} );
1164 $newac->add_Annotation('_text', $newvalue);
1169 =head1 Methods for exploring the document
1175 Title : current_attr
1176 Usage : $attr_hash = $treeio->current_attr;
1177 Function: returns the attribute hash for current item
1178 Returns : reference of the attribute hash
1186 return 0 if ! defined $self->{'_lastitem'} ||
1187 ! defined $self->{'_lastitem'}->{'current'}->[-1];
1188 my @keys = keys %{$self->{'_lastitem'}->{'current'}->[-1]};
1189 (@keys == 1) || die "there should be only one key for each hash";
1190 return $self->{'_lastitem'}->{'current'}->[-1]->{$keys[0]};
1196 Usage : $hash_ref = $treeio->prev_attr
1197 Function: returns the attribute hash for previous item
1198 Returns : reference of the attribute hash
1206 return 0 if ! defined $self->{'_lastitem'} ||
1207 ! defined $self->{'_lastitem'}->{'current'}->[-2];
1208 my @keys = keys %{$self->{'_lastitem'}->{'current'}->[-2]};
1209 (@keys == 1) || die "there should be only one key for each hash";
1210 return $self->{'_lastitem'}->{'current'}->[-2]->{$keys[0]};
1213 =head2 current_element
1215 Title : current_element
1216 Usage : $element = $treeio->current_element
1217 Function: returns the name of the current element
1218 Returns : string (element name)
1223 sub current_element
{
1226 return 0 if ! defined $self->{'_lastitem'} ||
1227 ! defined $self->{'_lastitem'}->{'current'}->[-1];
1228 my @keys = keys %{$self->{'_lastitem'}->{'current'}->[-1]};
1229 (@keys == 1) || die "there should be only one key for each hash";
1235 Title : prev_element
1236 Usage : $element = $treeio->current_element
1237 Function: returns the name of the previous element
1238 Returns : string (element name)
1246 return 0 if ! defined $self->{'_lastitem'} ||
1247 ! defined $self->{'_lastitem'}->{'current'}->[-2];
1248 my @keys = keys %{$self->{'_lastitem'}->{'current'}->[-2]};
1249 (@keys == 1) || die "there should be only one key for each hash";
1257 Usage : $obj->treetype($newval)
1258 Function: returns the tree type (default is Bio::Tree::Tree)
1259 Returns : value of treetype
1260 Args : newvalue (optional)
1266 my ($self,$value) = @_;
1267 if( defined $value) {
1268 $self->{'treetype'} = $value;
1270 return $self->{'treetype'};
1276 Usage : $obj->nodetype($newval)
1277 Function: returns the node type (default is Bio::Node::AnnotatableNode)
1278 Returns : value of nodetype
1279 Args : newvalue (optional)
1284 my ($self,$value) = @_;
1285 if( defined $value) {
1286 $self->{'nodetype'} = $value;
1288 return $self->{'nodetype'};
1292 =head1 Methods for implementing to_string callback for AnnotatableNode
1296 =head2 node_to_string
1298 Title : node_to_string
1299 Usage : $annotatablenode->to_string_callback(\&node_to_string)
1300 Function: set as callback in AnnotatableNode, prints the node information in string
1301 Returns : string of node information
1306 # this function is similar to _write_tree_Helper_annotatableNode,
1307 # but it is not recursive
1310 my ($self) = @_; # this self is a Bio::Tree::AnnotatableNode
1311 # not a Bio::TreeIO::phyloxml
1313 my $ac = $self->annotation;
1317 my @attr = $ac->get_Annotations('_attr'); # check id_source
1319 my @id_source = $attr[0]->get_Annotations('id_source');
1321 $str .= " id_source=\"".$id_source[0]->value."\"";
1326 # print all annotations
1327 $str = print_annotation
( $self, $str, $ac );
1328 # print all sequences
1329 if ($self->has_sequence) {
1330 foreach my $seq (@
{$self->sequence}) {
1331 $str = print_seq_annotation
( $self, $str, $seq );
1339 =head2 print_annotation
1341 Title : print_annotation
1342 Usage : $str = $annotatablenode->print_annotation($str, $annotationcollection)
1343 Function: prints the annotationCollection in a phyloXML format.
1344 Returns : string of annotation information
1345 Args : string to which the Annotation should be concatenated to,
1346 annotationCollection that holds the Annotations
1350 # Again, it may be more appropriate to make a separate Annotation::phyloXML object
1351 # and have this subroutine within that object so it can handle the
1352 # reading and writing of the values and attributes.
1353 # especially since this function is used both by
1354 # Bio::TreeIO::phyloxml (through write_tree) and
1355 # Bio::Node::AnnotatableNode (through node_to_string).
1356 # but since tagTree is a temporary stub and I didn't want to make
1357 # a redundant object I've put it here for now.
1359 sub print_annotation
1361 my ($self, $str, $ac) = @_;
1363 my @all_anns = $ac->get_Annotations();
1364 foreach my $ann (@all_anns) {
1365 my $key = $ann->tagname;
1366 if ($key eq '_attr') { next; } # attributes are already printed in the previous level
1367 if ($ann->isa('Bio::Annotation::SimpleValue'))
1369 if ($key eq '_text') {
1370 $str .= $ann->value;
1374 $str .= $ann->value;
1378 elsif ($ann->isa('Bio::Annotation::Collection'))
1380 my @attrs = $ann->get_Annotations('_attr');
1381 if (@attrs) { # if there is a attribute collection
1383 $str = print_attr
($self, $str, $attrs[0]);
1389 $str = print_annotation
($self, $str, $ann);
1399 Usage : $str = $annotatablenode->print_attr($str, $annotationcollection)
1400 Function: prints the annotationCollection in a phyloXML format.
1401 Returns : string of attributes
1402 Args : string to which the Annotation should be concatenated to,
1403 AnnotationCollection that holds the attributes
1407 # Again, it may be more appropriate to make a separate Annotation::phyloXML object
1408 # and have this subroutine within that object so it can handle the
1409 # reading and writing of the values and attributes.
1410 # especially since this function is used both by
1411 # Bio::TreeIO::phyloxml and Bio::Node::AnnotatableNode
1412 # (through print_annotation).
1413 # but since tagTree is a temporary stub and I didn't want to make
1414 # a redundant object I've put it here for now.
1418 my ($self, $str, $ac) = @_;
1419 my @all_attrs = $ac->get_Annotations();
1420 foreach my $attr (@all_attrs) {
1421 if (!$attr->isa('Bio::Annotation::SimpleValue')) {
1422 $self->throw("attribute should be a SimpleValue");
1425 $str .= $attr->tagname;
1427 $str .= '"'.$attr->value.'"';
1432 =head2 print_sequence_annotation
1434 Title : print_sequence_annotation
1435 Usage : $str = $node->print_seq_annotation( $str, $seq );
1436 Function: prints the Bio::Seq object associated with the node
1437 in a phyloXML format.
1438 Returns : string that describes the sequence
1439 Args : string to which the Annotation should be concatenated to,
1440 Seq object to print in phyloXML
1444 # Again, it may be more appropriate to make a separate Annotation::phyloXML object
1445 # and have this subroutine within that object so it can handle the
1446 # reading and writing of the values and attributes.
1447 # especially since this function is used both by
1448 # Bio::TreeIO::phyloxml (through write_tree) and
1449 # Bio::Node::AnnotatableNode (through node_to_string).
1450 # but since tagTree is a temporary stub and I didn't want to make
1451 # a redundant object I've put it here for now.
1454 sub print_seq_annotation
1456 my ($self, $str, $seq) = @_;
1458 $str .= "<sequence";
1459 my ($attr) = $seq->annotation->get_Annotations('_attr'); # check id_source
1461 my ($id_source) = $attr->get_Annotations('id_source');
1463 $str .= " id_source=\"".$id_source->value."\"";
1468 my @all_anns = $seq->annotation->get_Annotations();
1469 foreach my $ann (@all_anns) {
1470 my $key = $ann->tagname;
1471 if ($key eq '_attr') { next; } # attributes are already printed in the previous level
1472 if ($ann->isa('Bio::Annotation::SimpleValue'))
1474 if ($key eq '_text') {
1475 $str .= $ann->value;
1479 $str .= $ann->value;
1483 elsif ($ann->isa('Bio::Annotation::Collection'))
1485 my @attrs = $ann->get_Annotations('_attr');
1486 if (@attrs) { # if there is a attribute collection
1488 $str = print_attr
($self, $str, $attrs[0]);
1494 $str = print_annotation
($self, $str, $ann);
1500 $str .= "<mol_seq>";
1501 $str .= $seq->seq();
1502 $str .= "</mol_seq>";
1505 $str .= "</sequence>";