Squash warning
[bioperl-live.git] / Bio / TreeIO / phyloxml.pm
blob5716835b20059f7cb1bc2655e355385fc2bd832a
1 # $Id: phyloxml.pm 11507 2007-06-23 01:37:45Z jason $
3 # BioPerl module for Bio::TreeIO::phyloxml
5 # Cared for by Mira Han <mirhan@indiana.edu>
7 # Copyright Mira Han
9 # You may distribute this module under the same terms as perl itself
11 # POD documentation - main docs before the code
13 =head1 NAME
15 Bio::TreeIO::phyloxml - TreeIO implementation for parsing PhyloXML format.
17 =head1 SYNOPSIS
19 # do not use this module directly
20 use Bio::TreeIO;
21 my $treeio = Bio::TreeIO->new(-format => 'phyloxml',
22 -file => 'tree.dnd');
23 my $tree = $treeio->next_tree;
25 =head1 DESCRIPTION
27 This module handles parsing and writing of phyloXML format.
29 =head1 FEEDBACK
31 =head2 Mailing Lists
33 User feedback is an integral part of the evolution of this and other
34 Bioperl modules. Send your comments and suggestions preferably to the
35 Bioperl mailing list. Your participation is much appreciated.
37 bioperl-l@bioperl.org - General discussion
38 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
40 =head2 Reporting Bugs
42 Report bugs to the Bioperl bug tracking system to help us keep track
43 of the bugs and their resolution. Bug reports can be submitted viax the
44 web:
46 http://bugzilla.open-bio.org/
48 =head1 AUTHOR - Mira Han
50 Email mirhan@indiana.edu
52 =head1 APPENDIX
54 The rest of the documentation details each of the object methods.
55 Internal methods are usually preceded with a _
57 =cut
60 # Let the code begin...
63 package Bio::TreeIO::phyloxml;
64 use strict;
66 # Object preamble - inherits from Bio::Root::Root
68 use Bio::Tree::Tree;
69 use Bio::Tree::AnnotatableNode;
70 use Bio::Annotation::SimpleValue;
71 use Bio::Annotation::Relation;
72 use XML::LibXML;
73 use XML::LibXML::Reader;
74 use base qw(Bio::TreeIO);
77 sub _initialize
79 my($self, %args) = @_;
80 $args{-treetype} ||= 'Bio::Tree::Tree';
81 $args{-nodetype} ||= 'Bio::Tree::AnnotatableNode';
82 $self->SUPER::_initialize(%args);
84 # phyloxml TreeIO does not use SAX,
85 # therefore no need to attach EventHandler
86 # instead we will define a reader that is a pull-parser of libXML
87 if ($self->mode eq 'r') {
88 if ($self->_fh) {
89 $self->{'_reader'} = XML::LibXML::Reader->new(
90 IO => $self->_fh,
91 no_blanks => 1
94 if (!$self->{'_reader'}) {
95 $self->throw("XML::LibXML::Reader not initialized");
98 elsif ($self->mode eq 'w') {
99 # print default lines
100 $self->_print('<?xml version="1.0" encoding="UTF-8"?>',"\n");
101 $self->_print('<phyloxml xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.phyloxml.org http://www.phyloxml.org/1.00/phyloxml.xsd" xmlns="http://www.phyloxml.org">', "\n");
104 $self->treetype($args{-treetype});
105 $self->nodetype($args{-nodetype});
106 $self->{'_lastitem'} = {}; # holds open items and the attribute hash
107 $self->_init_func();
110 sub _init_func
112 my ($self) = @_;
113 my %start_elements = (
114 'phylogeny' => \&element_phylogeny,
115 'clade' => \&element_clade,
116 'sequence_relation' => \&element_relation,
117 'clade_relation' => \&element_relation,
119 $self->{'_start_elements'} = \%start_elements;
120 my %end_elements = (
121 'phylogeny' => \&end_element_phylogeny,
122 'clade' => \&end_element_clade,
123 'sequence_relation' => \&end_element_relation,
124 'clade_relation' => \&end_element_relation,
126 $self->{'_end_elements'} = \%end_elements;
129 sub DESTROY {
130 my $self = shift;
131 if ($self->mode eq 'w') {
132 $self->_print('</phyloxml>');
133 $self->flush if $self->_flush_on_write && defined $self->_fh;
135 $self->SUPER::DESTROY;
138 =head2 next_tree
140 Title : next_tree
141 Usage : my $tree = $treeio->next_tree
142 Function: Gets the next tree in the stream
143 Returns : Bio::Tree::TreeI
144 Args : none
146 =cut
148 sub next_tree
150 my ($self) = @_;
151 my $reader = $self->{'_reader'};
152 my $tree;
153 while ($reader->read)
155 if ($reader->nodeType == XML_READER_TYPE_END_ELEMENT)
157 if ($reader->name eq 'phylogeny')
159 $tree = $self->end_element_phylogeny();
160 last;
163 $self->processXMLNode;
165 return $tree;
168 =head2 add_phyloXML_annotation
170 Title : add_phyloXML_annotation
171 Usage : my $node = $treeio->add_phyloXML_annotation(-obj=>$node, -xml=>$xmlstring)
172 Function: add annotations to a node in the phyloXML format string
173 Returns : the node that we added annotations to
174 Args : -obj => object that will have the Annotation. (Bio::Tree::AnnotatableNode)
175 -xml => string in phyloXML format that describes the annotation for the node
177 =cut
179 sub add_phyloXML_annotation
181 my ($self, @args) = @_;
182 my ($obj, $xml_string, $attr) = $self->_rearrange([qw(OBJ XML ATTR)], @args);
184 $xml_string = '<phyloxml>'.$xml_string.'</phyloxml>';
185 $self->debug( $xml_string );
186 $self->{'_reader'} = XML::LibXML::Reader->new(
187 string => $xml_string,
188 no_blanks => 1
190 my $reader = $self->{'_reader'};
191 $self->{'_currentannotation'} = []; # holds annotationcollection
192 $self->{'_currenttext'} = '';
193 $self->{'_id_link'} = {};
195 # pretend we saw a <clade> element
196 $self->{'_lastitem'}->{'clade'}++;
197 push @{$self->{'_lastitem'}->{'current'}}, { 'clade'=>{}}; # current holds current element and empty hash for its attributes
198 # our object to annotate (nodeI)
199 # push into temporary list
200 push @{$self->{'_currentitems'}}, $obj;
202 $reader->read; #read away the first element 'phyloxml'
203 while ($reader->read)
205 $self->processXMLNode;
208 # pop from temporary list
209 pop @{$self->{'_currentitems'}};
210 $self->{'_lastitem'}->{ $reader->name }-- if $reader->name;
211 pop @{$self->{'_lastitem'}->{'current'}};
213 return $obj;
217 =head2 write_tree
219 Title : write_tree
220 Usage : $treeio->write_tree($tree);
221 Function: Write a tree out to data stream in phyloxml format
222 Returns : none
223 Args : Bio::Tree::TreeI object
225 =cut
227 sub write_tree
229 my ($self, @trees) = @_;
230 foreach my $tree (@trees) {
231 my $root = $tree->get_root_node;
232 $self->_print("<phylogeny");
233 my @tags = $tree->get_all_tags();
234 my $attr_str = '';
235 foreach my $tag (@tags) {
236 my @values = $tree->get_tag_values($tag);
237 foreach (@values) {
238 $attr_str .= " ".$tag."=\"".$_."\"";
241 $self->_print($attr_str);
242 $self->_print(">");
243 if ($root->isa('Bio::Tree::AnnotatableNode')) {
244 $self->_print($self->_write_tree_Helper_annotatableNode($root));
246 else {
247 $self->_print($self->_write_tree_Helper_generic($root));
250 # print clade relations
251 while (my $str = pop (@{$self->{'_tree_attr'}->{'clade_relation'}})) {
252 $self->_print($str);
254 # print sequence relations
255 while (my $str = pop (@{$self->{'_tree_attr'}->{'sequence_relation'}})) {
256 $self->_print($str);
258 $self->_print("</phylogeny>");
259 $self->_print("\n");
261 $self->flush if $self->_flush_on_write && defined $self->_fh;
262 return;
265 =head2 _write_tree_Helper_annotatableNode
267 Title : _write_tree_Helper_annotatableNode
268 Usage : internal method used by write_tree, not to be used directly
269 Function: recursive helper function of write_tree for the annotatableNodes.
270 translates annotations into xml elements.
271 Returns : string describing the node
272 Args : Bio::Node::AnnotatableNode object, string
274 =cut
276 sub _write_tree_Helper_annotatableNode
278 my ($self, $node, $str) = @_; # this self is a Bio::Tree::phyloxml
280 my $ac = $node->annotation;
282 # if clade_relation exists
283 my @relations = $ac->get_Annotations('clade_relation');
284 foreach (@relations) {
285 my $clade_rel = $self->_relation_to_string($node, $_, '');
286 # set as tree attr
287 push (@{$self->{'_tree_attr'}->{'clade_relation'}}, $clade_rel);
290 # start <clade>
291 $str .= '<clade';
292 my ($attr) = $ac->get_Annotations('_attr'); # check id_source
293 if ($attr) {
294 my ($id_source) = $attr->get_Annotations('id_source');
295 if ($id_source) {
296 $str .= " id_source=\"".$id_source->value."\"";
299 $str .= ">";
301 # print all descendent nodes
302 foreach my $child ( $node->each_Descendent() ) {
303 $str = $self->_write_tree_Helper_annotatableNode($child, $str);
306 # print all annotations
307 $str = print_annotation( $node, $str, $ac );
309 # print all sequences
310 if ($node->has_sequence) {
311 foreach my $seq (@{$node->sequence}) {
312 # if sequence_relation exists
313 my @relations = $seq->annotation->get_Annotations('sequence_relation');
314 foreach (@relations) {
315 my $sequence_rel = $self->_relation_to_string($seq, $_, '');
316 # set as tree attr
317 push (@{$self->{'_tree_attr'}->{'sequence_relation'}}, $sequence_rel);
319 $str = print_seq_annotation( $node, $str, $seq );
323 $str .= "</clade>";
325 return $str;
328 =head2 _write_tree_Helper_generic
330 Title : _write_tree_Helper_generic
331 Usage : internal method used by write_tree, not to be used directly
332 Function: recursive helper function of write_tree for generic NodesI.
333 all tags are translated into property elements.
334 Returns : string describing the node
335 Args : Bio::Node::NodeI object, string
337 =cut
339 sub _write_tree_Helper_generic
341 my ($self, $node, $str) = @_; # this self is a Bio::Tree::phyloxml
343 # start <clade>
344 $str .= '<clade>';
346 # print all descendent nodes
347 foreach my $child ( $node->each_Descendent() ) {
348 $str = $self->_write_tree_Helper_generic($child, $str);
351 # print all tags
352 my @tags = $node->get_all_tags();
353 foreach my $tag (@tags) {
354 my @values = $node->get_tag_values($tag);
355 foreach my $val (@values) {
356 $str .= "<property applies_to=\"clade\" ref=\"$tag:$val\"> ";
357 $str .= " </property>";
361 # print NodeI features
362 if ($node->id) {
363 $str .= "<name>";
364 $str .= $node->id;
365 $str .= "</name>";
367 elsif ($node->branch_length) {
368 $str .= "<branch_length>";
369 $str .= $node->branch_length;
370 $str .= "</branch_length>";
372 elsif ($node->bootstrap) {
373 $str .= "<confidence type = \"bootstrap\">";
374 $str .= $node->bootstrap;
375 $str .= "</confidence>";
378 $str .= "</clade>";
379 return $str;
382 =head2 _relation_to_string
384 Title : _relation_to_string
385 Usage : internal method used by write_tree, not to be used directly
386 Function: internal function used by write_tree to translate Annotation::Relation objects into xml elements.
387 Returns : string describing the node
388 Args : Bio::Node::AnnotatableNode (or Bio::SeqI) object that contains the Annotation::Relation,
389 the Annotation::Relation object,
390 the string
392 =cut
394 # It may be more appropriate to make Annotation::Relation have
395 # a to_string callback function,
396 # and have this subroutine set as the callback when we are in
397 # phyloXML context.
398 # I've put it here for now, since write_tree is the only place it is used.
400 sub _relation_to_string {
401 my ($self, $obj, $rel, $str) = @_;
403 my @attr = $obj->annotation->get_Annotations('_attr'); # check id_source
404 if (@attr) {
405 my @id_source = $attr[0]->get_Annotations('id_source');
407 my ($id_ref_0) = $obj->annotation->get_nested_Annotations(
408 '-keys' => ['id_source'],
409 '-recursive' => 1);
410 my ($id_ref_1) = $rel->to->annotation->get_nested_Annotations(
411 '-keys' => ['id_source'],
412 '-recursive' => 1);
413 $str .= "<";
414 $str .= $rel->tagname;
415 $str .= " id_ref_0=\"".$id_ref_0->value."\"";
416 $str .= " id_ref_1=\"".$id_ref_1->value."\"";
417 $str .= " type=\"".$rel->type."\"";
418 $str .= "/>";
419 return $str;
422 =head2 read_annotation
424 Title : read_annotation
425 Usage : $treeio->read_annotation(-obj=>$node, -path=>$path, -attr=>1);
426 Function: read text value (or attribute value) of the annotations corresponding to the element path
427 Returns : list of text values of the annotations matching the path
428 Args : -obj => object that contains the Annotation. (Bio::Tree::AnnotatableNode or Bio::SeqI)
429 -path => path of the nested elements
430 -attr => Boolean value to indicate whether to get the attribute of the element or the text value.
431 (default is 0, meaning text value is returned)
433 =cut
435 # It may be more appropriate to make a separate Annotation::phyloXML object
436 # and have this subroutine within that object so it can handle the
437 # reading and writing of the values and attributes.
438 # but since tagTree is a temporary stub and I didn't want to make
439 # a redundant object I've put it here for now.
441 sub read_annotation
443 my ($self, @args) = @_;
444 my ($obj, $path, $attr) = $self->_rearrange([qw(OBJ PATH ATTR)], @args);
445 my $ac = $obj->annotation;
446 if ($attr) {
447 my @elements = split ('/', $path);
448 my $final = pop @elements;
449 push (@elements, '_attr');
450 push (@elements, $final);
451 $path = join ('/', @elements);
452 return $self->_read_annotation_attr_Helper( [$ac], $path);
454 else {
455 return $self->_read_annotation_text_Helper( [$ac], $path);
459 sub _read_annotation_text_Helper
461 my ($self, $acs, $path) = @_;
462 my @elements = split ('/', $path);
463 my $key = shift @elements;
464 my @nextacs = ();
465 foreach my $ac (@$acs) {
466 foreach my $ann ($ac->get_Annotations($key)) {
467 if ($ann->isa('Bio::AnnotationCollectionI')) {push (@nextacs, $ann)}
470 if (@elements == 0) {
471 my @values = ();
472 my @texts = map {$_->get_Annotations('_text')} @nextacs;
473 foreach (@texts) {
474 $_ && push (@values, $_->value);
476 return @values;
478 else {
479 $path = join ('/', @elements);
480 return $self->_read_annotation_text_Helper( \@nextacs, $path);
484 sub _read_annotation_attr_Helper
486 my ($self, $acs, $path) = @_;
487 my @elements = split ('/', $path);
488 my $key = shift @elements;
489 my @nextacs = ();
490 foreach my $ac (@$acs) {
491 foreach my $ann ($ac->get_Annotations($key)) {
492 if ($ann->isa('Bio::AnnotationCollectionI')) {push (@nextacs, $ann)}
495 if (@elements == 1) {
496 my $attrname = $elements[0];
497 my @sv = map {$_->get_Annotations($attrname)} @nextacs;
498 return map {$_->value} @sv;
500 else {
501 $path = join ('/', @elements);
502 return $self->_read_annotation_attr_Helper( \@nextacs, $path);
506 =head1 Methods for parsing the XML document
508 =cut
510 =head2 processXMLNode
512 Title : processXMLNode
513 Usage : $treeio->processXMLNode
514 Function: read the XML node and process according to the node type
515 Returns : none
516 Args : none
518 =cut
520 sub processXMLNode
522 my ($self) = @_;
523 my $reader = $self->{'_reader'};
524 my $nodetype = $reader->nodeType;
525 if ( $nodetype == XML_READER_TYPE_ELEMENT)
527 $self->{'_lastitem'}->{$reader->name}++;
528 push @{$self->{'_lastitem'}->{'current'}}, { $reader->name=>{}}; # current holds current element and empty hash for its attributes
530 if (exists $self->{'_start_elements'}->{$reader->name}) {
531 my $method = $self->{'_start_elements'}->{$reader->name};
532 $self->$method();
534 else {
535 $self->element_default();
537 if ($reader->isEmptyElement) {
538 # element is complete
539 # set nodetype so it can jump and
540 # do procedures for XML_READER_TYPE_END_ELEMENT
541 $nodetype = XML_READER_TYPE_END_ELEMENT;
544 if ($nodetype == XML_READER_TYPE_TEXT)
546 $self->{'_currenttext'} = $reader->value;
548 if ($nodetype == XML_READER_TYPE_END_ELEMENT)
550 if (exists $self->{'_end_elements'}->{$reader->name}) {
551 my $method = $self->{'_end_elements'}->{$reader->name};
552 $self->$method();
554 else {
555 $self->end_element_default();
557 $self->{'_lastitem'}->{ $reader->name }--;
558 pop @{$self->{'_lastitem'}->{'current'}};
559 $self->{'_currenttext'} = '';
564 =head2 processAttribute
566 Title : processAttribute
567 Usage : $treeio->processAttribute(\%hash_for_attribute);
568 Function: reads the attributes of the current element into a hash
569 Returns : none
570 Args : hash reference where the attributes will be stored.
572 =cut
574 sub processAttribute
576 my ($self, $data) = @_;
577 my $reader = $self->{'_reader'};
579 # several ways of reading attributes:
580 # read all attributes:
581 if ($reader-> moveToFirstAttribute) {
582 do {
583 $data->{$reader->name()} = $reader->value;
584 } while ($reader-> moveToNextAttribute);
585 $reader-> moveToElement;
590 =head2 element_phylogeny
592 Title : element_phylogeny
593 Usage : $treeio->element_phylogeny
594 Function: initialize the parsing of a tree
595 Returns : none
596 Args : none
598 =cut
600 sub element_phylogeny
602 my ($self) = @_;
603 $self->{'_currentitems'} = []; # holds nodes while parsing current level
604 $self->{'_currentnodes'} = []; # holds nodes while constructing tree
605 $self->{'_currentannotation'} = []; # holds annotationcollection
606 $self->{'_currenttext'} = '';
607 $self->{'_levelcnt'} = [];
608 $self->{'_id_link'} = {};
609 $self->{'_tree_attr'} = $self->current_attr;
610 $self->processAttribute($self->current_attr);
611 return;
614 =head2 end_element_phylogeny
616 Title : end_element_phylogeny
617 Usage : $treeio->end_element_phylogeny
618 Function: ends the parsing of a tree building a Tree::TreeI object.
619 Returns : Tree::TreeI
620 Args : none
622 =cut
624 sub end_element_phylogeny
626 my ($self) = @_;
628 my $root;
629 # if there is more than one node in _currentnodes
630 # aggregate the nodes into trees basically ad-hoc.
631 if ( @{$self->{'_currentnodes'}} > 1)
633 $root = $self->nodetype->new(
634 -id => '',
635 tostring => \&node_to_string,
637 while ( @{$self->{'_currentnodes'}} ) {
638 my ($node) = ( shift @{$self->{'_currentnodes'}});
639 $root->add_Descendent($node);
642 # if there is only one node in _currentnodes
643 # that node is root.
644 elsif ( @{$self->{'_currentnodes'}} == 1)
646 $root = shift @{$self->{'_currentnodes'}};
649 my $tree = $self->treetype->new(
650 -root => $root,
651 -id => $self->current_attr->{'name'},
652 %{$self->current_attr}
654 foreach my $tag ( keys %{$self->current_attr} ) {
655 $tree->add_tag_value( $tag, $self->current_attr->{$tag} );
657 return $tree;
660 =head2 element_clade
662 Title : element_clade
663 Usage : $treeio->element_clade
664 Function: initialize the parsing of a node
665 creates a new AnnotatableNode with annotations
666 Returns : none
667 Args : none
669 =cut
671 sub element_clade
673 my ($self) = @_;
674 my $reader = $self->{'_reader'};
675 my %clade_attr = (); # doesn't use current attribute in order to save memory
676 $self->processAttribute(\%clade_attr);
677 # create a node (Annotatable Node)
678 my $tnode = $self->nodetype->new(
679 -id => '',
680 tostring => \&node_to_string,
681 %clade_attr,
683 # add all attributes as annotation collection with tag '_attr'
684 my $ac = $tnode->annotation;
685 my $newattr = Bio::Annotation::Collection->new();
686 foreach my $tag (keys %clade_attr) {
687 my $sv = Bio::Annotation::SimpleValue->new(
688 -value => $clade_attr{$tag}
690 $newattr->add_Annotation($tag, $sv);
692 $ac->add_Annotation('_attr', $newattr);
694 # if there is id_source add clade to _id_link
695 if (exists $clade_attr{'id_source'}) {
696 $self->{'_id_link'}->{$clade_attr{'id_source'}} = $tnode;
698 # push into temporary list
699 push @{$self->{'_currentitems'}}, $tnode;
702 =head2 end_element_clade
704 Title : end_element_clade
705 Usage : $treeio->end_element_clade
706 Function: ends the parsing of a node
707 Returns : none
708 Args : none
710 =cut
712 sub end_element_clade
714 my ($self) = @_;
715 my $reader = $self->{'_reader'};
717 my $curcount = scalar @{$self->{'_currentnodes'}};
718 my $level = $reader->depth() - 2;
719 my $childcnt = $self->{'_levelcnt'}->[$level+1] || 0;
721 # pop from temporary list
722 my $tnode = pop @{$self->{'_currentitems'}};
723 if ( $childcnt > 0) {
724 if( $childcnt > $curcount)
726 $self->throw("something wrong with event construction treelevel ".
727 "$level is recorded as having $childcnt nodes ".
728 "but current nodes at this level is $curcount\n");
730 my @childnodes = splice( @{$self->{'_currentnodes'}}, - $childcnt);
731 for ( @childnodes ) {
732 $tnode->add_Descendent($_);
734 $self->{'_levelcnt'}->[$level+1] = 0;
736 push @{$self->{'_currentnodes'}}, $tnode;
737 $self->{'_levelcnt'}->[$level]++;
741 =head2 element_relation
743 Title : element_relation
744 Usage : $treeio->element_relation
745 Function: starts the parsing of clade relation & sequence relation
746 Returns : none
747 Args : none
749 =cut
751 sub element_relation
753 my ($self) = @_;
754 $self->processAttribute($self->current_attr);
757 =head2 end_element_relation
759 Title : end_element_relation
760 Usage : $treeio->end_element_relation
761 Function: ends the parsing of clade relation & sequence relation
762 Returns : none
763 Args : none
765 =cut
767 sub end_element_relation
769 my ($self) = @_;
770 my $relationtype = $self->current_attr->{'type'};
771 my $id_ref_0 = $self->current_attr->{'id_ref_0'};
772 my $id_ref_1 = $self->current_attr->{'id_ref_1'};
774 my @srcbyidref = ();
775 $srcbyidref[0] = $self->{'_id_link'}->{$id_ref_0};
776 $srcbyidref[1] = $self->{'_id_link'}->{$id_ref_1};
778 # exception when id_ref is defined but id_src is not, or vice versa.
779 if ( ($id_ref_0 xor $srcbyidref[0])||($id_ref_1 xor $srcbyidref[1]) ) {
780 $self->throw("id_ref and id_src incompatible: $id_ref_0, $id_ref_1, ", $srcbyidref[0], $srcbyidref[1]);
783 # set id_ref_0
784 my $ac0 = $srcbyidref[0]->annotation;
785 my $newann = Bio::Annotation::Relation->new(
786 '-type' => $relationtype,
787 '-to' => $srcbyidref[1],
788 '-tagname' => $self->current_element
790 $ac0->add_Annotation($self->current_element, $newann);
791 # set id_ref_1
792 my $ac1 = $srcbyidref[1]->annotation;
793 $newann = Bio::Annotation::Relation->new(
794 '-type' => $relationtype,
795 '-to' => $srcbyidref[0],
796 '-tagname' => $self->current_element
798 $ac1->add_Annotation($self->current_element, $newann);
803 =head2 element_default
805 Title : element_default
806 Usage : $treeio->element_default
807 Function: starts the parsing of all other elements
808 Returns : none
809 Args : none
811 =cut
813 sub element_default
815 my ($self) = @_;
816 my $reader = $self->{'_reader'};
817 my $current = $self->current_element();
818 my $prev = $self->prev_element();
820 # read attributes of element
821 $self->processAttribute($self->current_attr);
823 # check idref
824 my $idref = '';
825 if (exists $self->current_attr->{'id_ref'}) {
826 $idref = $self->current_attr->{'id_ref'};
829 my $srcbyidref = '';
830 $srcbyidref = $self->{'_id_link'}->{$idref};
832 # exception when id_ref is defined but id_src is not, or vice versa.
833 if ($idref xor $srcbyidref) {
834 $self->throw("id_ref and id_src incompatible: $idref, $srcbyidref");
837 # we are annotating a Node
838 # set _currentannotation
839 if ( ($srcbyidref && $srcbyidref->isa($self->nodetype)) || ((!$srcbyidref) && $prev eq 'clade') ) {
840 # find node to annotate
841 my $tnode;
842 if ($srcbyidref) {
843 $tnode = $srcbyidref;
845 else {
846 $tnode = $self->{'_currentitems'}->[-1];
848 my $ac = $tnode->annotation();
849 # add the new anncollection with the current element as key
850 my $newann = Bio::Annotation::Collection->new();
851 $ac->add_Annotation($current, $newann);
852 # push to current annotation
853 push (@{$self->{'_currentannotation'}}, $newann);
855 # we are already within an annotation
856 else {
857 my $ac = $self->{'_currentannotation'}->[-1];
858 if ($ac) {
859 # add the new anncollection with the current element as key
860 my $newann = Bio::Annotation::Collection->new();
861 $ac->add_Annotation($current, $newann);
862 push (@{$self->{'_currentannotation'}}, $newann);
868 =head2 end_element_default
870 Title : end_element_default
871 Usage : $treeio->end_element_default
872 Function: ends the parsing of all other elements
873 Returns : none
874 Args : none
876 =cut
878 sub end_element_default
880 my ($self) = @_;
881 my $reader = $self->{'_reader'};
882 my $current = $self->current_element();
883 my $prev = $self->prev_element();
885 # check idsrc
886 my $idsrc = $self->current_attr->{'id_source'};
888 # check idref
889 my $idref = '';
890 if (exists $self->current_attr->{'id_ref'}) {
891 $idref = $self->current_attr->{'id_ref'};
892 delete $self->current_attr->{'id_ref'};
895 my $srcbyidref = '';
896 $srcbyidref = $self->{'_id_link'}->{$idref};
898 # exception when id_ref is defined but id_src is not, or vice versa.
899 if ($idref xor $srcbyidref) {
900 $self->throw("id_ref and id_src incompatible: $idref, $srcbyidref");
903 # we are annotating a Tree
904 if ((!$srcbyidref) && $prev eq 'phylogeny') {
905 # annotate Tree via tree attribute
906 $self->prev_attr->{$current} = $self->{'_currenttext'};
908 # we are within sequence_relation or clade_relation
909 elsif ($prev eq 'clade_relation' || $prev eq 'sequence_relation') {
910 # we are here only with <confidence>
911 if ($current eq 'confidence') {
912 # need to take care of confidence
913 # not implemented yet..
915 else {
916 $self->throw($current, " is not allowed within <*_relation>");
919 # we are annotating a Node
920 if (( $srcbyidref && $srcbyidref->isa($self->nodetype)) || ((!$srcbyidref) && $prev eq 'clade'))
922 # pop from current annotation
923 my $ac = pop (@{$self->{'_currentannotation'}});
924 $self->annotateNode( $current, $ac);
926 # additional setups for compatibility with NodeI
927 my $tnode;
928 if ($srcbyidref) {
929 $tnode = $srcbyidref;
931 else {
932 $tnode = $self->{'_currentitems'}->[-1];
934 if ($current eq 'name') {
935 $tnode->id($self->{'_currenttext'});
937 elsif ($current eq 'branch_length') {
938 $tnode->branch_length($self->{'_currenttext'});
940 elsif ($current eq 'confidence') {
941 if ((exists $self->current_attr->{'type'}) && ($self->current_attr->{'type'} eq 'bootstrap')) {
942 $tnode->bootstrap($self->{'_currenttext'}); # this needs to change (adds 'B' annotation)
945 elsif ($current eq 'sequence') {
946 # if annotation is <sequence>
947 # transform the Bio::Annotation object into a Bio::Seq object
948 my $str = '';
949 # retrieve the sequence
950 if (my ($molseq) = $ac->get_Annotations('mol_seq')) {
951 my ($strac) = $molseq->get_Annotations('_text');
952 $str = $strac->value();
954 # create Seq object with sequence
955 my $newseq = Bio::Seq->new( -seq => $str,
956 -annotation=>$ac,
957 -nowarnonempty=>1);
958 $tnode->sequence($newseq);
959 $ac->remove_Annotations('mol_seq');
960 $tnode->annotation->remove_Annotations($current);
961 # if there is id_source add sequence to _id_link
962 if ($idsrc) {
963 $self->{'_id_link'}->{$idsrc} = $newseq;
966 elsif ($idsrc && $current eq 'taxonomy') {
967 # if there is id_source add sequence to _id_link
968 $self->{'_id_link'}->{$idsrc} = $ac;
971 # we are within a default Annotation
972 else {
973 my $ac = pop (@{$self->{'_currentannotation'}});
974 if ($ac) {
975 $self->annotateNode( $current, $ac);
981 =head2 annotateNode
983 Title : annotateNode
984 Usage : $treeio->annotateNode($element, $ac)
985 Function: adds text value and attributes to the AnnotationCollection
986 that has element name as key. If there are nested elements,
987 optional AnnotationCollections are added recursively,
988 with the nested element name as key.
989 The structure of each AnnotationCollection is
990 'element' => AnnotationCollection {
991 '_text' => SimpleValue (text value)
992 '_attr' => AnnotationCollection {
993 attribute1 => SimpleValue (attribute value 1)
994 attribute2 => SimpleValue (attribute value 2)
997 ['nested element' => AnnotationCollection ]
999 Returns : none
1000 Args : none
1002 =cut
1004 sub annotateNode
1006 my ($self, $element, $newac) = @_;
1007 # if attribute exists then add Annotation::Collection with tag '_attr'
1008 if ( scalar keys %{$self->current_attr} ) {
1009 my $newattr = Bio::Annotation::Collection->new();
1010 foreach my $tag (keys %{$self->current_attr}) {
1011 my $sv = Bio::Annotation::SimpleValue->new(
1012 -value => $self->current_attr->{$tag}
1014 $newattr->add_Annotation($tag, $sv);
1016 $newac->add_Annotation('_attr', $newattr);
1018 # if text exists add text as SimpleValue with tag '_text'
1019 if ( $self->{'_currenttext'} ) {
1020 my $newvalue = Bio::Annotation::SimpleValue->new( -value => $self->{'_currenttext'} );
1021 $newac->add_Annotation('_text', $newvalue);
1026 =head1 Methods for exploring the document
1028 =cut
1030 =head2 current_attr
1032 Title : current_attr
1033 Usage : $attr_hash = $treeio->current_attr;
1034 Function: returns the attribute hash for current item
1035 Returns : reference of the attribute hash
1036 Args : none
1038 =cut
1040 sub current_attr {
1041 my ($self) = @_;
1043 return 0 if ! defined $self->{'_lastitem'} ||
1044 ! defined $self->{'_lastitem'}->{'current'}->[-1];
1045 my @keys = keys %{$self->{'_lastitem'}->{'current'}->[-1]};
1046 (@keys == 1) || die "there should be only one key for each hash";
1047 return $self->{'_lastitem'}->{'current'}->[-1]->{$keys[0]};
1050 =head2 prev_attr
1052 Title : prev_attr
1053 Usage : $hash_ref = $treeio->prev_attr
1054 Function: returns the attribute hash for previous item
1055 Returns : reference of the attribute hash
1056 Args : none
1058 =cut
1060 sub prev_attr {
1061 my ($self) = @_;
1063 return 0 if ! defined $self->{'_lastitem'} ||
1064 ! defined $self->{'_lastitem'}->{'current'}->[-2];
1065 my @keys = keys %{$self->{'_lastitem'}->{'current'}->[-2]};
1066 (@keys == 1) || die "there should be only one key for each hash";
1067 return $self->{'_lastitem'}->{'current'}->[-2]->{$keys[0]};
1070 =head2 current_element
1072 Title : current_element
1073 Usage : $element = $treeio->current_element
1074 Function: returns the name of the current element
1075 Returns : string (element name)
1076 Args : none
1078 =cut
1080 sub current_element {
1081 my ($self) = @_;
1083 return 0 if ! defined $self->{'_lastitem'} ||
1084 ! defined $self->{'_lastitem'}->{'current'}->[-1];
1085 my @keys = keys %{$self->{'_lastitem'}->{'current'}->[-1]};
1086 (@keys == 1) || die "there should be only one key for each hash";
1087 return $keys[0];
1090 =head2 prev_element
1092 Title : prev_element
1093 Usage : $element = $treeio->current_element
1094 Function: returns the name of the previous element
1095 Returns : string (element name)
1096 Args : none
1098 =cut
1100 sub prev_element {
1101 my ($self) = @_;
1103 return 0 if ! defined $self->{'_lastitem'} ||
1104 ! defined $self->{'_lastitem'}->{'current'}->[-2];
1105 my @keys = keys %{$self->{'_lastitem'}->{'current'}->[-2]};
1106 (@keys == 1) || die "there should be only one key for each hash";
1107 return $keys[0];
1111 =head2 treetype
1113 Title : treetype
1114 Usage : $obj->treetype($newval)
1115 Function: returns the tree type (default is Bio::Tree::Tree)
1116 Returns : value of treetype
1117 Args : newvalue (optional)
1120 =cut
1122 sub treetype{
1123 my ($self,$value) = @_;
1124 if( defined $value) {
1125 $self->{'treetype'} = $value;
1127 return $self->{'treetype'};
1130 =head2 nodetype
1132 Title : nodetype
1133 Usage : $obj->nodetype($newval)
1134 Function: returns the node type (default is Bio::Node::AnnotatableNode)
1135 Returns : value of nodetype
1136 Args : newvalue (optional)
1138 =cut
1140 sub nodetype{
1141 my ($self,$value) = @_;
1142 if( defined $value) {
1143 $self->{'nodetype'} = $value;
1145 return $self->{'nodetype'};
1149 =head1 Methods for implementing to_string callback for AnnotatableNode
1151 =cut
1153 =head2 node_to_string
1155 Title : node_to_string
1156 Usage : $annotatablenode->to_string_callback(\&node_to_string)
1157 Function: set as callback in AnnotatableNode, prints the node information in string
1158 Returns : string of node information
1159 Args : none
1161 =cut
1163 # this function is similar to _write_tree_Helper_annotatableNode,
1164 # but it is not recursive
1165 sub node_to_string
1167 my ($self) = @_; # this self is a Bio::Tree::AnnotatableNode
1168 # not a Bio::TreeIO::phyloxml
1169 my $str = '';
1170 my $ac = $self->annotation;
1172 # start <clade>
1173 $str .= '<clade';
1174 my @attr = $ac->get_Annotations('_attr'); # check id_source
1175 if (@attr) {
1176 my @id_source = $attr[0]->get_Annotations('id_source');
1177 if (@id_source) {
1178 $str .= " id_source=\"".$id_source[0]->value."\"";
1181 $str .= '>';
1183 # print all annotations
1184 $str = print_annotation( $self, $str, $ac );
1185 # print all sequences
1186 if ($self->has_sequence) {
1187 foreach my $seq (@{$self->sequence}) {
1188 $str = print_seq_annotation( $self, $str, $seq );
1192 $str .= '</clade>';
1193 return $str;
1196 =head2 print_annotation
1198 Title : print_annotation
1199 Usage : $str = $annotatablenode->print_annotation($str, $annotationcollection)
1200 Function: prints the annotationCollection in a phyloXML format.
1201 Returns : string of annotation information
1202 Args : string to which the Annotation should be concatenated to,
1203 annotationCollection that holds the Annotations
1205 =cut
1207 # Again, it may be more appropriate to make a separate Annotation::phyloXML object
1208 # and have this subroutine within that object so it can handle the
1209 # reading and writing of the values and attributes.
1210 # especially since this function is used both by
1211 # Bio::TreeIO::phyloxml (through write_tree) and
1212 # Bio::Node::AnnotatableNode (through node_to_string).
1213 # but since tagTree is a temporary stub and I didn't want to make
1214 # a redundant object I've put it here for now.
1216 sub print_annotation
1218 my ($self, $str, $ac) = @_;
1220 my @all_anns = $ac->get_Annotations();
1221 foreach my $ann (@all_anns) {
1222 my $key = $ann->tagname;
1223 if ($key eq '_attr') { next; } # attributes are already printed in the previous level
1224 if ($ann->isa('Bio::Annotation::SimpleValue'))
1226 if ($key eq '_text') {
1227 $str .= $ann->value;
1229 else {
1230 $str .= "<$key>";
1231 $str .= $ann->value;
1232 $str .= "</$key>";
1235 elsif ($ann->isa('Bio::Annotation::Collection'))
1237 my @attrs = $ann->get_Annotations('_attr');
1238 if (@attrs) { # if there is a attribute collection
1239 $str .= "<$key";
1240 $str = print_attr($self, $str, $attrs[0]);
1241 $str .= ">";
1243 else {
1244 $str .= "<$key>";
1246 $str = print_annotation($self, $str, $ann);
1247 $str .= "</$key>";
1250 return $str;
1253 =head2 print_attr
1255 Title : print_attr
1256 Usage : $str = $annotatablenode->print_attr($str, $annotationcollection)
1257 Function: prints the annotationCollection in a phyloXML format.
1258 Returns : string of attributes
1259 Args : string to which the Annotation should be concatenated to,
1260 AnnotationCollection that holds the attributes
1262 =cut
1264 # Again, it may be more appropriate to make a separate Annotation::phyloXML object
1265 # and have this subroutine within that object so it can handle the
1266 # reading and writing of the values and attributes.
1267 # especially since this function is used both by
1268 # Bio::TreeIO::phyloxml and Bio::Node::AnnotatableNode
1269 # (through print_annotation).
1270 # but since tagTree is a temporary stub and I didn't want to make
1271 # a redundant object I've put it here for now.
1273 sub print_attr
1275 my ($self, $str, $ac) = @_;
1276 my @all_attrs = $ac->get_Annotations();
1277 foreach my $attr (@all_attrs) {
1278 if (!$attr->isa('Bio::Annotation::SimpleValue')) {
1279 $self->throw("attribute should be a SimpleValue");
1281 $str .= ' ';
1282 $str .= $attr->tagname;
1283 $str .= '=';
1284 $str .= $attr->value;
1286 return $str;
1289 =head2 print_sequence_annotation
1291 Title : print_sequence_annotation
1292 Usage : $str = $node->print_seq_annotation( $str, $seq );
1293 Function: prints the Bio::Seq object associated with the node
1294 in a phyloXML format.
1295 Returns : string that describes the sequence
1296 Args : string to which the Annotation should be concatenated to,
1297 Seq object to print in phyloXML
1299 =cut
1301 # Again, it may be more appropriate to make a separate Annotation::phyloXML object
1302 # and have this subroutine within that object so it can handle the
1303 # reading and writing of the values and attributes.
1304 # especially since this function is used both by
1305 # Bio::TreeIO::phyloxml (through write_tree) and
1306 # Bio::Node::AnnotatableNode (through node_to_string).
1307 # but since tagTree is a temporary stub and I didn't want to make
1308 # a redundant object I've put it here for now.
1311 sub print_seq_annotation
1313 my ($self, $str, $seq) = @_;
1315 $str .= "<sequence";
1316 my ($attr) = $seq->annotation->get_Annotations('_attr'); # check id_source
1317 if ($attr) {
1318 my ($id_source) = $attr->get_Annotations('id_source');
1319 if ($id_source) {
1320 $str .= " id_source=\"".$id_source->value."\"";
1323 $str .= ">";
1325 my @all_anns = $seq->annotation->get_Annotations();
1326 foreach my $ann (@all_anns) {
1327 my $key = $ann->tagname;
1328 if ($key eq '_attr') { next; } # attributes are already printed in the previous level
1329 if ($ann->isa('Bio::Annotation::SimpleValue'))
1331 if ($key eq '_text') {
1332 $str .= $ann->value;
1334 else {
1335 $str .= "<$key>";
1336 $str .= $ann->value;
1337 $str .= "</$key>";
1340 elsif ($ann->isa('Bio::Annotation::Collection'))
1342 my @attrs = $ann->get_Annotations('_attr');
1343 if (@attrs) { # if there is a attribute collection
1344 $str .= "<$key";
1345 $str = print_attr($self, $str, $attrs[0]);
1346 $str .= ">";
1348 else {
1349 $str .= "<$key>";
1351 $str = print_annotation($self, $str, $ann);
1352 $str .= "</$key>";
1355 # print mol_seq
1356 if ($seq->seq()) {
1357 $str .= "<mol_seq>";
1358 $str .= $seq->seq();
1359 $str .= "</mol_seq>";
1362 $str .= "</sequence>";
1363 return $str;