Simplify some code
[bioperl-live.git] / Bio / TreeIO / phyloxml.pm
blob4ffc5a5ce03df073ff8d307807c8e5e5370d70b6
1 # $Id: phyloxml.pm 11507 2007-06-23 01:37:45Z jason $
3 # BioPerl module for Bio::TreeIO::phyloxml
5 # Please direct questions and support issues to <bioperl-l@bioperl.org>
7 # Cared for by Mira Han <mirhan@indiana.edu>
9 # Copyright Mira Han
11 # You may distribute this module under the same terms as perl itself
13 # POD documentation - main docs before the code
15 =head1 NAME
17 Bio::TreeIO::phyloxml - TreeIO implementation for parsing PhyloXML format.
19 =head1 SYNOPSIS
21 # do not use this module directly
22 use Bio::TreeIO;
23 my $treeio = Bio::TreeIO->new(-format => 'phyloxml',
24 -file => 'tree.dnd');
25 my $tree = $treeio->next_tree;
27 =head1 DESCRIPTION
29 This module handles parsing and writing of phyloXML format.
31 =head1 FEEDBACK
33 =head2 Mailing Lists
35 User feedback is an integral part of the evolution of this and other
36 Bioperl modules. Send your comments and suggestions preferably to the
37 Bioperl mailing list. Your participation is much appreciated.
39 bioperl-l@bioperl.org - General discussion
40 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
42 =head2 Support
44 Please direct usage questions or support issues to the mailing list:
46 I<bioperl-l@bioperl.org>
48 rather than to the module maintainer directly. Many experienced and
49 reponsive experts will be able look at the problem and quickly
50 address it. Please include a thorough description of the problem
51 with code and data examples if at all possible.
53 =head2 Reporting Bugs
55 Report bugs to the Bioperl bug tracking system to help us keep track
56 of the bugs and their resolution. Bug reports can be submitted viax the
57 web:
59 https://github.com/bioperl/bioperl-live/issues
61 =head1 AUTHOR - Mira Han
63 Email mirhan@indiana.edu
65 =head1 APPENDIX
67 The rest of the documentation details each of the object methods.
68 Internal methods are usually preceded with a _
70 =cut
73 # Let the code begin...
76 package Bio::TreeIO::phyloxml;
77 use strict;
79 # Object preamble - inherits from Bio::Root::Root
81 use Bio::Tree::Tree;
82 use Bio::Tree::AnnotatableNode;
83 use Bio::Annotation::SimpleValue;
84 use Bio::Annotation::Relation;
85 use XML::LibXML;
86 use XML::LibXML::Reader;
87 use base qw(Bio::TreeIO);
90 sub _initialize
92 my($self, %args) = @_;
93 $args{-treetype} ||= 'Bio::Tree::Tree';
94 $args{-nodetype} ||= 'Bio::Tree::AnnotatableNode';
95 $self->SUPER::_initialize(%args);
97 # phyloxml TreeIO does not use SAX,
98 # therefore no need to attach EventHandler
99 # instead we will define a reader that is a pull-parser of libXML
100 if ($self->mode eq 'r') {
101 if ($self->_fh) {
102 $self->{'_reader'} = XML::LibXML::Reader->new(
103 IO => $self->_fh,
104 no_blanks => 1
107 if (!$self->{'_reader'}) {
108 $self->throw("XML::LibXML::Reader not initialized");
111 elsif ($self->mode eq 'w') {
112 # print default lines
113 $self->_print('<?xml version="1.0" encoding="UTF-8"?>',"\n");
114 $self->_print('<phyloxml xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://www.phyloxml.org" xsi:schemaLocation="http://www.phyloxml.org http://www.phyloxml.org/1.10/phyloxml.xsd">');
117 $self->treetype($args{-treetype});
118 $self->nodetype($args{-nodetype});
119 $self->{'_lastitem'} = {}; # holds open items and the attribute hash
120 $self->_init_func();
123 sub _init_func
125 my ($self) = @_;
126 my %start_elements = (
127 'phylogeny' => \&element_phylogeny,
128 'clade' => \&element_clade,
129 'sequence_relation' => \&element_relation,
130 'clade_relation' => \&element_relation,
132 $self->{'_start_elements'} = \%start_elements;
133 my %end_elements = (
134 'phylogeny' => \&end_element_phylogeny,
135 'clade' => \&end_element_clade,
136 'sequence_relation' => \&end_element_relation,
137 'clade_relation' => \&end_element_relation,
139 $self->{'_end_elements'} = \%end_elements;
142 sub DESTROY {
143 my $self = shift;
144 if ($self->mode eq 'w') {
145 $self->_print('</phyloxml>');
146 $self->flush if $self->_flush_on_write && defined $self->_fh;
148 $self->SUPER::DESTROY;
151 =head2 next_tree
153 Title : next_tree
154 Usage : my $tree = $treeio->next_tree
155 Function: Gets the next tree in the stream
156 Returns : Bio::Tree::TreeI
157 Args : none
159 =cut
161 sub next_tree
163 my ($self) = @_;
164 my $reader = $self->{'_reader'};
165 my $tree;
166 while ($reader->read)
168 if ($reader->nodeType == XML_READER_TYPE_END_ELEMENT)
170 if ($reader->name eq 'phylogeny')
172 $tree = $self->end_element_phylogeny();
173 last;
176 $self->processXMLNode;
178 return $tree;
181 =head2 add_attribute
183 Title : add_phyloXML_annotation
184 Usage : my $node = $treeio->add_phyloXML_annotation(-obj=>$node, -attr=>"id_source = \"A\"")
185 Function: add attributes to an object
186 Returns : the node that we added annotations to
187 Args : -obj => object that will have the Annotation. (Bio::Tree::AnnotatableNode)
188 -attr => string in the form "A = B", where A is the attribute name and B is the attribute value
190 =cut
192 sub add_attribute
194 my ($self, @args) = @_;
195 my ($obj, $attr) = $self->_rearrange([qw(OBJ ATTR)], @args);
197 if ($attr) {
198 $attr = '<dummy '.$attr.'/>';
201 my $oldreader = $self->{'_reader'}; # save reader
202 $self->{'_reader'} = XML::LibXML::Reader->new(
203 string => $attr,
204 no_blanks => 1
206 my $reader = $self->{'_reader'};
207 $self->{'_currentannotation'} = []; # holds annotationcollection
208 $self->{'_currenttext'} = '';
209 #$self->{'_id_link'} = {};
211 # pretend we saw a <clade> element
212 $self->{'_lastitem'}->{'dummy'}++;
213 push @{$self->{'_lastitem'}->{'current'}}, { 'dummy'=>{}}; # current holds current element and empty hash for its attributes
215 # push object to annotate
216 push @{$self->{'_currentitems'}}, $obj;
218 # read attributes of element
219 while ($reader->read)
221 #$self->processXMLNode;
222 $self->processAttribute($self->current_attr);
225 # if there is id_source add sequence to _id_link
226 if (exists $self->current_attr->{'id_source'}) {
227 my $idsrc = $self->current_attr->{'id_source'};
228 $self->{'_id_link'}->{$idsrc} = $obj;
231 # check idref
232 my $idref = '';
233 if (exists $self->current_attr->{'id_ref'}) {
234 $idref = $self->current_attr->{'id_ref'};
237 my $srcbyidref = '';
238 $srcbyidref = $self->{'_id_link'}->{$idref};
240 # exception when id_ref is defined but id_src is not, or vice versa.
241 if ($idref xor $srcbyidref) {
242 $self->throw("id_ref and id_src incompatible: $idref, $srcbyidref");
245 # if attribute exists then add Annotation::Collection with tag '_attr'
246 my $newac = $obj->annotation;
247 if ( scalar keys %{$self->current_attr} ) {
248 my $newattr = Bio::Annotation::Collection->new();
249 foreach my $tag (keys %{$self->current_attr}) {
250 my $sv = Bio::Annotation::SimpleValue->new(
251 -value => $self->current_attr->{$tag}
253 $newattr->add_Annotation($tag, $sv);
255 $newac->add_Annotation('_attr', $newattr);
258 # pop from temporary list
259 pop @{$self->{'_currentitems'}};
260 $self->{'_lastitem'}->{ $reader->name }-- if $reader->name;
261 pop @{$self->{'_lastitem'}->{'current'}};
263 $self->{'_reader'} = $oldreader; # restore reader
264 return $obj;
268 =head2 add_phyloXML_annotation
270 Title : add_phyloXML_annotation
271 Usage : my $node = $treeio->add_phyloXML_annotation(-obj=>$node, -xml=>$xmlstring)
272 my $tree = $treeio->add_phyloXML_annotation('-obj'=>$tree, '-xml'=>'<sequence_relation id_ref_0="A" id_ref_1="B" type="orthology"/>')
274 Function: add annotations to a node in the phyloXML format string
275 Returns : the node that we added annotations to
276 Args : -obj => object that will have the Annotation. (Bio::Tree::AnnotatableNode)
277 -xml => string in phyloXML format that describes the annotation for the node
279 =cut
281 sub add_phyloXML_annotation
283 my ($self, @args) = @_;
284 my ($obj, $xml_string) = $self->_rearrange([qw(OBJ XML)], @args);
286 $xml_string = '<phyloxml>'.$xml_string.'</phyloxml>';
287 $self->debug( $xml_string );
289 my $oldreader = $self->{'_reader'}; # save reader
290 $self->{'_reader'} = XML::LibXML::Reader->new(
291 string => $xml_string,
292 no_blanks => 1
294 my $reader = $self->{'_reader'};
295 #$self->{'_currentannotation'} = []; # holds annotationcollection
296 #$self->{'_currenttext'} = '';
297 #$self->{'_id_link'} = {};
299 # pretend we saw a <clade> element
300 $self->{'_lastitem'}->{'clade'}++;
301 push @{$self->{'_lastitem'}->{'current'}}, { 'clade'=>{}}; # current holds current element and empty hash for its attributes
302 # our object to annotate (nodeI)
303 # push into temporary list
304 push @{$self->{'_currentitems'}}, $obj;
306 $reader->read; #read away the first element 'phyloxml'
307 while ($reader->read)
309 $self->processXMLNode;
312 # pop from temporary list
313 pop @{$self->{'_currentitems'}};
314 $self->{'_lastitem'}->{ $reader->name }-- if $reader->name;
315 pop @{$self->{'_lastitem'}->{'current'}};
317 $self->{'_reader'} = $oldreader; # restore reader
318 return $obj;
322 =head2 write_tree
324 Title : write_tree
325 Usage : $treeio->write_tree($tree);
326 Function: Write a tree out to data stream in phyloxml format
327 Returns : none
328 Args : Bio::Tree::TreeI object
330 =cut
332 sub write_tree
334 my ($self, @trees) = @_;
335 foreach my $tree (@trees) {
336 my $root = $tree->get_root_node;
337 $self->_print("<phylogeny");
338 my @tags = $tree->get_all_tags();
339 my $attr_str = '';
340 foreach my $tag (@tags) {
341 my @values = $tree->get_tag_values($tag);
342 foreach (@values) {
343 $attr_str .= " ".$tag."=\"".$_."\"";
346 # check if rooted
347 my ($b_rooted) = $tree->get_tag_values('rooted');
348 if ($b_rooted) {
349 $attr_str .= " rooted=\"true\"";
351 else {
352 if($tree->is_binary($tree->get_root_node)) {
353 $attr_str .= " rooted=\"true\"";
355 else {
356 $attr_str .= " rooted=\"false\"";
359 $self->_print($attr_str);
360 $self->_print(">");
361 if ($root->isa('Bio::Tree::AnnotatableNode')) {
362 $self->_print($self->_write_tree_Helper_annotatableNode($root));
364 else {
365 $self->_print($self->_write_tree_Helper_generic($root));
368 # print clade relations
369 while (my $str = pop (@{$self->{'_tree_attr'}->{'clade_relation'}})) {
370 $self->_print($str);
372 # print sequence relations
373 while (my $str = pop (@{$self->{'_tree_attr'}->{'sequence_relation'}})) {
374 $self->_print($str);
376 $self->_print("</phylogeny>");
378 $self->flush if $self->_flush_on_write && defined $self->_fh;
379 return;
382 =head2 _write_tree_Helper_annotatableNode
384 Title : _write_tree_Helper_annotatableNode
385 Usage : internal method used by write_tree, not to be used directly
386 Function: recursive helper function of write_tree for the annotatableNodes.
387 translates annotations into xml elements.
388 Returns : string describing the node
389 Args : Bio::Node::AnnotatableNode object, string
391 =cut
393 sub _write_tree_Helper_annotatableNode
395 my ($self, $node, $str) = @_; # this self is a Bio::Tree::phyloxml
397 my $ac = $node->annotation;
399 # if clade_relation exists
400 my @relations = $ac->get_Annotations('clade_relation');
401 foreach (@relations) {
402 my $clade_rel = $self->_relation_to_string($node, $_, '');
403 # set as tree attr
404 push (@{$self->{'_tree_attr'}->{'clade_relation'}}, $clade_rel);
407 # start <clade>
408 $str .= '<clade';
409 my ($attr) = $ac->get_Annotations('_attr'); # check id_source
410 if ($attr) {
411 my ($id_source) = $attr->get_Annotations('id_source');
412 if ($id_source) {
413 $str .= " id_source=\"".$id_source->value."\"";
416 $str .= ">";
418 # print all descendent nodes
419 foreach my $child ( $node->each_Descendent() ) {
420 $str = $self->_write_tree_Helper_annotatableNode($child, $str);
423 # print all annotations
424 $str = print_annotation( $node, $str, $ac );
426 # print all sequences
427 if ($node->has_sequence) {
428 foreach my $seq (@{$node->sequence}) {
429 # if sequence_relation exists
430 my @relations = $seq->annotation->get_Annotations('sequence_relation');
431 foreach (@relations) {
432 my $sequence_rel = $self->_relation_to_string($seq, $_, '');
433 # set as tree attr
434 push (@{$self->{'_tree_attr'}->{'sequence_relation'}}, $sequence_rel);
436 $str = print_seq_annotation( $node, $str, $seq );
440 $str .= "</clade>";
442 return $str;
445 =head2 _write_tree_Helper_generic
447 Title : _write_tree_Helper_generic
448 Usage : internal method used by write_tree, not to be used directly
449 Function: recursive helper function of write_tree for generic NodesI.
450 all tags are translated into property elements.
451 Returns : string describing the node
452 Args : Bio::Node::NodeI object, string
454 =cut
456 sub _write_tree_Helper_generic
458 my ($self, $node, $str) = @_; # this self is a Bio::Tree::phyloxml
460 # start <clade>
461 $str .= '<clade>';
463 # print all descendent nodes
464 foreach my $child ( $node->each_Descendent() ) {
465 $str = $self->_write_tree_Helper_generic($child, $str);
468 # print all tags
469 my @tags = $node->get_all_tags();
470 foreach my $tag (@tags) {
471 my @values = $node->get_tag_values($tag);
472 foreach my $val (@values) {
473 $str .= "<property datatype=\"xsd:string\" ref=\"tag:$tag\" applies_to=\"clade\">";
474 $str .=$val;
475 $str .= "</property>";
479 # print NodeI features
480 if ($node->id) {
481 $str .= "<name>";
482 $str .= $node->id;
483 $str .= "</name>";
485 if ($node->branch_length) {
486 $str .= "<branch_length>";
487 $str .= $node->branch_length;
488 $str .= "</branch_length>";
490 if ($node->bootstrap) {
491 $str .= "<confidence type = \"bootstrap\">";
492 $str .= $node->bootstrap;
493 $str .= "</confidence>";
496 $str .= "</clade>";
497 return $str;
500 =head2 _relation_to_string
502 Title : _relation_to_string
503 Usage : internal method used by write_tree, not to be used directly
504 Function: internal function used by write_tree to translate Annotation::Relation objects into xml elements.
505 Returns : string describing the node
506 Args : Bio::Node::AnnotatableNode (or Bio::SeqI) object that contains the Annotation::Relation,
507 the Annotation::Relation object,
508 the string
510 =cut
512 # It may be more appropriate to make Annotation::Relation have
513 # a to_string callback function,
514 # and have this subroutine set as the callback when we are in
515 # phyloXML context.
516 # I've put it here for now, since write_tree is the only place it is used.
518 sub _relation_to_string {
519 my ($self, $obj, $rel, $str) = @_;
521 my @attr = $obj->annotation->get_Annotations('_attr'); # check id_source
522 if (@attr) {
523 my @id_source = $attr[0]->get_Annotations('id_source');
525 my ($id_ref_0) = $obj->annotation->get_nested_Annotations(
526 '-keys' => ['id_source'],
527 '-recursive' => 1);
528 my ($id_ref_1) = $rel->to->annotation->get_nested_Annotations(
529 '-keys' => ['id_source'],
530 '-recursive' => 1);
532 my $confidence = $rel->confidence();
533 my $confidence_type = $rel->confidence_type();
534 $str .= "<";
535 $str .= $rel->tagname;
536 $str .= " id_ref_0=\"".$id_ref_0->value."\"";
537 $str .= " id_ref_1=\"".$id_ref_1->value."\"";
538 $str .= " type=\"".$rel->type."\"";
539 if ($confidence) {
540 $str .= " ><confidence";
541 if ($confidence_type) {
542 $str .= " type=\"".$confidence_type."\"";
544 $str .= ">";
545 $str .= $confidence;
546 $str .= "</confidence>";
547 $str .= "</";
548 $str .= $rel->tagname;
549 $str .= ">";
551 else {
552 $str .= "/>";
554 return $str;
557 =head2 read_annotation
559 Title : read_annotation
560 Usage : $treeio->read_annotation(-obj=>$node, -path=>$path, -attr=>1);
561 Function: read text value (or attribute value) of the annotations corresponding to the element path
562 Returns : list of text values of the annotations matching the path
563 Args : -obj => object that contains the Annotation. (Bio::Tree::AnnotatableNode or Bio::SeqI)
564 -path => path of the nested elements
565 -attr => Boolean value to indicate whether to get the attribute of the element or the text value.
566 (default is 0, meaning text value is returned)
568 =cut
570 # It may be more appropriate to make a separate Annotation::phyloXML object
571 # and have this subroutine within that object so it can handle the
572 # reading and writing of the values and attributes.
573 # but since tagTree is a temporary stub and I didn't want to make
574 # a redundant object I've put it here for now.
576 sub read_annotation
578 my ($self, @args) = @_;
579 my ($obj, $path, $attr) = $self->_rearrange([qw(OBJ PATH ATTR)], @args);
580 my $ac = $obj->annotation;
581 if ($attr) {
582 my @elements = split ('/', $path);
583 my $final = pop @elements;
584 push (@elements, '_attr');
585 push (@elements, $final);
586 $path = join ('/', @elements);
587 return $self->_read_annotation_attr_Helper( [$ac], $path);
589 else {
590 return $self->_read_annotation_text_Helper( [$ac], $path);
594 sub _read_annotation_text_Helper
596 my ($self, $acs, $path) = @_;
597 my @elements = split ('/', $path);
598 my $key = shift @elements;
599 my @nextacs = ();
600 foreach my $ac (@$acs) {
601 foreach my $ann ($ac->get_Annotations($key)) {
602 if ($ann->isa('Bio::AnnotationCollectionI')) {push (@nextacs, $ann)}
605 if (@elements == 0) {
606 my @values = ();
607 my @texts = map {$_->get_Annotations('_text')} @nextacs;
608 foreach (@texts) {
609 $_ && push (@values, $_->value);
611 return @values;
613 else {
614 $path = join ('/', @elements);
615 return $self->_read_annotation_text_Helper( \@nextacs, $path);
619 sub _read_annotation_attr_Helper
621 my ($self, $acs, $path) = @_;
622 my @elements = split ('/', $path);
623 my $key = shift @elements;
624 my @nextacs = ();
625 foreach my $ac (@$acs) {
626 foreach my $ann ($ac->get_Annotations($key)) {
627 if ($ann->isa('Bio::AnnotationCollectionI')) {push (@nextacs, $ann)}
630 if (@elements == 1) {
631 my $attrname = $elements[0];
632 my @sv = map {$_->get_Annotations($attrname)} @nextacs;
633 return map {$_->value} @sv;
635 else {
636 $path = join ('/', @elements);
637 return $self->_read_annotation_attr_Helper( \@nextacs, $path);
641 =head1 Methods for parsing the XML document
643 =cut
645 =head2 processXMLNode
647 Title : processXMLNode
648 Usage : $treeio->processXMLNode
649 Function: read the XML node and process according to the node type
650 Returns : none
651 Args : none
653 =cut
655 sub processXMLNode
657 my ($self) = @_;
658 my $reader = $self->{'_reader'};
659 my $nodetype = $reader->nodeType;
660 if ( $nodetype == XML_READER_TYPE_ELEMENT)
662 $self->{'_lastitem'}->{$reader->name}++;
663 push @{$self->{'_lastitem'}->{'current'}}, { $reader->name=>{}}; # current holds current element and empty hash for its attributes
665 if (exists $self->{'_start_elements'}->{$reader->name}) {
666 my $method = $self->{'_start_elements'}->{$reader->name};
667 $self->$method();
669 else {
670 $self->element_default();
672 if ($reader->isEmptyElement) {
673 # element is complete
674 # set nodetype so it can jump and
675 # do procedures for XML_READER_TYPE_END_ELEMENT
676 $nodetype = XML_READER_TYPE_END_ELEMENT;
679 if ($nodetype == XML_READER_TYPE_TEXT)
681 $self->{'_currenttext'} = $reader->value;
683 if ($nodetype == XML_READER_TYPE_END_ELEMENT)
685 if (exists $self->{'_end_elements'}->{$reader->name}) {
686 my $method = $self->{'_end_elements'}->{$reader->name};
687 $self->$method();
689 else {
690 $self->end_element_default();
692 $self->{'_lastitem'}->{ $reader->name }--;
693 pop @{$self->{'_lastitem'}->{'current'}};
694 $self->{'_currenttext'} = '';
699 =head2 processAttribute
701 Title : processAttribute
702 Usage : $treeio->processAttribute(\%hash_for_attribute);
703 Function: reads the attributes of the current element into a hash
704 Returns : none
705 Args : hash reference where the attributes will be stored.
707 =cut
709 sub processAttribute
711 my ($self, $data) = @_;
712 my $reader = $self->{'_reader'};
714 # several ways of reading attributes:
715 # read all attributes:
716 if ($reader-> moveToFirstAttribute) {
717 do {
718 $data->{$reader->name()} = $reader->value;
719 } while ($reader-> moveToNextAttribute);
720 $reader-> moveToElement;
725 =head2 element_phylogeny
727 Title : element_phylogeny
728 Usage : $treeio->element_phylogeny
729 Function: initialize the parsing of a tree
730 Returns : none
731 Args : none
733 =cut
735 sub element_phylogeny
737 my ($self) = @_;
738 $self->{'_currentitems'} = []; # holds nodes while parsing current level
739 $self->{'_currentnodes'} = []; # holds nodes while constructing tree
740 $self->{'_currentannotation'} = []; # holds annotationcollection
741 $self->{'_currenttext'} = '';
742 $self->{'_levelcnt'} = [];
743 $self->{'_id_link'} = {};
744 $self->{'_tree_attr'} = $self->current_attr;
745 $self->processAttribute($self->current_attr);
746 return;
749 =head2 end_element_phylogeny
751 Title : end_element_phylogeny
752 Usage : $treeio->end_element_phylogeny
753 Function: ends the parsing of a tree building a Tree::TreeI object.
754 Returns : Tree::TreeI
755 Args : none
757 =cut
759 sub end_element_phylogeny
761 my ($self) = @_;
763 my $root;
764 # if there is more than one node in _currentnodes
765 # aggregate the nodes into trees basically ad-hoc.
766 if ( @{$self->{'_currentnodes'}} > 1)
768 $root = $self->nodetype->new(
769 -id => '',
770 tostring => \&node_to_string,
772 while ( @{$self->{'_currentnodes'}} ) {
773 my ($node) = ( shift @{$self->{'_currentnodes'}});
774 $root->add_Descendent($node);
777 # if there is only one node in _currentnodes
778 # that node is root.
779 elsif ( @{$self->{'_currentnodes'}} == 1)
781 $root = shift @{$self->{'_currentnodes'}};
784 my $tree = $self->treetype->new(
785 -root => $root,
786 -id => $self->current_attr->{'name'},
787 %{$self->current_attr}
789 foreach my $tag ( keys %{$self->current_attr} ) {
790 $tree->add_tag_value( $tag, $self->current_attr->{$tag} );
792 return $tree;
795 =head2 element_clade
797 Title : element_clade
798 Usage : $treeio->element_clade
799 Function: initialize the parsing of a node
800 creates a new AnnotatableNode with annotations
801 Returns : none
802 Args : none
804 =cut
806 sub element_clade
808 my ($self) = @_;
809 my $reader = $self->{'_reader'};
810 my %clade_attr = (); # doesn't use current attribute in order to save memory
811 $self->processAttribute(\%clade_attr);
812 # create a node (Annotatable Node)
813 my $tnode = $self->nodetype->new(
814 -id => '',
815 tostring => \&node_to_string,
816 %clade_attr,
818 # add all attributes as annotation collection with tag '_attr'
819 my $ac = $tnode->annotation;
820 my $newattr = Bio::Annotation::Collection->new();
821 foreach my $tag (keys %clade_attr) {
822 my $sv = Bio::Annotation::SimpleValue->new(
823 -value => $clade_attr{$tag}
825 $newattr->add_Annotation($tag, $sv);
827 $ac->add_Annotation('_attr', $newattr);
829 # if there is id_source add clade to _id_link
830 if (exists $clade_attr{'id_source'}) {
831 $self->{'_id_link'}->{$clade_attr{'id_source'}} = $tnode;
833 # push into temporary list
834 push @{$self->{'_currentitems'}}, $tnode;
837 =head2 end_element_clade
839 Title : end_element_clade
840 Usage : $treeio->end_element_clade
841 Function: ends the parsing of a node
842 Returns : none
843 Args : none
845 =cut
847 sub end_element_clade
849 my ($self) = @_;
850 my $reader = $self->{'_reader'};
852 my $curcount = scalar @{$self->{'_currentnodes'}};
853 my $level = $reader->depth() - 2;
854 my $childcnt = $self->{'_levelcnt'}->[$level+1] || 0;
856 # pop from temporary list
857 my $tnode = pop @{$self->{'_currentitems'}};
858 if ( $childcnt > 0) {
859 if( $childcnt > $curcount)
861 $self->throw("something wrong with event construction treelevel ".
862 "$level is recorded as having $childcnt nodes ".
863 "but current nodes at this level is $curcount\n");
865 my @childnodes = splice( @{$self->{'_currentnodes'}}, - $childcnt);
866 for ( @childnodes ) {
867 $tnode->add_Descendent($_);
869 $self->{'_levelcnt'}->[$level+1] = 0;
871 push @{$self->{'_currentnodes'}}, $tnode;
872 $self->{'_levelcnt'}->[$level]++;
876 =head2 element_relation
878 Title : element_relation
879 Usage : $treeio->element_relation
880 Function: starts the parsing of clade relation & sequence relation
881 Returns : none
882 Args : none
884 =cut
886 sub element_relation
888 my ($self) = @_;
889 $self->processAttribute($self->current_attr);
890 my $relationtype = $self->current_attr->{'type'};
891 my $id_ref_0 = $self->current_attr->{'id_ref_0'};
892 my $id_ref_1 = $self->current_attr->{'id_ref_1'};
894 my @srcbyidref = ();
895 $srcbyidref[0] = $self->{'_id_link'}->{$id_ref_0};
896 $srcbyidref[1] = $self->{'_id_link'}->{$id_ref_1};
898 # exception when id_ref is defined but id_src is not, or vice versa.
899 if ( ($id_ref_0 xor $srcbyidref[0])||($id_ref_1 xor $srcbyidref[1]) ) {
900 $self->throw("id_ref and id_src incompatible: $id_ref_0, $id_ref_1, ", $srcbyidref[0], $srcbyidref[1]);
903 # set id_ref_0
904 my $ac0 = $srcbyidref[0]->annotation;
905 my $newann = Bio::Annotation::Relation->new(
906 '-type' => $relationtype,
907 '-to' => $srcbyidref[1],
908 '-tagname' => $self->current_element
910 $ac0->add_Annotation($self->current_element, $newann);
911 # set id_ref_1
912 my $ac1 = $srcbyidref[1]->annotation;
913 $newann = Bio::Annotation::Relation->new(
914 '-type' => $relationtype,
915 '-to' => $srcbyidref[0],
916 '-tagname' => $self->current_element
918 $ac1->add_Annotation($self->current_element, $newann);
919 push (@{$self->{'_currentannotation'}}, $newann);
922 =head2 end_element_relation
924 Title : end_element_relation
925 Usage : $treeio->end_element_relation
926 Function: ends the parsing of clade relation & sequence relation
927 Returns : none
928 Args : none
930 =cut
932 sub end_element_relation
934 my ($self) = @_;
935 my $ac = pop (@{$self->{'_currentannotation'}});
939 =head2 element_default
941 Title : element_default
942 Usage : $treeio->element_default
943 Function: starts the parsing of all other elements
944 Returns : none
945 Args : none
947 =cut
949 sub element_default
951 my ($self) = @_;
952 my $reader = $self->{'_reader'};
953 my $current = $self->current_element();
954 my $prev = $self->prev_element();
956 # read attributes of element
957 $self->processAttribute($self->current_attr);
959 # check idref
960 my $idref = '';
961 if (exists $self->current_attr->{'id_ref'}) {
962 $idref = $self->current_attr->{'id_ref'};
965 my $srcbyidref = '';
966 $srcbyidref = $self->{'_id_link'}->{$idref};
968 # exception when id_ref is defined but id_src is not, or vice versa.
969 if ($idref xor $srcbyidref) {
970 $self->throw("id_ref and id_src incompatible: $idref, $srcbyidref");
973 # we are annotating a Node
974 # set _currentannotation
975 if ( ($srcbyidref && $srcbyidref->isa($self->nodetype)) || ((!$srcbyidref) && $prev eq 'clade') ) {
976 # find node to annotate
977 my $tnode;
978 if ($srcbyidref) {
979 $tnode = $srcbyidref;
981 else {
982 $tnode = $self->{'_currentitems'}->[-1];
984 my $ac = $tnode->annotation();
985 # add the new anncollection with the current element as key
986 my $newann = Bio::Annotation::Collection->new();
987 $ac->add_Annotation($current, $newann);
988 # push to current annotation
989 push (@{$self->{'_currentannotation'}}, $newann);
991 # we are within sequence_relation or clade_relation
992 elsif ($prev eq 'clade_relation' || $prev eq 'sequence_relation') {
993 # do nothing?
995 # we are already within an annotation
996 else {
997 my $ac = $self->{'_currentannotation'}->[-1];
998 if ($ac) {
999 # add the new anncollection with the current element as key
1000 my $newann = Bio::Annotation::Collection->new();
1001 $ac->add_Annotation($current, $newann);
1002 push (@{$self->{'_currentannotation'}}, $newann);
1008 =head2 end_element_default
1010 Title : end_element_default
1011 Usage : $treeio->end_element_default
1012 Function: ends the parsing of all other elements
1013 Returns : none
1014 Args : none
1016 =cut
1018 sub end_element_default
1020 my ($self) = @_;
1021 my $reader = $self->{'_reader'};
1022 my $current = $self->current_element();
1023 my $prev = $self->prev_element();
1025 # check idsrc
1026 my $idsrc = $self->current_attr->{'id_source'};
1028 # check idref
1029 my $idref = '';
1030 if (exists $self->current_attr->{'id_ref'}) {
1031 $idref = $self->current_attr->{'id_ref'};
1032 delete $self->current_attr->{'id_ref'};
1035 my $srcbyidref = '';
1036 $srcbyidref = $self->{'_id_link'}->{$idref};
1038 # exception when id_ref is defined but id_src is not, or vice versa.
1039 if ($idref xor $srcbyidref) {
1040 $self->throw("id_ref and id_src incompatible: $idref, $srcbyidref");
1043 # we are annotating a Tree
1044 if ((!$srcbyidref) && $prev eq 'phylogeny') {
1045 # annotate Tree via tree attribute
1046 $self->prev_attr->{$current} = $self->{'_currenttext'};
1048 # we are within sequence_relation or clade_relation
1049 elsif ($prev eq 'clade_relation' || $prev eq 'sequence_relation') {
1050 my $ann_relation = $self->{'_currentannotation'}->[-1];
1051 # we are here only with <confidence>
1052 if ($current eq 'confidence') {
1053 if (exists $self->current_attr->{'type'}) {
1054 $ann_relation->confidence_type($self->current_attr->{'type'});
1056 $ann_relation->confidence($self->{'_currenttext'});
1058 else {
1059 $self->throw($current, " is not allowed within <*_relation>");
1062 # we are annotating a Node
1063 elsif (( $srcbyidref && $srcbyidref->isa($self->nodetype)) || ((!$srcbyidref) && $prev eq 'clade'))
1065 # pop from current annotation
1066 my $ac = pop (@{$self->{'_currentannotation'}});
1067 $self->annotateNode( $current, $ac);
1069 # additional setups for compatibility with NodeI
1070 my $tnode;
1071 if ($srcbyidref) {
1072 $tnode = $srcbyidref;
1074 else {
1075 $tnode = $self->{'_currentitems'}->[-1];
1077 if ($current eq 'name') {
1078 $tnode->id($self->{'_currenttext'});
1080 elsif ($current eq 'branch_length') {
1081 $tnode->branch_length($self->{'_currenttext'});
1083 elsif ($current eq 'confidence') {
1084 if ((exists $self->current_attr->{'type'}) && ($self->current_attr->{'type'} eq 'bootstrap')) {
1085 $tnode->bootstrap($self->{'_currenttext'}); # this needs to change (adds 'B' annotation)
1088 elsif ($current eq 'sequence') {
1089 # if annotation is <sequence>
1090 # transform the Bio::Annotation object into a Bio::Seq object
1091 my $str = '';
1092 # retrieve the sequence
1093 if (my ($molseq) = $ac->get_Annotations('mol_seq')) {
1094 my ($strac) = $molseq->get_Annotations('_text');
1095 $str = $strac->value();
1097 # create Seq object with sequence
1098 my $newseq = Bio::Seq->new( -seq => $str,
1099 -annotation=>$ac,
1100 -nowarnonempty=>1);
1101 $tnode->sequence($newseq);
1102 $ac->remove_Annotations('mol_seq');
1103 $tnode->annotation->remove_Annotations($current);
1104 # if there is id_source add sequence to _id_link
1105 if ($idsrc) {
1106 $self->{'_id_link'}->{$idsrc} = $newseq;
1109 elsif ($idsrc && $current eq 'taxonomy') {
1110 # if there is id_source add sequence to _id_link
1111 $self->{'_id_link'}->{$idsrc} = $ac;
1114 # we are within a default Annotation
1115 else {
1116 my $ac = pop (@{$self->{'_currentannotation'}});
1117 if ($ac) {
1118 $self->annotateNode( $current, $ac);
1124 =head2 annotateNode
1126 Title : annotateNode
1127 Usage : $treeio->annotateNode($element, $ac)
1128 Function: adds text value and attributes to the AnnotationCollection
1129 that has element name as key. If there are nested elements,
1130 optional AnnotationCollections are added recursively,
1131 with the nested element name as key.
1132 The structure of each AnnotationCollection is
1133 'element' => AnnotationCollection {
1134 '_text' => SimpleValue (text value)
1135 '_attr' => AnnotationCollection {
1136 attribute1 => SimpleValue (attribute value 1)
1137 attribute2 => SimpleValue (attribute value 2)
1140 ['nested element' => AnnotationCollection ]
1142 Returns : none
1143 Args : none
1145 =cut
1147 sub annotateNode
1149 my ($self, $element, $newac) = @_;
1150 # if attribute exists then add Annotation::Collection with tag '_attr'
1151 if ( scalar keys %{$self->current_attr} ) {
1152 my $newattr = Bio::Annotation::Collection->new();
1153 foreach my $tag (keys %{$self->current_attr}) {
1154 my $sv = Bio::Annotation::SimpleValue->new(
1155 -value => $self->current_attr->{$tag}
1157 $newattr->add_Annotation($tag, $sv);
1159 $newac->add_Annotation('_attr', $newattr);
1161 # if text exists add text as SimpleValue with tag '_text'
1162 if ( $self->{'_currenttext'} ) {
1163 my $newvalue = Bio::Annotation::SimpleValue->new( -value => $self->{'_currenttext'} );
1164 $newac->add_Annotation('_text', $newvalue);
1169 =head1 Methods for exploring the document
1171 =cut
1173 =head2 current_attr
1175 Title : current_attr
1176 Usage : $attr_hash = $treeio->current_attr;
1177 Function: returns the attribute hash for current item
1178 Returns : reference of the attribute hash
1179 Args : none
1181 =cut
1183 sub current_attr {
1184 my ($self) = @_;
1186 return 0 if ! defined $self->{'_lastitem'} ||
1187 ! defined $self->{'_lastitem'}->{'current'}->[-1];
1188 my @keys = keys %{$self->{'_lastitem'}->{'current'}->[-1]};
1189 (@keys == 1) || die "there should be only one key for each hash";
1190 return $self->{'_lastitem'}->{'current'}->[-1]->{$keys[0]};
1193 =head2 prev_attr
1195 Title : prev_attr
1196 Usage : $hash_ref = $treeio->prev_attr
1197 Function: returns the attribute hash for previous item
1198 Returns : reference of the attribute hash
1199 Args : none
1201 =cut
1203 sub prev_attr {
1204 my ($self) = @_;
1206 return 0 if ! defined $self->{'_lastitem'} ||
1207 ! defined $self->{'_lastitem'}->{'current'}->[-2];
1208 my @keys = keys %{$self->{'_lastitem'}->{'current'}->[-2]};
1209 (@keys == 1) || die "there should be only one key for each hash";
1210 return $self->{'_lastitem'}->{'current'}->[-2]->{$keys[0]};
1213 =head2 current_element
1215 Title : current_element
1216 Usage : $element = $treeio->current_element
1217 Function: returns the name of the current element
1218 Returns : string (element name)
1219 Args : none
1221 =cut
1223 sub current_element {
1224 my ($self) = @_;
1226 return 0 if ! defined $self->{'_lastitem'} ||
1227 ! defined $self->{'_lastitem'}->{'current'}->[-1];
1228 my @keys = keys %{$self->{'_lastitem'}->{'current'}->[-1]};
1229 (@keys == 1) || die "there should be only one key for each hash";
1230 return $keys[0];
1233 =head2 prev_element
1235 Title : prev_element
1236 Usage : $element = $treeio->current_element
1237 Function: returns the name of the previous element
1238 Returns : string (element name)
1239 Args : none
1241 =cut
1243 sub prev_element {
1244 my ($self) = @_;
1246 return 0 if ! defined $self->{'_lastitem'} ||
1247 ! defined $self->{'_lastitem'}->{'current'}->[-2];
1248 my @keys = keys %{$self->{'_lastitem'}->{'current'}->[-2]};
1249 (@keys == 1) || die "there should be only one key for each hash";
1250 return $keys[0];
1254 =head2 treetype
1256 Title : treetype
1257 Usage : $obj->treetype($newval)
1258 Function: returns the tree type (default is Bio::Tree::Tree)
1259 Returns : value of treetype
1260 Args : newvalue (optional)
1263 =cut
1265 sub treetype{
1266 my ($self,$value) = @_;
1267 if( defined $value) {
1268 $self->{'treetype'} = $value;
1270 return $self->{'treetype'};
1273 =head2 nodetype
1275 Title : nodetype
1276 Usage : $obj->nodetype($newval)
1277 Function: returns the node type (default is Bio::Node::AnnotatableNode)
1278 Returns : value of nodetype
1279 Args : newvalue (optional)
1281 =cut
1283 sub nodetype{
1284 my ($self,$value) = @_;
1285 if( defined $value) {
1286 $self->{'nodetype'} = $value;
1288 return $self->{'nodetype'};
1292 =head1 Methods for implementing to_string callback for AnnotatableNode
1294 =cut
1296 =head2 node_to_string
1298 Title : node_to_string
1299 Usage : $annotatablenode->to_string_callback(\&node_to_string)
1300 Function: set as callback in AnnotatableNode, prints the node information in string
1301 Returns : string of node information
1302 Args : none
1304 =cut
1306 # this function is similar to _write_tree_Helper_annotatableNode,
1307 # but it is not recursive
1308 sub node_to_string
1310 my ($self) = @_; # this self is a Bio::Tree::AnnotatableNode
1311 # not a Bio::TreeIO::phyloxml
1312 my $str = '';
1313 my $ac = $self->annotation;
1315 # start <clade>
1316 $str .= '<clade';
1317 my @attr = $ac->get_Annotations('_attr'); # check id_source
1318 if (@attr) {
1319 my @id_source = $attr[0]->get_Annotations('id_source');
1320 if (@id_source) {
1321 $str .= " id_source=\"".$id_source[0]->value."\"";
1324 $str .= '>';
1326 # print all annotations
1327 $str = print_annotation( $self, $str, $ac );
1328 # print all sequences
1329 if ($self->has_sequence) {
1330 foreach my $seq (@{$self->sequence}) {
1331 $str = print_seq_annotation( $self, $str, $seq );
1335 $str .= '</clade>';
1336 return $str;
1339 =head2 print_annotation
1341 Title : print_annotation
1342 Usage : $str = $annotatablenode->print_annotation($str, $annotationcollection)
1343 Function: prints the annotationCollection in a phyloXML format.
1344 Returns : string of annotation information
1345 Args : string to which the Annotation should be concatenated to,
1346 annotationCollection that holds the Annotations
1348 =cut
1350 # Again, it may be more appropriate to make a separate Annotation::phyloXML object
1351 # and have this subroutine within that object so it can handle the
1352 # reading and writing of the values and attributes.
1353 # especially since this function is used both by
1354 # Bio::TreeIO::phyloxml (through write_tree) and
1355 # Bio::Node::AnnotatableNode (through node_to_string).
1356 # but since tagTree is a temporary stub and I didn't want to make
1357 # a redundant object I've put it here for now.
1359 sub print_annotation
1361 my ($self, $str, $ac) = @_;
1363 my @all_anns = $ac->get_Annotations();
1364 foreach my $ann (@all_anns) {
1365 my $key = $ann->tagname;
1366 if ($key eq '_attr') { next; } # attributes are already printed in the previous level
1367 if ($ann->isa('Bio::Annotation::SimpleValue'))
1369 if ($key eq '_text') {
1370 $str .= $ann->value;
1372 else {
1373 $str .= "<$key>";
1374 $str .= $ann->value;
1375 $str .= "</$key>";
1378 elsif ($ann->isa('Bio::Annotation::Collection'))
1380 my @attrs = $ann->get_Annotations('_attr');
1381 if (@attrs) { # if there is a attribute collection
1382 $str .= "<$key";
1383 $str = print_attr($self, $str, $attrs[0]);
1384 $str .= ">";
1386 else {
1387 $str .= "<$key>";
1389 $str = print_annotation($self, $str, $ann);
1390 $str .= "</$key>";
1393 return $str;
1396 =head2 print_attr
1398 Title : print_attr
1399 Usage : $str = $annotatablenode->print_attr($str, $annotationcollection)
1400 Function: prints the annotationCollection in a phyloXML format.
1401 Returns : string of attributes
1402 Args : string to which the Annotation should be concatenated to,
1403 AnnotationCollection that holds the attributes
1405 =cut
1407 # Again, it may be more appropriate to make a separate Annotation::phyloXML object
1408 # and have this subroutine within that object so it can handle the
1409 # reading and writing of the values and attributes.
1410 # especially since this function is used both by
1411 # Bio::TreeIO::phyloxml and Bio::Node::AnnotatableNode
1412 # (through print_annotation).
1413 # but since tagTree is a temporary stub and I didn't want to make
1414 # a redundant object I've put it here for now.
1416 sub print_attr
1418 my ($self, $str, $ac) = @_;
1419 my @all_attrs = $ac->get_Annotations();
1420 foreach my $attr (@all_attrs) {
1421 if (!$attr->isa('Bio::Annotation::SimpleValue')) {
1422 $self->throw("attribute should be a SimpleValue");
1424 $str .= ' ';
1425 $str .= $attr->tagname;
1426 $str .= '=';
1427 $str .= '"'.$attr->value.'"';
1429 return $str;
1432 =head2 print_sequence_annotation
1434 Title : print_sequence_annotation
1435 Usage : $str = $node->print_seq_annotation( $str, $seq );
1436 Function: prints the Bio::Seq object associated with the node
1437 in a phyloXML format.
1438 Returns : string that describes the sequence
1439 Args : string to which the Annotation should be concatenated to,
1440 Seq object to print in phyloXML
1442 =cut
1444 # Again, it may be more appropriate to make a separate Annotation::phyloXML object
1445 # and have this subroutine within that object so it can handle the
1446 # reading and writing of the values and attributes.
1447 # especially since this function is used both by
1448 # Bio::TreeIO::phyloxml (through write_tree) and
1449 # Bio::Node::AnnotatableNode (through node_to_string).
1450 # but since tagTree is a temporary stub and I didn't want to make
1451 # a redundant object I've put it here for now.
1454 sub print_seq_annotation
1456 my ($self, $str, $seq) = @_;
1458 $str .= "<sequence";
1459 my ($attr) = $seq->annotation->get_Annotations('_attr'); # check id_source
1460 if ($attr) {
1461 my ($id_source) = $attr->get_Annotations('id_source');
1462 if ($id_source) {
1463 $str .= " id_source=\"".$id_source->value."\"";
1466 $str .= ">";
1468 my @all_anns = $seq->annotation->get_Annotations();
1469 foreach my $ann (@all_anns) {
1470 my $key = $ann->tagname;
1471 if ($key eq '_attr') { next; } # attributes are already printed in the previous level
1472 if ($ann->isa('Bio::Annotation::SimpleValue'))
1474 if ($key eq '_text') {
1475 $str .= $ann->value;
1477 else {
1478 $str .= "<$key>";
1479 $str .= $ann->value;
1480 $str .= "</$key>";
1483 elsif ($ann->isa('Bio::Annotation::Collection'))
1485 my @attrs = $ann->get_Annotations('_attr');
1486 if (@attrs) { # if there is a attribute collection
1487 $str .= "<$key";
1488 $str = print_attr($self, $str, $attrs[0]);
1489 $str .= ">";
1491 else {
1492 $str .= "<$key>";
1494 $str = print_annotation($self, $str, $ann);
1495 $str .= "</$key>";
1498 # print mol_seq
1499 if ($seq->seq()) {
1500 $str .= "<mol_seq>";
1501 $str .= $seq->seq();
1502 $str .= "</mol_seq>";
1505 $str .= "</sequence>";
1506 return $str;