nexml.t: Added a missing XML::Twig requirement.
[bioperl-live.git] / Bio / Tree / Node.pm
blobc76613c31061298bf208478956728858e88e752f
2 # BioPerl module for Bio::Tree::Node
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Jason Stajich <jason-at-bioperl.org>
8 # Copyright Jason Stajich
10 # You may distribute this module under the same terms as perl itself
12 # POD documentation - main docs before the code
14 =head1 NAME
16 Bio::Tree::Node - A Simple Tree Node
18 =head1 SYNOPSIS
20 use Bio::Tree::Node;
21 my $nodeA = Bio::Tree::Node->new();
22 my $nodeL = Bio::Tree::Node->new();
23 my $nodeR = Bio::Tree::Node->new();
25 my $node = Bio::Tree::Node->new();
26 $node->add_Descendent($nodeL);
27 $node->add_Descendent($nodeR);
29 print "node is not a leaf \n" if( $node->is_leaf);
31 =head1 DESCRIPTION
33 Makes a Tree Node suitable for building a Tree.
35 =head1 FEEDBACK
37 =head2 Mailing Lists
39 User feedback is an integral part of the evolution of this and other
40 Bioperl modules. Send your comments and suggestions preferably to
41 the Bioperl mailing list. Your participation is much appreciated.
43 bioperl-l@bioperl.org - General discussion
44 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
46 =head2 Support
48 Please direct usage questions or support issues to the mailing list:
50 I<bioperl-l@bioperl.org>
52 rather than to the module maintainer directly. Many experienced and
53 reponsive experts will be able look at the problem and quickly
54 address it. Please include a thorough description of the problem
55 with code and data examples if at all possible.
57 =head2 Reporting Bugs
59 Report bugs to the Bioperl bug tracking system to help us keep track
60 of the bugs and their resolution. Bug reports can be submitted via
61 the web:
63 https://github.com/bioperl/bioperl-live/issues
65 =head1 AUTHOR - Jason Stajich
67 Email jason-at-bioperl-dot-org
69 =head1 CONTRIBUTORS
71 Aaron Mackey, amackey-at-virginia-dot-edu
72 Sendu Bala, bix@sendu.me.uk
74 =head1 APPENDIX
76 The rest of the documentation details each of the object methods.
77 Internal methods are usually preceded with a _
79 =cut
82 # Let the code begin...
84 package Bio::Tree::Node;
85 use vars qw($CREATIONORDER);
86 use strict;
88 use base qw(Bio::Root::Root Bio::Tree::NodeI);
90 BEGIN {
91 $CREATIONORDER = 1;
94 =head2 new
96 Title : new
97 Usage : my $obj = Bio::Tree::Node->new();
98 Function: Builds a new Bio::Tree::Node object
99 Returns : Bio::Tree::Node
100 Args : -descendents => arrayref of descendents (they will be
101 updated s.t. their ancestor point is this
102 node)
103 -branch_length => branch length [integer] (optional)
104 -bootstrap => value bootstrap value (string)
105 -description => description of node
106 -id => human readable id for node
108 =cut
110 sub new {
111 my($class,@args) = @_;
113 my $self = $class->SUPER::new(@args);
114 my ($children, $branchlen,$id,
115 $bootstrap, $desc,$d) = $self->_rearrange([qw(
116 DESCENDENTS
117 BRANCH_LENGTH
119 BOOTSTRAP
120 DESC
121 DESCRIPTION
123 @args);
124 $self->_register_for_cleanup(\&node_cleanup);
125 $self->{'_desc'} = {}; # for descendents
126 if( defined $d && defined $desc ) {
127 $self->warn("can only accept -desc or -description, not both, accepting -description");
128 $desc = $d;
129 } elsif( defined $d && ! defined $desc ) {
130 $desc = $d;
132 defined $desc && $self->description($desc);
133 defined $bootstrap && $self->bootstrap($bootstrap);
134 defined $id && $self->id($id);
135 defined $branchlen && $self->branch_length($branchlen);
136 if( defined $children ) {
137 if( ref($children) !~ /ARRAY/i ) {
138 $self->throw("Must specify a valid ARRAY reference to initialize a Node's Descendents");
140 foreach my $c ( @$children ) {
141 $self->add_Descendent($c);
144 $self->_creation_id($CREATIONORDER++);
145 return $self;
148 =head2 create_node_on_branch
150 Title : create_node_on_branch
151 Usage : $node->create_node_on_branch($at_length)
152 Function: Create a node on the ancestral branch of the calling
153 object.
154 Example :
155 Returns : the created node
156 Args : -POSITION=>$absolute_branch_length_from_caller (default)
157 -FRACTION=>$fraction_of_branch_length_from_caller
158 -ANNOT=>{ -id => "the id", -desc => "the description" }
159 -FORCE, set to allow nodes with zero branch lengths
161 =cut
163 sub create_node_on_branch{
164 my ($self,@args) = @_;
165 my ($pos, $frac, $annot, $force) = $self->_rearrange([qw(POSITION FRACTION ANNOT FORCE)], @args);
166 my ($newpos);
167 my $blen = $self->branch_length;
168 # arg checks
169 $force||=0;
170 $annot||={};
172 unless ($self->ancestor) {
173 $self->throw("Refusing to create nodes above the root--exiting");
175 unless ($blen) {
176 $self->throw("Calling node's branch length is zero") unless $force;
178 unless ((defined $pos && !defined $frac)||(defined $frac && !defined $pos)) {
179 $self->throw("Either position or fraction must be specified, but not both");
181 if (defined $frac) {
182 $self->throw("FRACTION arg must be in the range [0,1]") unless ( (0 <= $frac) && ($frac <= 1) );
183 $newpos = $frac*$blen;
185 elsif (defined $pos) {
186 $self->throw("POSITION arg must be in the range [0,$blen]") unless ( (0 <= $pos) && ($pos <= $blen) );
187 $newpos = $pos;
189 else {
190 $self->throw("How did I get here?");
192 $self->throw("Calling node's branch length will be zero (set -FORCE to force)--exiting") unless ($newpos > 0) || $force;
193 $self->throw("Created nodes branch length would be zero (set -FORCE to force)--exiting") unless ($newpos < $blen) || $force;
195 #guts
196 $annot->{'-branch_length'} = $blen-$newpos;
197 my $node = Bio::Tree::Node->new(%$annot);
198 my $anc = $self->ancestor;
199 # null anc check is above
200 $node->add_Descendent($self);
201 $anc->add_Descendent($node);
202 $anc->remove_Descendent($self);
203 $self->branch_length($newpos);
204 return $node;
207 =head2 add_Descendent
209 Title : add_Descendent
210 Usage : $node->add_Descendent($node);
211 Function: Adds a descendent to a node
212 Returns : number of current descendents for this node
213 Args : Bio::Node::NodeI
214 boolean flag, true if you want to ignore the fact that you are
215 adding a second node with the same unique id (typically memory
216 location reference in this implementation). default is false and
217 will throw an error if you try and overwrite an existing node.
219 =cut
221 sub add_Descendent{
222 my ($self,$node,$ignoreoverwrite) = @_;
223 return -1 if( ! defined $node );
225 if( ! ref($node) ||
226 ref($node) =~ /HASH/ ||
227 ! $node->isa('Bio::Tree::NodeI') ) {
228 $self->throw("Trying to add a Descendent who is not a Bio::Tree::NodeI");
229 return -1;
232 $self->{_adding_descendent} = 1;
233 # avoid infinite recurse
234 $node->ancestor($self) unless $node->{_setting_ancestor};
235 $self->{_adding_descendent} = 0;
237 if( $self->{'_desc'}->{$node->internal_id} && ! $ignoreoverwrite ) {
238 $self->throw("Going to overwrite a node which is $node that is already stored here, set the ignore overwrite flag (parameter 2) to true to ignore this in the future");
240 $self->{'_desc'}->{$node->internal_id} = $node; # is this safely unique - we've tested before at any rate??
242 $self->invalidate_height();
244 return scalar keys %{$self->{'_desc'}};
247 =head2 each_Descendent
249 Title : each_Descendent($sortby)
250 Usage : my @nodes = $node->each_Descendent;
251 Function: all the descendents for this Node (but not their descendents
252 i.e. not a recursive fetchall)
253 Returns : Array of Bio::Tree::NodeI objects
254 Args : $sortby [optional] "height", "creation", "alpha", "revalpha",
255 or coderef to be used to sort the order of children nodes.
257 =cut
259 sub each_Descendent{
260 my ($self, $sortby) = @_;
262 # order can be based on branch length (and sub branchlength)
263 $sortby ||= 'none';
264 if (ref $sortby eq 'CODE') {
265 my @values = sort { $sortby->($a,$b) } values %{$self->{'_desc'}};
266 return @values;
267 } elsif ($sortby eq 'height') {
268 return map { $_->[0] }
269 sort { $a->[1] <=> $b->[1] ||
270 $a->[2] <=> $b->[2] }
271 map { [$_, $_->height, $_->internal_id ] }
272 values %{$self->{'_desc'}};
273 } elsif( $sortby eq 'alpha' ) {
274 my @set;
275 for my $v ( values %{$self->{'_desc'}} ) {
276 unless( $v->is_Leaf ) {
277 my @lst = ( sort { $a cmp $b } map { $_->id }
278 grep { $_->is_Leaf }
279 $v->get_all_Descendents($sortby));
280 push @set, [$v, $lst[0], $v->internal_id];
281 } else {
282 push @set, [$v, $v->id, $v->internal_id];
285 return map { $_->[0] }
286 sort {$a->[1] cmp $b->[1] || $a->[2] <=> $b->[2] } @set;
287 } elsif( $sortby eq 'revalpha' ) {
288 my @set;
289 for my $v ( values %{$self->{'_desc'}} ) {
290 if( ! defined $v->id &&
291 ! $v->is_Leaf ) {
292 my ($l) = ( sort { $b cmp $a } map { $_->id }
293 grep { $_->is_Leaf }
294 $v->get_all_Descendents($sortby));
295 push @set, [$v, $l, $v->internal_id];
296 } else {
297 push @set, [$v, $v->id, $v->internal_id];
300 return map { $_->[0] }
301 sort {$b->[1] cmp $a->[1] || $b->[2] <=> $a->[2] } @set;
302 } else { # creation
303 return map { $_->[0] }
304 sort { $a->[1] <=> $b->[1] }
305 map { [$_, $_->internal_id ] }
306 grep {defined $_}
307 values %{$self->{'_desc'}};
311 =head2 remove_Descendent
313 Title : remove_Descendent
314 Usage : $node->remove_Descendent($node_foo);
315 Function: Removes a specific node from being a Descendent of this node
316 Returns : nothing
317 Args : An array of Bio::Node::NodeI objects which have been previously
318 passed to the add_Descendent call of this object.
320 =cut
322 sub remove_Descendent{
323 my ($self,@nodes) = @_;
324 my $c= 0;
325 foreach my $n ( @nodes ) {
326 if( $self->{'_desc'}->{$n->internal_id} ) {
327 $self->{_removing_descendent} = 1;
328 $n->ancestor(undef);
329 $self->{_removing_descendent} = 0;
330 # should be redundant
331 $self->{'_desc'}->{$n->internal_id}->ancestor(undef);
332 delete $self->{'_desc'}->{$n->internal_id};
333 $c++;
334 } else {
335 if( $self->verbose ) {
336 $self->debug(sprintf("no node %s (%s) listed as a descendent in this node %s (%s)\n",$n->id, $n,$self->id,$self));
337 $self->debug("Descendents are " . join(',', keys %{$self->{'_desc'}})."\n");
344 =head2 remove_all_Descendents
346 Title : remove_all_Descendents
347 Usage : $node->remove_All_Descendents()
348 Function: Cleanup the node's reference to descendents and reset
349 their ancestor pointers to undef, if you don't have a reference
350 to these objects after this call they will be cleaned up - so
351 a get_nodes from the Tree object would be a safe thing to do first
352 Returns : nothing
353 Args : none
355 =cut
357 sub remove_all_Descendents{
358 my ($self) = @_;
359 # This won't cleanup the nodes themselves if you also have
360 # a copy/pointer of them (I think)...
362 # That's true. But that's not a bug; if we retain a reference to them it's
363 # very possible we want to keep them. The only way to truly destroy them is
364 # to call DESTROY on the instance.
366 while( my ($node,$val) = each %{ $self->{'_desc'} } ) {
367 delete $self->{'_desc'}->{$node}
369 $self->{'_desc'} = {};
373 =head2 get_all_Descendents
375 Title : get_all_Descendents
376 Usage : my @nodes = $node->get_all_Descendents;
377 Function: Recursively fetch all the nodes and their descendents
378 *NOTE* This is different from each_Descendent
379 Returns : Array or Bio::Tree::NodeI objects
380 Args : none
382 =cut
384 # get_all_Descendents implemented in the interface
386 =head2 ancestor
388 Title : ancestor
389 Usage : $obj->ancestor($newval)
390 Function: Set the Ancestor
391 Returns : ancestral node
392 Args : newvalue (optional)
394 =cut
396 sub ancestor {
397 my $self = shift;
398 if (@_) {
399 my $new_ancestor = shift;
401 # we can set ancestor to undef
402 if ($new_ancestor) {
403 $self->throw("This is [$new_ancestor], not a Bio::Tree::NodeI")
404 unless $new_ancestor->isa('Bio::Tree::NodeI');
407 my $old_ancestor = $self->{'_ancestor'} || '';
408 if (!$old_ancestor ||
409 ($old_ancestor && ( !$new_ancestor ||
410 $new_ancestor ne $old_ancestor)) ) {
411 if( $old_ancestor && ! $old_ancestor->{_removing_descendent}) {
412 $old_ancestor->remove_Descendent($self);
414 if ($new_ancestor &&
415 ! $new_ancestor->{_adding_descendent} ) { # avoid infinite recurse
416 $self->{_setting_ancestor} = 1;
417 $new_ancestor->add_Descendent($self, 1);
418 $self->{_setting_ancestor} = 0;
421 $self->{'_ancestor'} = $new_ancestor;
424 return $self->{'_ancestor'};
427 =head2 branch_length
429 Title : branch_length
430 Usage : $obj->branch_length()
431 Function: Get/Set the branch length
432 Returns : value of branch_length
433 Args : newvalue (optional)
435 =cut
437 sub branch_length{
438 my $self = shift;
439 if( @_ ) {
440 my $bl = shift;
441 if( defined $bl &&
442 $bl =~ s/\[(\d+)\]// ) {
443 $self->bootstrap($1);
445 $self->{'_branch_length'} = $bl;
446 $self->invalidate_height();
448 return $self->{'_branch_length'};
451 =head2 bootstrap
453 Title : bootstrap
454 Usage : $obj->bootstrap($newval)
455 Function: Get/Set the bootstrap value
456 Returns : value of bootstrap
457 Args : newvalue (optional)
459 =cut
461 sub bootstrap {
462 my $self = shift;
463 if( @_ ) {
464 if( $self->has_tag('B') ) {
465 $self->remove_tag('B');
467 $self->add_tag_value('B',shift);
469 return ($self->get_tag_values('B'))[0];
472 =head2 description
474 Title : description
475 Usage : $obj->description($newval)
476 Function: Get/Set the description string
477 Returns : value of description
478 Args : newvalue (optional)
480 =cut
482 sub description {
483 my $self = shift;
484 $self->{'_description'} = shift @_ if @_;
485 return $self->{'_description'};
488 =head2 id
490 Title : id
491 Usage : $obj->id($newval)
492 Function: The human readable identifier for the node
493 Returns : value of human readable id
494 Args : newvalue (optional)
496 "A name can be any string of printable characters except blanks,
497 colons, semicolons, parentheses, and square brackets. Because you may
498 want to include a blank in a name, it is assumed that an underscore
499 character ("_") stands for a blank; any of these in a name will be
500 converted to a blank when it is read in."
502 from L<http://evolution.genetics.washington.edu/phylip/newicktree.html>
504 Also note that these objects now support spaces, ();: because we can
505 automatically quote the strings if they contain these characters. The
506 L<id_output> method does this for you so use the id() method to get
507 the raw string while L<id_output> to get the pre-escaped string.
509 =cut
511 sub id {
512 my ($self, $value) = @_;
513 if (defined $value) {
514 #$self->warn("Illegal characters ();: and space in the id [$value], converting to _ ")
515 # if $value =~ /\(\);:/ and $self->verbose >= 0;
516 #$value =~ s/[\(\);:\s]/_/g;
517 $self->{'_id'} = $value;
519 return $self->{'_id'};
522 =head2 Helper Functions
524 =cut
526 =head2 id_output
528 Title : id_output
529 Usage : my $id = $node->id_output;
530 Function: Return an id suitable for output in format like newick
531 so that if it contains spaces or ():; characters it is properly
532 quoted
533 Returns : $id string if $node->id has a value
534 Args : none
536 =cut
538 # implemented in NodeI interface
540 =head2 internal_id
542 Title : internal_id
543 Usage : my $internalid = $node->internal_id
544 Function: Returns the internal unique id for this Node
545 (a monotonically increasing number for this in-memory implementation
546 but could be a database determined unique id in other
547 implementations)
548 Returns : unique id
549 Args : none
551 =cut
553 sub internal_id {
554 return $_[0]->_creation_id;
557 =head2 _creation_id
559 Title : _creation_id
560 Usage : $obj->_creation_id($newval)
561 Function: a private method signifying the internal creation order
562 Returns : value of _creation_id
563 Args : newvalue (optional)
565 =cut
567 sub _creation_id {
568 my $self = shift @_;
569 $self->{'_creation_id'} = shift @_ if( @_);
570 return $self->{'_creation_id'} || 0;
573 =head2 Bio::Node::NodeI decorated interface implemented
575 The following methods are implemented by L<Bio::Node::NodeI> decorated
576 interface.
578 =head2 is_Leaf
580 Title : is_Leaf
581 Usage : if( $node->is_Leaf )
582 Function: Get Leaf status
583 Returns : boolean
584 Args : none
586 =cut
588 sub is_Leaf {
589 my ($self) = @_;
590 my $isleaf = ! (defined $self->{'_desc'} &&
591 (keys %{$self->{'_desc'}} > 0) );
592 return $isleaf;
595 =head2 height
597 Title : height
598 Usage : my $len = $node->height
599 Function: Returns the height of the tree starting at this
600 node. Height is the maximum branchlength to get to the tip.
601 Returns : The longest length (weighting branches with branch_length) to a leaf
602 Args : none
604 =cut
606 sub height {
607 my ($self) = @_;
608 return $self->{'_height'} if( defined $self->{'_height'} );
610 return 0 if( $self->is_Leaf );
611 my $max = 0;
612 foreach my $subnode ( $self->each_Descendent ) {
613 my $bl = $subnode->branch_length;
614 $bl = 1 unless (defined $bl && $bl =~ /^\-?\d+(\.\d+)?$/);
615 my $s = $subnode->height + $bl;
616 if( $s > $max ) { $max = $s; }
618 return ($self->{'_height'} = $max);
621 =head2 invalidate_height
623 Title : invalidate_height
624 Usage : private helper method
625 Function: Invalidate our cached value of the node height in the tree
626 Returns : nothing
627 Args : none
629 =cut
631 sub invalidate_height {
632 my ($self) = @_;
634 $self->{'_height'} = undef;
635 if( defined $self->ancestor ) {
636 $self->ancestor->invalidate_height;
640 =head2 set_tag_value
642 Title : set_tag_value
643 Usage : $node->set_tag_value($tag,$value)
644 $node->set_tag_value($tag,@values)
645 Function: Sets a tag value(s) to a node. Replaces old values.
646 Returns : number of values stored for this tag
647 Args : $tag - tag name
648 $value - value to store for the tag
650 =cut
652 sub set_tag_value{
653 my ($self,$tag,@values) = @_;
654 if( ! defined $tag || ! scalar @values ) {
655 $self->warn("cannot call set_tag_value with an undefined value");
657 $self->remove_tag ($tag);
658 map { push @{$self->{'_tags'}->{$tag}}, $_ } @values;
659 return scalar @{$self->{'_tags'}->{$tag}};
663 =head2 add_tag_value
665 Title : add_tag_value
666 Usage : $node->add_tag_value($tag,$value)
667 Function: Adds a tag value to a node
668 Returns : number of values stored for this tag
669 Args : $tag - tag name
670 $value - value to store for the tag
672 =cut
674 sub add_tag_value{
675 my ($self,$tag,$value) = @_;
676 if( ! defined $tag || ! defined $value ) {
677 $self->warn("cannot call add_tag_value with an undefined value".($tag ? " ($tag)" : ''));
678 $self->warn($self->stack_trace_dump,"\n");
680 push @{$self->{'_tags'}->{$tag}}, $value;
681 return scalar @{$self->{'_tags'}->{$tag}};
684 =head2 remove_tag
686 Title : remove_tag
687 Usage : $node->remove_tag($tag)
688 Function: Remove the tag and all values for this tag
689 Returns : boolean representing success (0 if tag does not exist)
690 Args : $tag - tagname to remove
693 =cut
695 sub remove_tag {
696 my ($self,$tag) = @_;
697 if( exists $self->{'_tags'}->{$tag} ) {
698 $self->{'_tags'}->{$tag} = undef;
699 delete $self->{'_tags'}->{$tag};
700 return 1;
702 return 0;
705 =head2 remove_all_tags
707 Title : remove_all_tags
708 Usage : $node->remove_all_tags()
709 Function: Removes all tags
710 Returns : None
711 Args : None
713 =cut
715 sub remove_all_tags{
716 my ($self) = @_;
717 $self->{'_tags'} = {};
718 return;
721 =head2 get_all_tags
723 Title : get_all_tags
724 Usage : my @tags = $node->get_all_tags()
725 Function: Gets all the tag names for this Node
726 Returns : Array of tagnames
727 Args : None
729 =cut
731 sub get_all_tags{
732 my ($self) = @_;
733 my @tags = sort keys %{$self->{'_tags'} || {}};
734 return @tags;
737 =head2 get_tag_values
739 Title : get_tag_values
740 Usage : my @values = $node->get_tag_values($tag)
741 Function: Gets the values for given tag ($tag)
742 Returns : In array context returns an array of values
743 or an empty list if tag does not exist.
744 In scalar context returns the first value or undef.
745 Args : $tag - tag name
747 =cut
749 sub get_tag_values{
750 my ($self,$tag) = @_;
751 return wantarray ? @{$self->{'_tags'}->{$tag} || []} :
752 (@{$self->{'_tags'}->{$tag} || []})[0];
755 =head2 has_tag
757 Title : has_tag
758 Usage : $node->has_tag($tag)
759 Function: Boolean test if tag exists in the Node
760 Returns : Boolean
761 Args : $tag - tagname
763 =cut
765 sub has_tag {
766 my ($self,$tag) = @_;
767 return exists $self->{'_tags'}->{$tag};
770 sub node_cleanup {
771 my $self = shift;
772 return unless defined $self;
774 #*** below is wrong, cleanup doesn't actually occur. Will replace with:
775 # $self->remove_all_Descendents; once further fixes in place..
776 #if( defined $self->{'_desc'} &&
777 # ref($self->{'_desc'}) =~ /HASH/i ) {
778 # while( my ($nodeid,$node) = each %{ $self->{'_desc'} } ) {
779 # $node->ancestor(undef); # insure no circular references
780 # $node = undef;
783 $self->remove_all_Descendents;
785 #$self->{'_desc'} = {};
789 =head2 reverse_edge
791 Title : reverse_edge
792 Usage : $node->reverse_edge(child);
793 Function: makes child be a parent of node
794 Requires: child must be a direct descendent of node
795 Returns : 1 on success, 0 on failure
796 Args : Bio::Tree::NodeI that is in the tree
798 =cut
800 sub reverse_edge {
801 my ($self,$node) = @_;
802 if( $self->delete_edge($node) ) {
803 $node->add_Descendent($self);
804 return 1;
806 return 0;