3 # BioPerl module for Bio::Tree::Node
5 # Cared for by Jason Stajich <jason-at-bioperl.org>
7 # Copyright Jason Stajich
9 # You may distribute this module under the same terms as perl itself
11 # POD documentation - main docs before the code
15 Bio::Tree::Node - A Simple Tree Node
20 my $nodeA = Bio::Tree::Node->new();
21 my $nodeL = Bio::Tree::Node->new();
22 my $nodeR = Bio::Tree::Node->new();
24 my $node = Bio::Tree::Node->new();
25 $node->add_Descendent($nodeL);
26 $node->add_Descendent($nodeR);
28 print "node is not a leaf \n" if( $node->is_leaf);
32 Makes a Tree Node suitable for building a Tree.
38 User feedback is an integral part of the evolution of this and other
39 Bioperl modules. Send your comments and suggestions preferably to
40 the Bioperl mailing list. Your participation is much appreciated.
42 bioperl-l@bioperl.org - General discussion
43 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
47 Report bugs to the Bioperl bug tracking system to help us keep track
48 of the bugs and their resolution. Bug reports can be submitted via
51 http://bugzilla.open-bio.org/
53 =head1 AUTHOR - Jason Stajich
55 Email jason-at-bioperl-dot-org
59 Aaron Mackey, amackey-at-virginia-dot-edu
60 Sendu Bala, bix@sendu.me.uk
64 The rest of the documentation details each of the object methods.
65 Internal methods are usually preceded with a _
70 # Let the code begin...
72 package Bio
::Tree
::Node
;
73 use vars
qw($CREATIONORDER);
76 use Scalar::Util qw(weaken isweak);
78 use base
qw(Bio::Root::Root Bio::Tree::NodeI);
87 Usage : my $obj = Bio::Tree::Node->new();
88 Function: Builds a new Bio::Tree::Node object
89 Returns : Bio::Tree::Node
90 Args : -descendents => arrayref of descendents (they will be
91 updated s.t. their ancestor point is this
93 -branch_length => branch length [integer] (optional)
94 -bootstrap => value bootstrap value (string)
95 -description => description of node
96 -id => human readable id for node
101 my($class,@args) = @_;
103 my $self = $class->SUPER::new
(@args);
104 my ($children, $branchlen,$id,
105 $bootstrap, $desc,$d) = $self->_rearrange([qw(
114 $self->_register_for_cleanup(\
&node_cleanup
);
115 $self->{'_desc'} = {}; # for descendents
116 if( defined $d && defined $desc ) {
117 $self->warn("can only accept -desc or -description, not both, accepting -description");
119 } elsif( defined $d && ! defined $desc ) {
122 defined $desc && $self->description($desc);
123 defined $bootstrap && $self->bootstrap($bootstrap);
124 defined $id && $self->id($id);
125 defined $branchlen && $self->branch_length($branchlen);
126 if( defined $children ) {
127 if( ref($children) !~ /ARRAY/i ) {
128 $self->warn("Must specify a valid ARRAY reference to initialize a Node's Descendents");
130 foreach my $c ( @
$children ) {
131 $self->add_Descendent($c);
134 $self->_creation_id($CREATIONORDER++);
138 =head2 add_Descendent
140 Title : add_Descendent
141 Usage : $node->add_Descendent($node);
142 Function: Adds a descendent to a node
143 Returns : number of current descendents for this node
144 Args : Bio::Node::NodeI
145 boolean flag, true if you want to ignore the fact that you are
146 adding a second node with the same unique id (typically memory
147 location reference in this implementation). default is false and
148 will throw an error if you try and overwrite an existing node.
153 my ($self,$node,$ignoreoverwrite) = @_;
154 return -1 if( ! defined $node );
157 ref($node) =~ /HASH/ ||
158 ! $node->isa('Bio::Tree::NodeI') ) {
159 $self->throw("Trying to add a Descendent who is not a Bio::Tree::NodeI");
163 $self->{_adding_descendent
} = 1;
164 # avoid infinite recurse
165 $node->ancestor($self) unless $node->{_setting_ancestor
};
166 $self->{_adding_descendent
} = 0;
168 if( $self->{'_desc'}->{$node->internal_id} && ! $ignoreoverwrite ) {
169 $self->throw("Going to overwrite a node which is $node that is already stored here, set the ignore overwrite flag (parameter 2) to true to ignore this in the future");
171 $self->{'_desc'}->{$node->internal_id} = $node; # is this safely unique - we've tested before at any rate??
173 $self->invalidate_height();
175 return scalar keys %{$self->{'_desc'}};
178 =head2 each_Descendent
180 Title : each_Descendent($sortby)
181 Usage : my @nodes = $node->each_Descendent;
182 Function: all the descendents for this Node (but not their descendents
183 i.e. not a recursive fetchall)
184 Returns : Array of Bio::Tree::NodeI objects
185 Args : $sortby [optional] "height", "creation", "alpha", "revalpha",
186 or coderef to be used to sort the order of children nodes.
191 my ($self, $sortby) = @_;
193 # order can be based on branch length (and sub branchlength)
195 if (ref $sortby eq 'CODE') {
196 my @values = sort { $sortby->($a,$b) } values %{$self->{'_desc'}};
198 } elsif ($sortby eq 'height') {
199 return map { $_->[0] }
200 sort { $a->[1] <=> $b->[1] ||
201 $a->[2] <=> $b->[2] }
202 map { [$_, $_->height, $_->internal_id ] }
203 values %{$self->{'_desc'}};
204 } elsif( $sortby eq 'alpha' ) {
206 for my $v ( values %{$self->{'_desc'}} ) {
207 unless( $v->is_Leaf ) {
208 my @lst = ( sort { $a cmp $b } map { $_->id }
210 $v->get_all_Descendents($sortby));
211 push @set, [$v, $lst[0], $v->internal_id];
213 push @set, [$v, $v->id, $v->internal_id];
216 return map { $_->[0] }
217 sort {$a->[1] cmp $b->[1] || $a->[2] <=> $b->[2] } @set;
218 } elsif( $sortby eq 'revalpha' ) {
220 for my $v ( values %{$self->{'_desc'}} ) {
221 if( ! defined $v->id &&
223 my ($l) = ( sort { $b cmp $a } map { $_->id }
225 $v->get_all_Descendents($sortby));
226 push @set, [$v, $l, $v->internal_id];
228 push @set, [$v, $v->id, $v->internal_id];
231 return map { $_->[0] }
232 sort {$b->[1] cmp $a->[1] || $b->[2] <=> $a->[2] } @set;
234 return map { $_->[0] }
235 sort { $a->[1] <=> $b->[1] }
236 map { [$_, $_->internal_id ] }
237 values %{$self->{'_desc'}};
241 =head2 remove_Descendent
243 Title : remove_Descendent
244 Usage : $node->remove_Descedent($node_foo);
245 Function: Removes a specific node from being a Descendent of this node
247 Args : An array of Bio::Node::NodeI objects which have been previously
248 passed to the add_Descendent call of this object.
252 sub remove_Descendent
{
253 my ($self,@nodes) = @_;
255 foreach my $n ( @nodes ) {
256 if( $self->{'_desc'}->{$n->internal_id} ) {
257 $self->{_removing_descendent
} = 1;
259 $self->{_removing_descendent
} = 0;
260 # should be redundant
261 $self->{'_desc'}->{$n->internal_id}->ancestor(undef);
262 delete $self->{'_desc'}->{$n->internal_id};
265 if( $self->verbose ) {
266 $self->debug(sprintf("no node %s (%s) listed as a descendent in this node %s (%s)\n",$n->id, $n,$self->id,$self));
267 $self->debug("Descendents are " . join(',', keys %{$self->{'_desc'}})."\n");
272 # remove unecessary nodes if we have removed the part
274 my $a1 = $self->ancestor;
276 my $bl = $self->branch_length || 0;
277 my @d = $self->each_Descendent;
278 if (scalar @d == 1) {
279 $d[0]->branch_length($bl + ($d[0]->branch_length || 0));
280 $a1->add_Descendent($d[0]);
281 $a1->remove_Descendent($self);
287 =head2 remove_all_Descendents
289 Title : remove_all_Descendents
290 Usage : $node->remove_All_Descendents()
291 Function: Cleanup the node's reference to descendents and reset
292 their ancestor pointers to undef, if you don't have a reference
293 to these objects after this call they will be cleaned up - so
294 a get_nodes from the Tree object would be a safe thing to do first
300 sub remove_all_Descendents
{
302 # this won't cleanup the nodes themselves if you also have
303 # a copy/pointer of them (I think)...
304 while( my ($node,$val) = each %{ $self->{'_desc'} } ) {
305 $val->ancestor(undef);
307 $self->{'_desc'} = {};
311 =head2 get_all_Descendents
313 Title : get_all_Descendents
314 Usage : my @nodes = $node->get_all_Descendents;
315 Function: Recursively fetch all the nodes and their descendents
316 *NOTE* This is different from each_Descendent
317 Returns : Array or Bio::Tree::NodeI objects
322 # get_all_Descendents implemented in the interface
327 Usage : $obj->ancestor($newval)
328 Function: Set the Ancestor
329 Returns : ancestral node
330 Args : newvalue (optional)
337 my $new_ancestor = shift;
339 # we can set ancestor to undef
341 $self->throw("This is [$new_ancestor], not a Bio::Tree::NodeI")
342 unless $new_ancestor->isa('Bio::Tree::NodeI');
345 my $old_ancestor = $self->{'_ancestor'} || '';
346 if (!$old_ancestor ||
347 ($old_ancestor && ( !$new_ancestor ||
348 $new_ancestor ne $old_ancestor)) ) {
349 if( $old_ancestor && ! $old_ancestor->{_removing_descendent
}) {
350 $old_ancestor->remove_Descendent($self);
353 ! $new_ancestor->{_adding_descendent
} ) { # avoid infinite recurse
354 $self->{_setting_ancestor
} = 1;
355 $new_ancestor->add_Descendent($self, 1);
356 $self->{_setting_ancestor
} = 0;
359 weaken
($self->{'_ancestor'} = $new_ancestor);
362 return $self->{'_ancestor'};
367 Title : branch_length
368 Usage : $obj->branch_length()
369 Function: Get/Set the branch length
370 Returns : value of branch_length
371 Args : newvalue (optional)
380 $bl =~ s/\[(\d+)\]// ) {
381 $self->bootstrap($1);
383 $self->{'_branch_length'} = $bl;
384 $self->invalidate_height();
386 return $self->{'_branch_length'};
392 Usage : $obj->bootstrap($newval)
393 Function: Get/Set the bootstrap value
394 Returns : value of bootstrap
395 Args : newvalue (optional)
402 if( $self->has_tag('B') ) {
403 $self->remove_tag('B');
405 $self->add_tag_value('B',shift);
407 return ($self->get_tag_values('B'))[0];
413 Usage : $obj->description($newval)
414 Function: Get/Set the description string
415 Returns : value of description
416 Args : newvalue (optional)
422 $self->{'_description'} = shift @_ if @_;
423 return $self->{'_description'};
429 Usage : $obj->id($newval)
430 Function: The human readable identifier for the node
431 Returns : value of human readable id
432 Args : newvalue (optional)
434 "A name can be any string of printable characters except blanks,
435 colons, semicolons, parentheses, and square brackets. Because you may
436 want to include a blank in a name, it is assumed that an underscore
437 character ("_") stands for a blank; any of these in a name will be
438 converted to a blank when it is read in."
440 from L<http://evolution.genetics.washington.edu/phylip/newicktree.html>
442 Also note that these objects now support spaces, ();: because we can
443 automatically quote the strings if they contain these characters. The
444 L<id_output> method does this for you so use the id() method to get
445 the raw string while L<id_output> to get the pre-escaped string.
450 my ($self, $value) = @_;
451 if (defined $value) {
452 #$self->warn("Illegal characters ();: and space in the id [$value], converting to _ ")
453 # if $value =~ /\(\);:/ and $self->verbose >= 0;
454 #$value =~ s/[\(\);:\s]/_/g;
455 $self->{'_id'} = $value;
457 return $self->{'_id'};
460 =head2 Helper Functions
467 Usage : my $id = $node->id_output;
468 Function: Return an id suitable for output in format like newick
469 so that if it contains spaces or ():; characters it is properly
471 Returns : $id string if $node->id has a value
476 # implemented in NodeI interface
481 Usage : my $internalid = $node->internal_id
482 Function: Returns the internal unique id for this Node
483 (a monotonically increasing number for this in-memory implementation
484 but could be a database determined unique id in other
492 return $_[0]->_creation_id;
498 Usage : $obj->_creation_id($newval)
499 Function: a private method signifying the internal creation order
500 Returns : value of _creation_id
501 Args : newvalue (optional)
507 $self->{'_creation_id'} = shift @_ if( @_);
508 return $self->{'_creation_id'} || 0;
511 =head2 Bio::Node::NodeI decorated interface implemented
513 The following methods are implemented by L<Bio::Node::NodeI> decorated
519 Usage : if( $node->is_Leaf )
520 Function: Get Leaf status
528 my $isleaf = ! (defined $self->{'_desc'} &&
529 (keys %{$self->{'_desc'}} > 0) );
536 Usage : my $str = $node->to_string()
537 Function: For debugging, provide a node as a string
544 Usage : my $len = $node->height
545 Function: Returns the height of the tree starting at this
546 node. Height is the maximum branchlength to get to the tip.
547 Returns : The longest length (weighting branches with branch_length) to a leaf
554 return $self->{'_height'} if( defined $self->{'_height'} );
556 return 0 if( $self->is_Leaf );
558 foreach my $subnode ( $self->each_Descendent ) {
559 my $bl = $subnode->branch_length;
560 $bl = 1 unless (defined $bl && $bl =~ /^\-?\d+(\.\d+)?$/);
561 my $s = $subnode->height + $bl;
562 if( $s > $max ) { $max = $s; }
564 return ($self->{'_height'} = $max);
567 =head2 invalidate_height
569 Title : invalidate_height
570 Usage : private helper method
571 Function: Invalidate our cached value of the node height in the tree
577 sub invalidate_height
{
580 $self->{'_height'} = undef;
581 if( defined $self->ancestor ) {
582 $self->ancestor->invalidate_height;
588 Title : set_tag_value
589 Usage : $node->set_tag_value($tag,$value)
590 $node->set_tag_value($tag,@values)
591 Function: Sets a tag value(s) to a node. Replaces old values.
592 Returns : number of values stored for this tag
593 Args : $tag - tag name
594 $value - value to store for the tag
599 my ($self,$tag,@values) = @_;
600 if( ! defined $tag || ! scalar @values ) {
601 $self->warn("cannot call set_tag_value with an undefined value");
603 $self->remove_tag ($tag);
604 map { push @
{$self->{'_tags'}->{$tag}}, $_ } @values;
605 return scalar @
{$self->{'_tags'}->{$tag}};
611 Title : add_tag_value
612 Usage : $node->add_tag_value($tag,$value)
613 Function: Adds a tag value to a node
614 Returns : number of values stored for this tag
615 Args : $tag - tag name
616 $value - value to store for the tag
621 my ($self,$tag,$value) = @_;
622 if( ! defined $tag || ! defined $value ) {
623 $self->warn("cannot call add_tag_value with an undefined value".($tag ?
" ($tag)" : ''));
624 $self->warn($self->stack_trace_dump,"\n");
626 push @
{$self->{'_tags'}->{$tag}}, $value;
627 return scalar @
{$self->{'_tags'}->{$tag}};
633 Usage : $node->remove_tag($tag)
634 Function: Remove the tag and all values for this tag
635 Returns : boolean representing success (0 if tag does not exist)
636 Args : $tag - tagname to remove
642 my ($self,$tag) = @_;
643 if( exists $self->{'_tags'}->{$tag} ) {
644 $self->{'_tags'}->{$tag} = undef;
645 delete $self->{'_tags'}->{$tag};
651 =head2 remove_all_tags
653 Title : remove_all_tags
654 Usage : $node->remove_all_tags()
655 Function: Removes all tags
663 $self->{'_tags'} = {};
670 Usage : my @tags = $node->get_all_tags()
671 Function: Gets all the tag names for this Node
672 Returns : Array of tagnames
679 my @tags = sort keys %{$self->{'_tags'} || {}};
683 =head2 get_tag_values
685 Title : get_tag_values
686 Usage : my @values = $node->get_tag_values($tag)
687 Function: Gets the values for given tag ($tag)
688 Returns : In array context returns an array of values
689 or an empty list if tag does not exist.
690 In scalar context returns the first value or undef.
691 Args : $tag - tag name
696 my ($self,$tag) = @_;
697 return wantarray ? @
{$self->{'_tags'}->{$tag} || []} :
698 (@
{$self->{'_tags'}->{$tag} || []})[0];
704 Usage : $node->has_tag($tag)
705 Function: Boolean test if tag exists in the Node
707 Args : $tag - tagname
712 my ($self,$tag) = @_;
713 return exists $self->{'_tags'}->{$tag};
718 return unless defined $self;
720 #*** below is wrong, cleanup doesn't actually occur. Will replace with:
721 # $self->remove_all_Descendents; once further fixes in place..
722 if( defined $self->{'_desc'} &&
723 ref($self->{'_desc'}) =~ /HASH/i ) {
724 while( my ($nodeid,$node) = each %{ $self->{'_desc'} } ) {
725 $node->ancestor(undef); # insure no circular references
729 $self->{'_desc'} = {};
735 Usage : $node->reverse_edge(child);
736 Function: makes child be a parent of node
737 Requires: child must be a direct descendent of node
738 Returns : 1 on success, 0 on failure
739 Args : Bio::Tree::NodeI that is in the tree
744 my ($self,$node) = @_;
745 if( $self->delete_edge($node) ) {
746 $node->add_Descendent($self);