hash ref, not array ref
[bioperl-live.git] / Bio / Tree / Node.pm
blob9d3fd1348590cd144f0bd7e8467ef2a62565b258
1 # $Id$
3 # BioPerl module for Bio::Tree::Node
5 # Cared for by Jason Stajich <jason-at-bioperl.org>
7 # Copyright Jason Stajich
9 # You may distribute this module under the same terms as perl itself
11 # POD documentation - main docs before the code
13 =head1 NAME
15 Bio::Tree::Node - A Simple Tree Node
17 =head1 SYNOPSIS
19 use Bio::Tree::Node;
20 my $nodeA = Bio::Tree::Node->new();
21 my $nodeL = Bio::Tree::Node->new();
22 my $nodeR = Bio::Tree::Node->new();
24 my $node = Bio::Tree::Node->new();
25 $node->add_Descendent($nodeL);
26 $node->add_Descendent($nodeR);
28 print "node is not a leaf \n" if( $node->is_leaf);
30 =head1 DESCRIPTION
32 Makes a Tree Node suitable for building a Tree.
34 =head1 FEEDBACK
36 =head2 Mailing Lists
38 User feedback is an integral part of the evolution of this and other
39 Bioperl modules. Send your comments and suggestions preferably to
40 the Bioperl mailing list. Your participation is much appreciated.
42 bioperl-l@bioperl.org - General discussion
43 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
45 =head2 Reporting Bugs
47 Report bugs to the Bioperl bug tracking system to help us keep track
48 of the bugs and their resolution. Bug reports can be submitted via
49 the web:
51 http://bugzilla.open-bio.org/
53 =head1 AUTHOR - Jason Stajich
55 Email jason-at-bioperl-dot-org
57 =head1 CONTRIBUTORS
59 Aaron Mackey, amackey-at-virginia-dot-edu
60 Sendu Bala, bix@sendu.me.uk
62 =head1 APPENDIX
64 The rest of the documentation details each of the object methods.
65 Internal methods are usually preceded with a _
67 =cut
70 # Let the code begin...
72 package Bio::Tree::Node;
73 use vars qw($CREATIONORDER);
74 use strict;
76 use Scalar::Util qw(weaken isweak);
78 use base qw(Bio::Root::Root Bio::Tree::NodeI);
80 BEGIN {
81 $CREATIONORDER = 1;
84 =head2 new
86 Title : new
87 Usage : my $obj = Bio::Tree::Node->new();
88 Function: Builds a new Bio::Tree::Node object
89 Returns : Bio::Tree::Node
90 Args : -descendents => arrayref of descendents (they will be
91 updated s.t. their ancestor point is this
92 node)
93 -branch_length => branch length [integer] (optional)
94 -bootstrap => value bootstrap value (string)
95 -description => description of node
96 -id => human readable id for node
98 =cut
100 sub new {
101 my($class,@args) = @_;
103 my $self = $class->SUPER::new(@args);
104 my ($children, $branchlen,$id,
105 $bootstrap, $desc,$d) = $self->_rearrange([qw(
106 DESCENDENTS
107 BRANCH_LENGTH
109 BOOTSTRAP
110 DESC
111 DESCRIPTION
113 @args);
114 $self->_register_for_cleanup(\&node_cleanup);
115 $self->{'_desc'} = {}; # for descendents
116 if( defined $d && defined $desc ) {
117 $self->warn("can only accept -desc or -description, not both, accepting -description");
118 $desc = $d;
119 } elsif( defined $d && ! defined $desc ) {
120 $desc = $d;
122 defined $desc && $self->description($desc);
123 defined $bootstrap && $self->bootstrap($bootstrap);
124 defined $id && $self->id($id);
125 defined $branchlen && $self->branch_length($branchlen);
126 if( defined $children ) {
127 if( ref($children) !~ /ARRAY/i ) {
128 $self->warn("Must specify a valid ARRAY reference to initialize a Node's Descendents");
130 foreach my $c ( @$children ) {
131 $self->add_Descendent($c);
134 $self->_creation_id($CREATIONORDER++);
135 return $self;
138 =head2 add_Descendent
140 Title : add_Descendent
141 Usage : $node->add_Descendent($node);
142 Function: Adds a descendent to a node
143 Returns : number of current descendents for this node
144 Args : Bio::Node::NodeI
145 boolean flag, true if you want to ignore the fact that you are
146 adding a second node with the same unique id (typically memory
147 location reference in this implementation). default is false and
148 will throw an error if you try and overwrite an existing node.
150 =cut
152 sub add_Descendent{
153 my ($self,$node,$ignoreoverwrite) = @_;
154 return -1 if( ! defined $node );
156 if( ! ref($node) ||
157 ref($node) =~ /HASH/ ||
158 ! $node->isa('Bio::Tree::NodeI') ) {
159 $self->throw("Trying to add a Descendent who is not a Bio::Tree::NodeI");
160 return -1;
163 $self->{_adding_descendent} = 1;
164 # avoid infinite recurse
165 $node->ancestor($self) unless $node->{_setting_ancestor};
166 $self->{_adding_descendent} = 0;
168 if( $self->{'_desc'}->{$node->internal_id} && ! $ignoreoverwrite ) {
169 $self->throw("Going to overwrite a node which is $node that is already stored here, set the ignore overwrite flag (parameter 2) to true to ignore this in the future");
171 $self->{'_desc'}->{$node->internal_id} = $node; # is this safely unique - we've tested before at any rate??
173 $self->invalidate_height();
175 return scalar keys %{$self->{'_desc'}};
178 =head2 each_Descendent
180 Title : each_Descendent($sortby)
181 Usage : my @nodes = $node->each_Descendent;
182 Function: all the descendents for this Node (but not their descendents
183 i.e. not a recursive fetchall)
184 Returns : Array of Bio::Tree::NodeI objects
185 Args : $sortby [optional] "height", "creation", "alpha", "revalpha",
186 or coderef to be used to sort the order of children nodes.
188 =cut
190 sub each_Descendent{
191 my ($self, $sortby) = @_;
193 # order can be based on branch length (and sub branchlength)
194 $sortby ||= 'none';
195 if (ref $sortby eq 'CODE') {
196 my @values = sort { $sortby->($a,$b) } values %{$self->{'_desc'}};
197 return @values;
198 } elsif ($sortby eq 'height') {
199 return map { $_->[0] }
200 sort { $a->[1] <=> $b->[1] ||
201 $a->[2] <=> $b->[2] }
202 map { [$_, $_->height, $_->internal_id ] }
203 values %{$self->{'_desc'}};
204 } elsif( $sortby eq 'alpha' ) {
205 my @set;
206 for my $v ( values %{$self->{'_desc'}} ) {
207 unless( $v->is_Leaf ) {
208 my @lst = ( sort { $a cmp $b } map { $_->id }
209 grep { $_->is_Leaf }
210 $v->get_all_Descendents($sortby));
211 push @set, [$v, $lst[0], $v->internal_id];
212 } else {
213 push @set, [$v, $v->id, $v->internal_id];
216 return map { $_->[0] }
217 sort {$a->[1] cmp $b->[1] || $a->[2] <=> $b->[2] } @set;
218 } elsif( $sortby eq 'revalpha' ) {
219 my @set;
220 for my $v ( values %{$self->{'_desc'}} ) {
221 if( ! defined $v->id &&
222 ! $v->is_Leaf ) {
223 my ($l) = ( sort { $b cmp $a } map { $_->id }
224 grep { $_->is_Leaf }
225 $v->get_all_Descendents($sortby));
226 push @set, [$v, $l, $v->internal_id];
227 } else {
228 push @set, [$v, $v->id, $v->internal_id];
231 return map { $_->[0] }
232 sort {$b->[1] cmp $a->[1] || $b->[2] <=> $a->[2] } @set;
233 } else { # creation
234 return map { $_->[0] }
235 sort { $a->[1] <=> $b->[1] }
236 map { [$_, $_->internal_id ] }
237 values %{$self->{'_desc'}};
241 =head2 remove_Descendent
243 Title : remove_Descendent
244 Usage : $node->remove_Descedent($node_foo);
245 Function: Removes a specific node from being a Descendent of this node
246 Returns : nothing
247 Args : An array of Bio::Node::NodeI objects which have been previously
248 passed to the add_Descendent call of this object.
250 =cut
252 sub remove_Descendent{
253 my ($self,@nodes) = @_;
254 my $c= 0;
255 foreach my $n ( @nodes ) {
256 if( $self->{'_desc'}->{$n->internal_id} ) {
257 $self->{_removing_descendent} = 1;
258 $n->ancestor(undef);
259 $self->{_removing_descendent} = 0;
260 # should be redundant
261 $self->{'_desc'}->{$n->internal_id}->ancestor(undef);
262 delete $self->{'_desc'}->{$n->internal_id};
263 $c++;
264 } else {
265 if( $self->verbose ) {
266 $self->debug(sprintf("no node %s (%s) listed as a descendent in this node %s (%s)\n",$n->id, $n,$self->id,$self));
267 $self->debug("Descendents are " . join(',', keys %{$self->{'_desc'}})."\n");
272 # remove unecessary nodes if we have removed the part
273 # which branches.
274 my $a1 = $self->ancestor;
275 if( $a1 ) {
276 my $bl = $self->branch_length || 0;
277 my @d = $self->each_Descendent;
278 if (scalar @d == 1) {
279 $d[0]->branch_length($bl + ($d[0]->branch_length || 0));
280 $a1->add_Descendent($d[0]);
281 $a1->remove_Descendent($self);
287 =head2 remove_all_Descendents
289 Title : remove_all_Descendents
290 Usage : $node->remove_All_Descendents()
291 Function: Cleanup the node's reference to descendents and reset
292 their ancestor pointers to undef, if you don't have a reference
293 to these objects after this call they will be cleaned up - so
294 a get_nodes from the Tree object would be a safe thing to do first
295 Returns : nothing
296 Args : none
298 =cut
300 sub remove_all_Descendents{
301 my ($self) = @_;
302 # this won't cleanup the nodes themselves if you also have
303 # a copy/pointer of them (I think)...
304 while( my ($node,$val) = each %{ $self->{'_desc'} } ) {
305 $val->ancestor(undef);
307 $self->{'_desc'} = {};
311 =head2 get_all_Descendents
313 Title : get_all_Descendents
314 Usage : my @nodes = $node->get_all_Descendents;
315 Function: Recursively fetch all the nodes and their descendents
316 *NOTE* This is different from each_Descendent
317 Returns : Array or Bio::Tree::NodeI objects
318 Args : none
320 =cut
322 # get_all_Descendents implemented in the interface
324 =head2 ancestor
326 Title : ancestor
327 Usage : $obj->ancestor($newval)
328 Function: Set the Ancestor
329 Returns : ancestral node
330 Args : newvalue (optional)
332 =cut
334 sub ancestor {
335 my $self = shift;
336 if (@_) {
337 my $new_ancestor = shift;
339 # we can set ancestor to undef
340 if ($new_ancestor) {
341 $self->throw("This is [$new_ancestor], not a Bio::Tree::NodeI")
342 unless $new_ancestor->isa('Bio::Tree::NodeI');
345 my $old_ancestor = $self->{'_ancestor'} || '';
346 if (!$old_ancestor ||
347 ($old_ancestor && ( !$new_ancestor ||
348 $new_ancestor ne $old_ancestor)) ) {
349 if( $old_ancestor && ! $old_ancestor->{_removing_descendent}) {
350 $old_ancestor->remove_Descendent($self);
352 if ($new_ancestor &&
353 ! $new_ancestor->{_adding_descendent} ) { # avoid infinite recurse
354 $self->{_setting_ancestor} = 1;
355 $new_ancestor->add_Descendent($self, 1);
356 $self->{_setting_ancestor} = 0;
359 weaken($self->{'_ancestor'} = $new_ancestor);
362 return $self->{'_ancestor'};
365 =head2 branch_length
367 Title : branch_length
368 Usage : $obj->branch_length()
369 Function: Get/Set the branch length
370 Returns : value of branch_length
371 Args : newvalue (optional)
373 =cut
375 sub branch_length{
376 my $self = shift;
377 if( @_ ) {
378 my $bl = shift;
379 if( defined $bl &&
380 $bl =~ s/\[(\d+)\]// ) {
381 $self->bootstrap($1);
383 $self->{'_branch_length'} = $bl;
384 $self->invalidate_height();
386 return $self->{'_branch_length'};
389 =head2 bootstrap
391 Title : bootstrap
392 Usage : $obj->bootstrap($newval)
393 Function: Get/Set the bootstrap value
394 Returns : value of bootstrap
395 Args : newvalue (optional)
397 =cut
399 sub bootstrap {
400 my $self = shift;
401 if( @_ ) {
402 if( $self->has_tag('B') ) {
403 $self->remove_tag('B');
405 $self->add_tag_value('B',shift);
407 return ($self->get_tag_values('B'))[0];
410 =head2 description
412 Title : description
413 Usage : $obj->description($newval)
414 Function: Get/Set the description string
415 Returns : value of description
416 Args : newvalue (optional)
418 =cut
420 sub description {
421 my $self = shift;
422 $self->{'_description'} = shift @_ if @_;
423 return $self->{'_description'};
426 =head2 id
428 Title : id
429 Usage : $obj->id($newval)
430 Function: The human readable identifier for the node
431 Returns : value of human readable id
432 Args : newvalue (optional)
434 "A name can be any string of printable characters except blanks,
435 colons, semicolons, parentheses, and square brackets. Because you may
436 want to include a blank in a name, it is assumed that an underscore
437 character ("_") stands for a blank; any of these in a name will be
438 converted to a blank when it is read in."
440 from L<http://evolution.genetics.washington.edu/phylip/newicktree.html>
442 Also note that these objects now support spaces, ();: because we can
443 automatically quote the strings if they contain these characters. The
444 L<id_output> method does this for you so use the id() method to get
445 the raw string while L<id_output> to get the pre-escaped string.
447 =cut
449 sub id {
450 my ($self, $value) = @_;
451 if (defined $value) {
452 #$self->warn("Illegal characters ();: and space in the id [$value], converting to _ ")
453 # if $value =~ /\(\);:/ and $self->verbose >= 0;
454 #$value =~ s/[\(\);:\s]/_/g;
455 $self->{'_id'} = $value;
457 return $self->{'_id'};
460 =head2 Helper Functions
462 =cut
464 =head2 id_output
466 Title : id_output
467 Usage : my $id = $node->id_output;
468 Function: Return an id suitable for output in format like newick
469 so that if it contains spaces or ():; characters it is properly
470 quoted
471 Returns : $id string if $node->id has a value
472 Args : none
474 =cut
476 # implemented in NodeI interface
478 =head2 internal_id
480 Title : internal_id
481 Usage : my $internalid = $node->internal_id
482 Function: Returns the internal unique id for this Node
483 (a monotonically increasing number for this in-memory implementation
484 but could be a database determined unique id in other
485 implementations)
486 Returns : unique id
487 Args : none
489 =cut
491 sub internal_id {
492 return $_[0]->_creation_id;
495 =head2 _creation_id
497 Title : _creation_id
498 Usage : $obj->_creation_id($newval)
499 Function: a private method signifying the internal creation order
500 Returns : value of _creation_id
501 Args : newvalue (optional)
503 =cut
505 sub _creation_id {
506 my $self = shift @_;
507 $self->{'_creation_id'} = shift @_ if( @_);
508 return $self->{'_creation_id'} || 0;
511 =head2 Bio::Node::NodeI decorated interface implemented
513 The following methods are implemented by L<Bio::Node::NodeI> decorated
514 interface.
516 =head2 is_Leaf
518 Title : is_Leaf
519 Usage : if( $node->is_Leaf )
520 Function: Get Leaf status
521 Returns : boolean
522 Args : none
524 =cut
526 sub is_Leaf {
527 my ($self) = @_;
528 my $isleaf = ! (defined $self->{'_desc'} &&
529 (keys %{$self->{'_desc'}} > 0) );
530 return $isleaf;
533 =head2 to_string
535 Title : to_string
536 Usage : my $str = $node->to_string()
537 Function: For debugging, provide a node as a string
538 Returns : string
539 Args : none
541 =head2 height
543 Title : height
544 Usage : my $len = $node->height
545 Function: Returns the height of the tree starting at this
546 node. Height is the maximum branchlength to get to the tip.
547 Returns : The longest length (weighting branches with branch_length) to a leaf
548 Args : none
550 =cut
552 sub height {
553 my ($self) = @_;
554 return $self->{'_height'} if( defined $self->{'_height'} );
556 return 0 if( $self->is_Leaf );
557 my $max = 0;
558 foreach my $subnode ( $self->each_Descendent ) {
559 my $bl = $subnode->branch_length;
560 $bl = 1 unless (defined $bl && $bl =~ /^\-?\d+(\.\d+)?$/);
561 my $s = $subnode->height + $bl;
562 if( $s > $max ) { $max = $s; }
564 return ($self->{'_height'} = $max);
567 =head2 invalidate_height
569 Title : invalidate_height
570 Usage : private helper method
571 Function: Invalidate our cached value of the node height in the tree
572 Returns : nothing
573 Args : none
575 =cut
577 sub invalidate_height {
578 my ($self) = @_;
580 $self->{'_height'} = undef;
581 if( defined $self->ancestor ) {
582 $self->ancestor->invalidate_height;
586 =head2 set_tag_value
588 Title : set_tag_value
589 Usage : $node->set_tag_value($tag,$value)
590 $node->set_tag_value($tag,@values)
591 Function: Sets a tag value(s) to a node. Replaces old values.
592 Returns : number of values stored for this tag
593 Args : $tag - tag name
594 $value - value to store for the tag
596 =cut
598 sub set_tag_value{
599 my ($self,$tag,@values) = @_;
600 if( ! defined $tag || ! scalar @values ) {
601 $self->warn("cannot call set_tag_value with an undefined value");
603 $self->remove_tag ($tag);
604 map { push @{$self->{'_tags'}->{$tag}}, $_ } @values;
605 return scalar @{$self->{'_tags'}->{$tag}};
609 =head2 add_tag_value
611 Title : add_tag_value
612 Usage : $node->add_tag_value($tag,$value)
613 Function: Adds a tag value to a node
614 Returns : number of values stored for this tag
615 Args : $tag - tag name
616 $value - value to store for the tag
618 =cut
620 sub add_tag_value{
621 my ($self,$tag,$value) = @_;
622 if( ! defined $tag || ! defined $value ) {
623 $self->warn("cannot call add_tag_value with an undefined value".($tag ? " ($tag)" : ''));
624 $self->warn($self->stack_trace_dump,"\n");
626 push @{$self->{'_tags'}->{$tag}}, $value;
627 return scalar @{$self->{'_tags'}->{$tag}};
630 =head2 remove_tag
632 Title : remove_tag
633 Usage : $node->remove_tag($tag)
634 Function: Remove the tag and all values for this tag
635 Returns : boolean representing success (0 if tag does not exist)
636 Args : $tag - tagname to remove
639 =cut
641 sub remove_tag {
642 my ($self,$tag) = @_;
643 if( exists $self->{'_tags'}->{$tag} ) {
644 $self->{'_tags'}->{$tag} = undef;
645 delete $self->{'_tags'}->{$tag};
646 return 1;
648 return 0;
651 =head2 remove_all_tags
653 Title : remove_all_tags
654 Usage : $node->remove_all_tags()
655 Function: Removes all tags
656 Returns : None
657 Args : None
659 =cut
661 sub remove_all_tags{
662 my ($self) = @_;
663 $self->{'_tags'} = {};
664 return;
667 =head2 get_all_tags
669 Title : get_all_tags
670 Usage : my @tags = $node->get_all_tags()
671 Function: Gets all the tag names for this Node
672 Returns : Array of tagnames
673 Args : None
675 =cut
677 sub get_all_tags{
678 my ($self) = @_;
679 my @tags = sort keys %{$self->{'_tags'} || {}};
680 return @tags;
683 =head2 get_tag_values
685 Title : get_tag_values
686 Usage : my @values = $node->get_tag_values($tag)
687 Function: Gets the values for given tag ($tag)
688 Returns : In array context returns an array of values
689 or an empty list if tag does not exist.
690 In scalar context returns the first value or undef.
691 Args : $tag - tag name
693 =cut
695 sub get_tag_values{
696 my ($self,$tag) = @_;
697 return wantarray ? @{$self->{'_tags'}->{$tag} || []} :
698 (@{$self->{'_tags'}->{$tag} || []})[0];
701 =head2 has_tag
703 Title : has_tag
704 Usage : $node->has_tag($tag)
705 Function: Boolean test if tag exists in the Node
706 Returns : Boolean
707 Args : $tag - tagname
709 =cut
711 sub has_tag {
712 my ($self,$tag) = @_;
713 return exists $self->{'_tags'}->{$tag};
716 sub node_cleanup {
717 my $self = shift;
718 return unless defined $self;
720 #*** below is wrong, cleanup doesn't actually occur. Will replace with:
721 # $self->remove_all_Descendents; once further fixes in place..
722 if( defined $self->{'_desc'} &&
723 ref($self->{'_desc'}) =~ /HASH/i ) {
724 while( my ($nodeid,$node) = each %{ $self->{'_desc'} } ) {
725 $node->ancestor(undef); # insure no circular references
726 $node = undef;
729 $self->{'_desc'} = {};
732 =head2 reverse_edge
734 Title : reverse_edge
735 Usage : $node->reverse_edge(child);
736 Function: makes child be a parent of node
737 Requires: child must be a direct descendent of node
738 Returns : 1 on success, 0 on failure
739 Args : Bio::Tree::NodeI that is in the tree
741 =cut
743 sub reverse_edge {
744 my ($self,$node) = @_;
745 if( $self->delete_edge($node) ) {
746 $node->add_Descendent($self);
747 return 1;
749 return 0;