tag fourth (and hopefully last) alpha
[bioperl-live.git] / branch-1-6 / Bio / Tree / Tree.pm
blobdc9cd468347c69216228b289b1f3725c1626aaa0
1 # $Id$
3 # BioPerl module for Bio::Tree::Tree
5 # Please direct questions and support issues to <bioperl-l@bioperl.org>
7 # Cared for by Jason Stajich <jason@bioperl.org>
9 # Copyright Jason Stajich
11 # You may distribute this module under the same terms as perl itself
13 # POD documentation - main docs before the code
15 =head1 NAME
17 Bio::Tree::Tree - An Implementation of TreeI interface.
19 =head1 SYNOPSIS
21 # like from a TreeIO
22 my $treeio = Bio::TreeIO->new(-format => 'newick', -file => 'treefile.dnd');
23 my $tree = $treeio->next_tree;
24 my @nodes = $tree->get_nodes;
25 my $root = $tree->get_root_node;
28 =head1 DESCRIPTION
30 This object holds handles to Nodes which make up a tree.
32 =head1 IMPLEMENTATION NOTE
34 This implementation of Bio::Tree::Tree contains Bio::Tree:::NodeI; mainly linked
35 via the root node. As NodeI can potentially contain circular references (as
36 nodes will need to refer to both parent and child nodes), Bio::Tree::Tree will
37 remove those circular references when the object is garbage-collected. This has
38 some side effects; primarily, one must keep the Tree in scope or have at least
39 one reference to it if working with nodes. The fix is to count the references to
40 the nodes and if it is greater than expected retain all of them, but it requires
41 an additional prereq and thus may not be worth the effort. This only shows up
42 in minor edge cases, though (see Bug #2869).
44 Example of issue:
46 # tree is not assigned to a variable, so passes from memory after
47 # root node is passed
48 my $root = Bio::TreeIO->new(-format => 'newick', -file => 'foo.txt')->next_tree
49 ->get_root_node;
51 # gets nothing, as all Node links are broken when Tree is garbage-collected above
52 my @descendents = $root->get_all_Descendents;
54 =head1 FEEDBACK
56 =head2 Mailing Lists
58 User feedback is an integral part of the evolution of this and other
59 Bioperl modules. Send your comments and suggestions preferably to
60 the Bioperl mailing list. Your participation is much appreciated.
62 bioperl-l@bioperl.org - General discussion
63 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
65 =head2 Support
67 Please direct usage questions or support issues to the mailing list:
69 I<bioperl-l@bioperl.org>
71 rather than to the module maintainer directly. Many experienced and
72 reponsive experts will be able look at the problem and quickly
73 address it. Please include a thorough description of the problem
74 with code and data examples if at all possible.
76 =head2 Reporting Bugs
78 Report bugs to the Bioperl bug tracking system to help us keep track
79 of the bugs and their resolution. Bug reports can be submitted via
80 the web:
82 http://bugzilla.open-bio.org/
84 =head1 AUTHOR - Jason Stajich
86 Email jason@bioperl.org
88 =head1 CONTRIBUTORS
90 Aaron Mackey amackey@virginia.edu
91 Sendu Bala bix@sendu.me.uk
92 Mark A. Jensen maj@fortinbras.us
94 =head1 APPENDIX
96 The rest of the documentation details each of the object methods.
97 Internal methods are usually preceded with a _
99 =cut
102 # Let the code begin...
105 package Bio::Tree::Tree;
106 use strict;
108 # Object preamble - inherits from Bio::Root::Root
111 use base qw(Bio::Root::Root Bio::Tree::TreeI Bio::Tree::TreeFunctionsI);
113 =head2 new
115 Title : new
116 Usage : my $obj = Bio::Tree::Tree->new();
117 Function: Builds a new Bio::Tree::Tree object
118 Returns : Bio::Tree::Tree
119 Args : -root => L<Bio::Tree::NodeI> object which is the root
121 -node => L<Bio::Tree::NodeI> object from which the root will be
122 determined
124 -nodelete => boolean, whether or not to try and cleanup all
125 the nodes when this this tree goes out
126 of scope.
127 -id => optional tree ID
128 -score => optional tree score value
130 =cut
132 sub new {
133 my($class,@args) = @_;
135 my $self = $class->SUPER::new(@args);
136 $self->{'_rootnode'} = undef;
137 $self->{'_maxbranchlen'} = 0;
138 $self->_register_for_cleanup(\&cleanup_tree);
139 my ($root,$node,$nodel,$id,$score)= $self->_rearrange([qw(ROOT NODE NODELETE
140 ID SCORE)], @args);
142 if ($node && ! $root) {
143 $self->throw("Must supply a Bio::Tree::NodeI") unless ref($node) && $node->isa('Bio::Tree::NodeI');
144 my @lineage = $self->get_lineage_nodes($node);
145 $root = shift(@lineage) || $node;
147 # to stop us pulling in entire database of a Bio::Taxon when we later do
148 # get_nodes() or similar, specifically set ancestor() for each node
149 if ($node->isa('Bio::Taxon')) {
150 push(@lineage, $node) unless $node eq $root;
151 my $ancestor = $root;
152 foreach my $lineage_node (@lineage) {
153 $lineage_node->ancestor($ancestor);
154 } continue { $ancestor = $lineage_node; }
157 if ($root) {
158 $self->set_root_node($root);
161 $self->nodelete($nodel || 0);
162 $self->id($id) if defined $id;
163 $self->score($score) if defined $score;
164 return $self;
168 =head2 nodelete
170 Title : nodelete
171 Usage : $obj->nodelete($newval)
172 Function: Get/Set Boolean whether or not to delete the underlying
173 nodes when it goes out of scope. By default this is false
174 meaning trees are cleaned up.
175 Returns : boolean
176 Args : on set, new boolean value
178 =cut
180 sub nodelete{
181 my $self = shift;
182 return $self->{'nodelete'} = shift if @_;
183 return $self->{'nodelete'};
186 =head2 get_nodes
188 Title : get_nodes
189 Usage : my @nodes = $tree->get_nodes()
190 Function: Return list of Bio::Tree::NodeI objects
191 Returns : array of Bio::Tree::NodeI objects
192 Args : (named values) hash with one value
193 order => 'b|breadth' first order or 'd|depth' first order
195 =cut
197 sub get_nodes{
198 my ($self, @args) = @_;
200 my ($order, $sortby) = $self->_rearrange([qw(ORDER SORTBY)],@args);
201 $order ||= 'depth';
202 $sortby ||= 'none';
203 my $node = $self->get_root_node || return;
204 if ($order =~ m/^b|(breadth)$/oi) {
205 my @children = ($node);
206 for (@children) {
207 push @children, $_->each_Descendent($sortby);
209 return @children;
212 if ($order =~ m/^d|(depth)$/oi) {
213 # this is depth-first search I believe
214 my @children = ($node,$node->get_all_Descendents($sortby));
215 return @children;
219 =head2 get_root_node
221 Title : get_root_node
222 Usage : my $node = $tree->get_root_node();
223 Function: Get the Top Node in the tree, in this implementation
224 Trees only have one top node.
225 Returns : Bio::Tree::NodeI object
226 Args : none
228 =cut
231 sub get_root_node{
232 my ($self) = @_;
233 return $self->{'_rootnode'};
236 =head2 set_root_node
238 Title : set_root_node
239 Usage : $tree->set_root_node($node)
240 Function: Set the Root Node for the Tree
241 Returns : Bio::Tree::NodeI
242 Args : Bio::Tree::NodeI
244 =cut
246 sub set_root_node{
247 my $self = shift;
248 if( @_ ) {
249 my $value = shift;
250 if( defined $value &&
251 ! $value->isa('Bio::Tree::NodeI') ) {
252 $self->warn("Trying to set the root node to $value which is not a Bio::Tree::NodeI");
253 return $self->get_root_node;
255 $self->{'_rootnode'} = $value;
257 return $self->get_root_node;
260 =head2 total_branch_length
262 Title : total_branch_length
263 Usage : my $size = $tree->total_branch_length
264 Function: Returns the sum of the length of all branches
265 Returns : real
266 Args : none
268 =cut
270 sub total_branch_length { shift->subtree_length }
272 =head2 subtree_length
274 Title : subtree_length
275 Usage : my $subtree_size = $tree->subtree_length($internal_node)
276 Function: Returns the sum of the length of all branches in a subtree
277 under the node. Calculates the size of the whole tree
278 without an argument (but only if root node is defined)
279 Returns : real or undef
280 Args : Bio::Tree::NodeI object, defaults to the root node
282 =cut
284 sub subtree_length {
285 my $tree = shift;
286 my $node = shift || $tree->get_root_node;
287 return unless $node;
288 my $sum = 0;
289 for ( $node->get_all_Descendents ) {
290 $sum += $_->branch_length || 0;
292 return $sum;
296 =head2 id
298 Title : id
299 Usage : my $id = $tree->id();
300 Function: An id value for the tree
301 Returns : scalar
302 Args : [optional] new value to set
305 =cut
307 sub id{
308 my ($self,$val) = @_;
309 if( defined $val ) {
310 $self->{'_treeid'} = $val;
312 return $self->{'_treeid'};
315 =head2 score
317 Title : score
318 Usage : $obj->score($newval)
319 Function: Sets the associated score with this tree
320 This is a generic slot which is probably best used
321 for log likelihood or other overall tree score
322 Returns : value of score
323 Args : newvalue (optional)
326 =cut
328 sub score{
329 my ($self,$val) = @_;
330 if( defined $val ) {
331 $self->{'_score'} = $val;
333 return $self->{'_score'};
337 # decorated interface TreeI Implements this
339 =head2 height
341 Title : height
342 Usage : my $height = $tree->height
343 Function: Gets the height of tree - this LOG_2($number_nodes)
344 WARNING: this is only true for strict binary trees. The TreeIO
345 system is capable of building non-binary trees, for which this
346 method will currently return an incorrect value!!
347 Returns : integer
348 Args : none
350 =head2 number_nodes
352 Title : number_nodes
353 Usage : my $size = $tree->number_nodes
354 Function: Returns the number of nodes in the tree
355 Returns : integer
356 Args : none
359 =cut
361 =head2 as_text
363 Title : as_text
364 Usage : my $tree_as_string = $tree->as_text($format)
365 Function: Returns the tree as a string representation in the
366 desired format (currently 'newick', 'nhx', or
367 'tabtree')
368 Returns : scalar string
369 Args : format type as specified by Bio::TreeIO
370 Note : This method loads the Bio::TreeIO::$format module
371 on the fly, and commandeers the _write_tree_Helper
372 routine therein to create the tree string.
374 =cut
376 sub as_text {
377 my $self = shift;
378 my $format = shift;
379 my @parms;
380 my $iomod = "Bio::TreeIO::$format";
381 $self->_load_module($iomod);
382 # following currently not really necessary, but who knows?
383 my $io = $iomod->new(-format=>$format, -file=>File::Spec->devnull());
384 no strict "refs";
385 my $iowtH = *{$iomod."::_write_tree_Helper"}{CODE};
386 use strict "refs";
387 for ($format) {
388 /newick/ && do {
389 @parms = ( $io->bootstrap_style, $io->order_by, 0 );
390 last;
392 /nhx/ && do {
393 @parms = ( 0 );
394 last;
396 /tabtree/ && do {
397 @parms = ( "" );
398 last;
400 # default
401 $self->throw("as_text does not allow format '$format'")
405 # newline_each_node...
406 my $data = [$iowtH->($self->get_root_node, @parms)];
408 if ($format eq 'tabtree') {
409 return $$data[0]."\n";
411 else {
412 return join(",", @$data).";\n";
416 =head2 Methods for associating Tag/Values with a Tree
418 These methods associate tag/value pairs with a Tree
420 =head2 set_tag_value
422 Title : set_tag_value
423 Usage : $tree->set_tag_value($tag,$value)
424 $tree->set_tag_value($tag,@values)
425 Function: Sets a tag value(s) to a tree. Replaces old values.
426 Returns : number of values stored for this tag
427 Args : $tag - tag name
428 $value - value to store for the tag
430 =cut
432 sub set_tag_value{
433 my ($self,$tag,@values) = @_;
434 if( ! defined $tag || ! scalar @values ) {
435 $self->warn("cannot call set_tag_value with an undefined value");
437 $self->remove_tag ($tag);
438 map { push @{$self->{'_tags'}->{$tag}}, $_ } @values;
439 return scalar @{$self->{'_tags'}->{$tag}};
442 =head2 add_tag_value
444 Title : add_tag_value
445 Usage : $tree->add_tag_value($tag,$value)
446 Function: Adds a tag value to a tree
447 Returns : number of values stored for this tag
448 Args : $tag - tag name
449 $value - value to store for the tag
451 =cut
453 sub add_tag_value{
454 my ($self,$tag,$value) = @_;
455 if( ! defined $tag || ! defined $value ) {
456 $self->warn("cannot call add_tag_value with an undefined value");
458 push @{$self->{'_tags'}->{$tag}}, $value;
459 return scalar @{$self->{'_tags'}->{$tag}};
462 =head2 remove_tag
464 Title : remove_tag
465 Usage : $tree->remove_tag($tag)
466 Function: Remove the tag and all values for this tag
467 Returns : boolean representing success (0 if tag does not exist)
468 Args : $tag - tagname to remove
471 =cut
473 sub remove_tag {
474 my ($self,$tag) = @_;
475 if( exists $self->{'_tags'}->{$tag} ) {
476 $self->{'_tags'}->{$tag} = undef;
477 delete $self->{'_tags'}->{$tag};
478 return 1;
480 return 0;
483 =head2 remove_all_tags
485 Title : remove_all_tags
486 Usage : $tree->remove_all_tags()
487 Function: Removes all tags
488 Returns : None
489 Args : None
491 =cut
493 sub remove_all_tags{
494 my ($self) = @_;
495 $self->{'_tags'} = {};
496 return;
499 =head2 get_all_tags
501 Title : get_all_tags
502 Usage : my @tags = $tree->get_all_tags()
503 Function: Gets all the tag names for this Tree
504 Returns : Array of tagnames
505 Args : None
507 =cut
509 sub get_all_tags{
510 my ($self) = @_;
511 my @tags = sort keys %{$self->{'_tags'} || {}};
512 return @tags;
515 =head2 get_tag_values
517 Title : get_tag_values
518 Usage : my @values = $tree->get_tag_values($tag)
519 Function: Gets the values for given tag ($tag)
520 Returns : Array of values or empty list if tag does not exist
521 Args : $tag - tag name
523 =cut
525 sub get_tag_values{
526 my ($self,$tag) = @_;
527 return wantarray ? @{$self->{'_tags'}->{$tag} || []} :
528 (@{$self->{'_tags'}->{$tag} || []})[0];
531 =head2 has_tag
533 Title : has_tag
534 Usage : $tree->has_tag($tag)
535 Function: Boolean test if tag exists in the Tree
536 Returns : Boolean
537 Args : $tag - tagname
539 =cut
541 sub has_tag {
542 my ($self,$tag) = @_;
543 return exists $self->{'_tags'}->{$tag};
546 # -- private internal methods --
548 sub cleanup_tree {
549 my $self = shift;
550 unless( $self->nodelete ) {
551 for my $node ($self->get_nodes(-order => 'b', -sortby => 'none')) {
552 #$node->ancestor(undef);
553 #$node = undef;
554 $node->node_cleanup;
555 undef $node;
558 $self->{'_rootnode'} = undef;