nexml.t: Added a missing XML::Twig requirement.
[bioperl-live.git] / Bio / Tree / Tree.pm
blobab733b5fe1f8b484fff1c3fdeefe7347acdf15f8
2 # BioPerl module for Bio::Tree::Tree
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Jason Stajich <jason@bioperl.org>
8 # Copyright Jason Stajich
10 # You may distribute this module under the same terms as perl itself
12 # POD documentation - main docs before the code
15 =head1 NAME
17 Bio::Tree::Tree - An implementation of the TreeI interface.
19 =head1 SYNOPSIS
21 use Bio::TreeIO;
23 # like from a TreeIO
24 my $treeio = Bio::TreeIO->new(-format => 'newick', -file => 'treefile.dnd');
25 my $tree = $treeio->next_tree;
26 my @nodes = $tree->get_nodes;
27 my $root = $tree->get_root_node;
29 =head1 DESCRIPTION
31 This object holds handles to Nodes which make up a tree.
33 =head1 IMPLEMENTATION NOTE
35 This implementation of Bio::Tree::Tree contains Bio::Tree:::NodeI; mainly linked
36 via the root node. As NodeI can potentially contain circular references (as
37 nodes will need to refer to both parent and child nodes), Bio::Tree::Tree will
38 remove those circular references when the object is garbage-collected. This has
39 some side effects; primarily, one must keep the Tree in scope or have at least
40 one reference to it if working with nodes. The fix is to count the references to
41 the nodes and if it is greater than expected retain all of them, but it requires
42 an additional prereq and thus may not be worth the effort. This only shows up
43 in minor edge cases, though (see Bug #2869).
45 Example of issue:
47 # tree is not assigned to a variable, so passes from memory after
48 # root node is passed
49 my $root = Bio::TreeIO->new(-format => 'newick', -file => 'foo.txt')->next_tree
50 ->get_root_node;
52 # gets nothing, as all Node links are broken when Tree is garbage-collected above
53 my @descendents = $root->get_all_Descendents;
55 =head1 FEEDBACK
57 =head2 Mailing Lists
59 User feedback is an integral part of the evolution of this and other
60 Bioperl modules. Send your comments and suggestions preferably to
61 the Bioperl mailing list. Your participation is much appreciated.
63 bioperl-l@bioperl.org - General discussion
64 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
66 =head2 Support
68 Please direct usage questions or support issues to the mailing list:
70 I<bioperl-l@bioperl.org>
72 rather than to the module maintainer directly. Many experienced and
73 reponsive experts will be able look at the problem and quickly
74 address it. Please include a thorough description of the problem
75 with code and data examples if at all possible.
77 =head2 Reporting Bugs
79 Report bugs to the Bioperl bug tracking system to help us keep track
80 of the bugs and their resolution. Bug reports can be submitted via
81 the web:
83 https://github.com/bioperl/bioperl-live/issues
85 =head1 AUTHOR - Jason Stajich
87 Email jason@bioperl.org
89 =head1 CONTRIBUTORS
91 Aaron Mackey amackey@virginia.edu
92 Sendu Bala bix@sendu.me.uk
93 Mark A. Jensen maj@fortinbras.us
95 =head1 APPENDIX
97 The rest of the documentation details each of the object methods.
98 Internal methods are usually preceded with a _
100 =cut
103 # Let the code begin...
106 package Bio::Tree::Tree;
107 use strict;
109 # Object preamble - inherits from Bio::Root::Root
112 use base qw(Bio::Root::Root Bio::Tree::TreeI Bio::Tree::TreeFunctionsI);
114 =head2 new
116 Title : new
117 Usage : my $obj = Bio::Tree::Tree->new();
118 Function: Builds a new Bio::Tree::Tree object
119 Returns : Bio::Tree::Tree
120 Args : -root => L<Bio::Tree::NodeI> object which is the root
122 -node => L<Bio::Tree::NodeI> object from which the root will be
123 determined
125 -nodelete => boolean, whether or not to try and cleanup all
126 the nodes when this this tree goes out of scope.
127 -id => optional tree ID
128 -score => optional tree score value
130 =cut
132 sub new {
133 my ($class, @args) = @_;
135 my $self = $class->SUPER::new(@args);
136 $self->{'_rootnode'} = undef;
137 $self->{'_maxbranchlen'} = 0;
138 $self->_register_for_cleanup(\&cleanup_tree);
139 my ($root, $node, $nodel, $id, $score) =
140 $self->_rearrange([qw(ROOT NODE NODELETE ID SCORE)], @args);
142 if ($node && ! $root) {
143 $self->throw("Must supply a Bio::Tree::NodeI") unless ref($node) && $node->isa('Bio::Tree::NodeI');
144 my @lineage = $self->get_lineage_nodes($node);
145 $root = shift(@lineage) || $node;
147 # to stop us pulling in entire database of a Bio::Taxon when we later do
148 # get_nodes() or similar, specifically set ancestor() for each node
149 if ($node->isa('Bio::Taxon')) {
150 push(@lineage, $node) unless $node eq $root;
151 my $ancestor = $root;
152 foreach my $lineage_node (@lineage) {
153 $lineage_node->ancestor($ancestor);
154 } continue { $ancestor = $lineage_node; }
157 if ($root) {
158 $self->set_root_node($root);
161 $self->nodelete($nodel || 0);
162 $self->id($id) if defined $id;
163 $self->score($score) if defined $score;
164 return $self;
168 =head2 nodelete
170 Title : nodelete
171 Usage : $obj->nodelete($newval)
172 Function: Get/Set Boolean whether or not to delete the underlying
173 nodes when it goes out of scope. By default this is false
174 meaning trees are cleaned up.
175 Returns : boolean
176 Args : on set, new boolean value
178 =cut
180 sub nodelete {
181 my $self = shift;
182 return $self->{'nodelete'} = shift if @_;
183 return $self->{'nodelete'};
187 =head2 get_nodes
189 Title : get_nodes
190 Usage : my @nodes = $tree->get_nodes()
191 Function: Return list of Bio::Tree::NodeI objects
192 Returns : array of Bio::Tree::NodeI objects
193 Args : (named values) hash with one value
194 order => 'b|breadth' first order or 'd|depth' first order
195 sortby => [optional] "height", "creation", "alpha", "revalpha",
196 or coderef to be used to sort the order of children nodes. See L<Bio::Tree::Node> for details
198 =cut
200 sub get_nodes {
201 my ($self, @args) = @_;
202 my ($order, $sortby) = $self->_rearrange([qw(ORDER SORTBY)], @args);
203 $order ||= 'depth';
204 $sortby ||= 'none';
206 my @children;
207 my $node = $self->get_root_node;
208 if ($node) {
209 if ($order =~ m/^b/oi) { # breadth-first
210 @children = ($node);
211 my @to_process = ($node);
212 while( @to_process ) {
213 my $n = shift @to_process;
214 my @c = $n->each_Descendent($sortby);
215 push @children, @c;
216 push @to_process, @c;
218 } elsif ($order =~ m/^d/oi) { # depth-first
219 @children = ($node, $node->get_all_Descendents($sortby));
220 } else {
221 $self->verbose(1);
222 $self->warn("specified an order '$order' which I don't understan\n");
226 return @children;
230 =head2 get_root_node
232 Title : get_root_node
233 Usage : my $node = $tree->get_root_node();
234 Function: Get the Top Node in the tree, in this implementation
235 Trees only have one top node.
236 Returns : Bio::Tree::NodeI object
237 Args : none
239 =cut
241 sub get_root_node {
242 my ($self) = @_;
243 return $self->{'_rootnode'};
247 =head2 set_root_node
249 Title : set_root_node
250 Usage : $tree->set_root_node($node)
251 Function: Set the Root Node for the Tree
252 Returns : Bio::Tree::NodeI
253 Args : Bio::Tree::NodeI
255 =cut
257 sub set_root_node {
258 my $self = shift;
259 if ( @_ ) {
260 my $value = shift;
261 if ( defined $value && ! $value->isa('Bio::Tree::NodeI') ) {
262 $self->warn("Trying to set the root node to $value which is not a Bio::Tree::NodeI");
263 return $self->get_root_node;
265 $self->{'_rootnode'} = $value;
267 return $self->get_root_node;
271 =head2 total_branch_length
273 Title : total_branch_length
274 Usage : my $size = $tree->total_branch_length
275 Function: Returns the sum of the length of all branches
276 Returns : real
277 Args : none
279 =cut
281 sub total_branch_length { shift->subtree_length }
284 =head2 subtree_length
286 Title : subtree_length
287 Usage : my $subtree_size = $tree->subtree_length($internal_node)
288 Function: Returns the sum of the length of all branches in a subtree
289 under the node. Calculates the size of the whole tree
290 without an argument (but only if root node is defined)
291 Returns : real or undef
292 Args : Bio::Tree::NodeI object, defaults to the root node
294 =cut
296 sub subtree_length {
297 my $tree = shift;
298 my $node = shift || $tree->get_root_node;
299 return unless $node;
300 my $sum = 0;
301 for ( $node->get_all_Descendents ) {
302 $sum += $_->branch_length || 0;
304 return $sum;
308 =head2 id
310 Title : id
311 Usage : my $id = $tree->id();
312 Function: An id value for the tree
313 Returns : scalar
314 Args : [optional] new value to set
316 =cut
318 sub id {
319 my ($self, $val) = @_;
320 if ( defined $val ) {
321 $self->{'_treeid'} = $val;
323 return $self->{'_treeid'};
327 =head2 score
329 Title : score
330 Usage : $obj->score($newval)
331 Function: Sets the associated score with this tree
332 This is a generic slot which is probably best used
333 for log likelihood or other overall tree score
334 Returns : value of score
335 Args : newvalue (optional)
337 =cut
339 sub score {
340 my ($self, $val) = @_;
341 if ( defined $val ) {
342 $self->{'_score'} = $val;
344 return $self->{'_score'};
348 # decorated interface TreeI Implements this
350 =head2 height
352 Title : height
353 Usage : my $height = $tree->height
354 Function: Gets the height of tree - this LOG_2($number_nodes)
355 WARNING: this is only true for strict binary trees. The TreeIO
356 system is capable of building non-binary trees, for which this
357 method will currently return an incorrect value!!
358 Returns : integer
359 Args : none
361 =head2 number_nodes
363 Title : number_nodes
364 Usage : my $size = $tree->number_nodes
365 Function: Returns the number of nodes in the tree
366 Returns : integer
367 Args : none
369 =head2 as_text
371 Title : as_text
372 Usage : my $tree_as_string = $tree->as_text($format)
373 Function: Returns the tree as a string representation in the
374 desired format, e.g.: 'newick', 'nhx' or 'tabtree' (the default)
375 Returns : scalar string
376 Args : format type as specified by Bio::TreeIO
377 Note : This method loads the Bio::TreeIO::$format module
378 on the fly, and commandeers the _write_tree_Helper
379 routine therein to create the tree string.
381 =cut
383 sub as_text {
384 my $self = shift;
385 my $format = shift || 'tabtree';
386 my $params_input = shift || {};
388 my $iomod = "Bio::TreeIO::$format";
389 $self->_load_module($iomod);
391 my $string = '';
392 open my $fh, '>', \$string or $self->throw("Could not write '$string' as file: $!");
393 my $test = $iomod->new( -format => $format, -fh => $fh );
395 # Get the default params for the given IO module.
396 $test->set_params($params_input);
398 $test->write_tree($self);
399 close $fh;
400 return $string;
404 =head2 Methods for associating Tag/Values with a Tree
406 These methods associate tag/value pairs with a Tree
408 =head2 set_tag_value
410 Title : set_tag_value
411 Usage : $tree->set_tag_value($tag,$value)
412 $tree->set_tag_value($tag,@values)
413 Function: Sets a tag value(s) to a tree. Replaces old values.
414 Returns : number of values stored for this tag
415 Args : $tag - tag name
416 $value - value to store for the tag
418 =cut
420 sub set_tag_value {
421 my ($self, $tag, @values) = @_;
422 if ( ! defined $tag || ! scalar @values ) {
423 $self->warn("cannot call set_tag_value with an undefined value");
425 $self->remove_tag ($tag);
426 map { push @{$self->{'_tags'}->{$tag}}, $_ } @values;
427 return scalar @{$self->{'_tags'}->{$tag}};
431 =head2 add_tag_value
433 Title : add_tag_value
434 Usage : $tree->add_tag_value($tag,$value)
435 Function: Adds a tag value to a tree
436 Returns : number of values stored for this tag
437 Args : $tag - tag name
438 $value - value to store for the tag
440 =cut
442 sub add_tag_value {
443 my ($self, $tag, $value) = @_;
444 if ( ! defined $tag || ! defined $value ) {
445 $self->warn("cannot call add_tag_value with an undefined value");
447 push @{$self->{'_tags'}->{$tag}}, $value;
448 return scalar @{$self->{'_tags'}->{$tag}};
452 =head2 remove_tag
454 Title : remove_tag
455 Usage : $tree->remove_tag($tag)
456 Function: Remove the tag and all values for this tag
457 Returns : boolean representing success (0 if tag does not exist)
458 Args : $tag - tagname to remove
460 =cut
462 sub remove_tag {
463 my ($self, $tag) = @_;
464 if ( exists $self->{'_tags'}->{$tag} ) {
465 $self->{'_tags'}->{$tag} = undef;
466 delete $self->{'_tags'}->{$tag};
467 return 1;
469 return 0;
473 =head2 remove_all_tags
475 Title : remove_all_tags
476 Usage : $tree->remove_all_tags()
477 Function: Removes all tags
478 Returns : None
479 Args : None
481 =cut
483 sub remove_all_tags {
484 my ($self) = @_;
485 $self->{'_tags'} = {};
486 return;
490 =head2 get_all_tags
492 Title : get_all_tags
493 Usage : my @tags = $tree->get_all_tags()
494 Function: Gets all the tag names for this Tree
495 Returns : Array of tagnames
496 Args : None
498 =cut
500 sub get_all_tags {
501 my ($self) = @_;
502 my @tags = sort keys %{$self->{'_tags'} || {}};
503 return @tags;
507 =head2 get_tag_values
509 Title : get_tag_values
510 Usage : my @values = $tree->get_tag_values($tag)
511 Function: Gets the values for given tag ($tag)
512 Returns : Array of values or empty list if tag does not exist
513 Args : $tag - tag name
515 =cut
517 sub get_tag_values {
518 my ($self, $tag) = @_;
519 return wantarray ? @{$self->{'_tags'}->{$tag} || []} :
520 (@{$self->{'_tags'}->{$tag} || []})[0];
524 =head2 has_tag
526 Title : has_tag
527 Usage : $tree->has_tag($tag)
528 Function: Boolean test if tag exists in the Tree
529 Returns : Boolean
530 Args : $tag - tagname
532 =cut
534 sub has_tag {
535 my ($self, $tag) = @_;
536 return exists $self->{'_tags'}->{$tag};
540 # safe tree clone that doesn't seg fault
542 =head2 clone
544 Title : clone
545 Alias : _clone
546 Usage : $tree_copy = $tree->clone();
547 $subtree_copy = $tree->clone($internal_node);
548 Function: Safe tree clone that doesn't segfault
549 Returns : Bio::Tree::Tree object
550 Args : [optional] $start_node, Bio::Tree::Node object
552 =cut
554 sub clone {
555 my ($self, $parent, $parent_clone) = @_;
556 $parent ||= $self->get_root_node;
557 $parent_clone ||= $self->_clone_node($parent);
559 foreach my $node ($parent->each_Descendent()) {
560 my $child = $self->_clone_node($node);
561 $child->ancestor($parent_clone);
562 $self->_clone($node, $child);
564 $parent->ancestor && return;
566 my $tree = $self->new(-root => $parent_clone);
567 return $tree;
571 # -- private internal methods --
573 sub cleanup_tree {
574 my $self = shift;
575 unless( $self->nodelete ) {
576 for my $node ($self->get_nodes(-order => 'b', -sortby => 'none')) {
577 $node->node_cleanup;
580 $self->{'_rootnode'} = undef;