sync with main trunk completely (a few tests failing)
[bioperl-live.git] / Bio / Tree / TreeFunctionsI.pm
blob572fa51aadf69a987db522f5f12264e464c68e48
1 # $Id$
3 # BioPerl module for Bio::Tree::TreeFunctionsI
5 # Please direct questions and support issues to <bioperl-l@bioperl.org>
7 # Cared for by Jason Stajich <jason-at-bioperl-dot-org>
9 # Copyright Jason Stajich
11 # You may distribute this module under the same terms as perl itself
13 # POD documentation - main docs before the code
15 =head1 NAME
17 Bio::Tree::TreeFunctionsI - Decorated Interface implementing basic Tree exploration methods
19 =head1 SYNOPSIS
21 use Bio::TreeIO;
22 my $in = Bio::TreeIO->new(-format => 'newick', -file => 'tree.tre');
24 my $tree = $in->next_tree;
26 my @nodes = $tree->find_node('id1');
28 if( $tree->is_monophyletic(-nodes => \@nodes, -outgroup => $outnode) ){
29 #...
32 =head1 DESCRIPTION
34 This interface provides a set of implementated Tree functions which
35 only use the defined methods in the TreeI or NodeI interface.
37 =head1 FEEDBACK
39 =head2 Mailing Lists
41 User feedback is an integral part of the evolution of this and other
42 Bioperl modules. Send your comments and suggestions preferably to
43 the Bioperl mailing list. Your participation is much appreciated.
45 bioperl-l@bioperl.org - General discussion
46 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
48 =head2 Support
50 Please direct usage questions or support issues to the mailing list:
52 L<bioperl-l@bioperl.org>
54 rather than to the module maintainer directly. Many experienced and
55 reponsive experts will be able look at the problem and quickly
56 address it. Please include a thorough description of the problem
57 with code and data examples if at all possible.
59 =head2 Reporting Bugs
61 Report bugs to the Bioperl bug tracking system to help us keep track
62 of the bugs and their resolution. Bug reports can be submitted via the
63 web:
65 http://bugzilla.open-bio.org/
67 =head1 AUTHOR - Jason Stajich, Aaron Mackey, Justin Reese
69 Email jason-at-bioperl-dot-org
70 Email amackey-at-virginia.edu
71 Email jtr4v-at-virginia.edu
73 =head1 CONTRIBUTORS
75 Sendu Bala, bix@sendu.me.uk
77 Rerooting code was worked on by
79 Daniel Barker d.barker-at-reading.ac.uk
80 Ramiro Barrantes Ramiro.Barrantes-at-uvm.edu
82 =head1 APPENDIX
84 The rest of the documentation details each of the object methods.
85 Internal methods are usually preceded with a _
87 =cut
89 # Let the code begin...
91 package Bio::Tree::TreeFunctionsI;
92 use strict;
94 use UNIVERSAL qw(isa);
96 use base qw(Bio::Tree::TreeI);
98 =head2 find_node
100 Title : find_node
101 Usage : my @nodes = $self->find_node(-id => 'node1');
102 Function: returns all nodes that match a specific field, by default this
103 is id, but different branch_length,
104 Returns : List of nodes which matched search
105 Args : text string to search for
107 -fieldname => $textstring
109 =cut
111 sub find_node {
112 my ($self,$type,$field) = @_;
113 if( ! defined $type ) {
114 $self->warn("Must request a either a string or field and string when searching");
117 # all this work for a '-' named field
118 # is so that we could potentially
119 # expand to other constraints in
120 # different implementations
121 # like 'find all nodes with boostrap < XX'
123 if( ! defined $field ) {
124 # only 1 argument, default to searching by id
125 $field= $type;
126 $type = 'id';
127 } else {
128 $type =~ s/^-//;
131 # could actually do this by testing $rootnode->can($type) but
132 # it is possible that a tree is implemeted with different node types
133 # - although it is unlikely that the root node would be richer than the
134 # leaf nodes. Can't handle NHX tags right now
136 my @nodes = grep { $_->can($type) && defined $_->$type() &&
137 $_->$type() eq $field } $self->get_nodes();
139 if ( wantarray) {
140 return @nodes;
141 } else {
142 if( @nodes > 1 ) {
143 $self->warn("More than 1 node found but caller requested scalar, only returning first node");
145 return shift @nodes;
149 =head2 remove_Node
151 Title : remove_Node
152 Usage : $tree->remove_Node($node)
153 Function: Removes a node from the tree
154 Returns : boolean represent status of success
155 Args : either Bio::Tree::NodeI or string of the node id
157 =cut
159 sub remove_Node {
160 my ($self,$input) = @_;
161 my $node = undef;
162 unless( ref($input) ) {
163 $node = $self->find_node($input);
164 } elsif( ! $input->isa('Bio::Tree::NodeI') ) {
165 $self->warn("Did not provide either a valid Bio::Tree::NodeI object or id to remove_node");
166 return 0;
167 } else {
168 $node = $input;
170 if( ! $node->ancestor &&
171 $self->get_root_node->internal_id != $node->internal_id) {
172 $self->warn("Node (".$node->to_string . ") has no ancestor, can't remove!");
173 } else {
174 $node->ancestor->remove_Descendent($node);
178 =head2 get_lineage_nodes
180 Title : get_lineage_nodes
181 Usage : my @nodes = $tree->get_lineage_nodes($node);
182 Function: Get the full lineage of a node (all its ancestors, in the order
183 root->most recent ancestor)
184 Returns : list of nodes
185 Args : either Bio::Tree::NodeI or string of the node id
187 =cut
189 sub get_lineage_nodes {
190 my ($self, $input) = @_;
191 my $node;
192 unless (ref $input) {
193 $node = $self->find_node($input);
195 elsif (! $input->isa('Bio::Tree::NodeI')) {
196 $self->warn("Did not provide either a valid Bio::Tree::NodeI object or id to get_lineage_nodes");
197 return;
199 else {
200 $node = $input;
203 # when dealing with Bio::Taxon objects with databases, the root will always
204 # be the database's root, ignoring this Tree's set root node; prefer the
205 # Tree's idea of root.
206 my $root = $self->get_root_node || '';
208 my @lineage;
209 while ($node) {
210 $node = $node->ancestor || last;
211 unshift(@lineage, $node);
212 $node eq $root && last;
214 return @lineage;
217 =head2 splice
219 Title : splice
220 Usage : $tree->splice(-remove_id => \@ids);
221 Function: Remove all the nodes from a tree that correspond to the supplied
222 args, making all the descendents of a removed node the descendents
223 of the removed node's ancestor.
224 You can ask to explicitly remove certain nodes by using -remove_*,
225 remove them conditionally by using -remove_* in combination with
226 -keep_*, or remove everything except certain nodes by using only
227 -keep_*.
228 Returns : n/a
229 Args : just a list of Bio::Tree::NodeI objects to remove, OR
230 -key => value pairs, where -key has the prefix 'remove' or 'keep',
231 followed by an underscore, followed by a fieldname (like for the
232 method find_node). Value should be a scalar or an array ref of
233 scalars (again, like you might supply to find_node).
235 So (-remove_id => [1, 2]) will remove all nodes from the tree that
236 have an id() of '1' or '2', while
237 (-remove_id => [1, 2], -keep_id => [2]) will remove all nodes with
238 an id() of '1'.
239 (-keep_id => [2]) will remove all nodes unless they have an id() of
240 '2' (note, no -remove_*).
242 -preserve_lengths => 1 : setting this argument will splice out
243 intermediate nodes, preserving the original total length between
244 the ancestor and the descendants of the spliced node. Undef
245 by default.
247 =cut
249 sub splice {
250 my ($self, @args) = @_;
251 $self->throw("Must supply some arguments") unless @args > 0;
252 my $preserve_lengths = 0;
253 my @nodes_to_remove;
254 if (ref($args[0])) {
255 $self->throw("When supplying just a list of Nodes, they must be Bio::Tree::NodeI objects") unless $args[0]->isa('Bio::Tree::NodeI');
256 @nodes_to_remove = @args;
258 else {
259 $self->throw("When supplying -key => value pairs, must be an even number of args") unless @args % 2 == 0;
260 my %args = @args;
261 my @keep_nodes;
262 my @remove_nodes;
263 my $remove_all = 1;
264 while (my ($key, $value) = each %args) {
265 my @values = ref($value) ? @{$value} : ($value);
267 if ($key =~ s/remove_//) {
268 $remove_all = 0;
269 foreach my $value (@values) {
270 push(@remove_nodes, $self->find_node($key => $value));
273 elsif ($key =~ s/keep_//) {
274 foreach my $value (@values) {
275 push(@keep_nodes, $self->find_node($key => $value));
278 elsif ($key =~ /preserve/) {
279 $preserve_lengths = $value;
283 if ($remove_all) {
284 if (@keep_nodes == 0) {
285 $self->warn("Requested to remove everything except certain nodes, but those nodes were not found; doing nothing instead");
286 return;
289 @remove_nodes = $self->get_nodes;
291 if (@keep_nodes > 0) {
292 my %keep_iids = map { $_->internal_id => 1 } @keep_nodes;
293 foreach my $node (@remove_nodes) {
294 push(@nodes_to_remove, $node) unless exists $keep_iids{$node->internal_id};
297 else {
298 @nodes_to_remove = @remove_nodes;
301 # do the splicing
302 #*** the algorithm here hasn't really been thought through and tested much,
303 # will probably need revising
304 my %root_descs;
305 my $reroot = 0;
306 foreach my $node (@nodes_to_remove) {
307 my @descs = $node->each_Descendent;
308 my $ancestor = $node->ancestor;
309 if (! $ancestor && ! $reroot) {
310 # we're going to remove the tree root, so will have to re-root the
311 # tree later
312 $reroot = 1;
313 %root_descs = map { $_->internal_id => $_ } @descs;
314 $node->remove_all_Descendents;
315 next;
317 if (exists $root_descs{$node->internal_id}) {
318 # well, this one can't be the future root anymore
319 delete $root_descs{$node->internal_id};
320 # but maybe one of this one's descs will become the root
321 foreach my $desc (@descs) {
322 $root_descs{$desc->internal_id} = $desc;
325 # make the ancestor of our descendents our own ancestor, and give us
326 # no ancestor of our own to remove us from the tree
327 foreach my $desc (@descs) {
328 $desc->ancestor($ancestor);
329 $desc->branch_length($desc->branch_length + $node->branch_length) if $preserve_lengths;
331 $node->ancestor(undef);
333 if ($reroot) {
334 my @candidates = values %root_descs;
335 $self->throw("After splicing, there was no tree root!") unless @candidates > 0;
336 $self->throw("After splicing, the original root was removed but there are multiple candidates for the new root!") unless @candidates == 1;
337 $self->set_root_node($candidates[0]); # not sure its valid to use the reroot() method
341 =head2 get_lca
343 Title : get_lca
344 Usage : get_lca(-nodes => \@nodes ); OR
345 get_lca(@nodes);
346 Function: given two or more nodes, returns the lowest common ancestor (aka most
347 recent common ancestor)
348 Returns : node object or undef if there is no common ancestor
349 Args : -nodes => arrayref of nodes to test, OR
350 just a list of nodes
352 =cut
354 sub get_lca {
355 my ($self, @args) = @_;
356 my ($nodes) = $self->_rearrange([qw(NODES)],@args);
357 my @nodes;
358 if (ref($nodes) eq 'ARRAY') {
359 @nodes = @{$nodes};
361 else {
362 @nodes = @args;
364 @nodes >= 2 or $self->throw("At least 2 nodes are required");
365 # We must go root->leaf to get the correct answer to lca (in a world where
366 # internal_id might not be uniquely assigned), but leaf->root is more
367 # forgiving (eg. lineages may not all have the same root, or they may have
368 # different numbers of 'minor' taxa inbeteen 'major' ones).
370 # I use root->leaf so that we can easily do multiple nodes at once - no
371 # matter what taxa are below the lca, the lca and all its ancestors ought to
372 # be identical.
373 my @paths;
374 foreach my $node (@nodes) {
375 unless(ref($node) && $node->isa('Bio::Tree::NodeI')) {
376 $self->throw("Cannot process get_lca() with a non-NodeI object ($node)\n");
378 my @path = ($self->get_lineage_nodes($node), $node);
379 push(@paths, \@path);
381 return unless @paths >= 2;
382 my $lca;
383 LEVEL: while ($paths[0] > 0) {
384 my %node_ids;
385 my $node;
386 foreach my $path (@paths) {
387 $node = shift(@{$path}) || last LEVEL;
388 my $node_id = $node->internal_id;
389 unless (defined $node_id) {
390 $self->warn("One of the lineages had a node with no internal_id, can't calculate the common ancestor");
391 return;
393 $node_ids{$node_id}++;
395 if (keys %node_ids == 1) {
396 $lca = $node;
398 else {
399 # at this point in the lineage the nodes are different; the previous
400 # loop had the lca
401 last LEVEL;
404 # If the tree that we are contains the lca (get_lca could have been called
405 # on an empty tree, since it works with plain Nodes), prefer to return the
406 # node object that belongs to us
407 if ($lca && $self->number_nodes > 0) {
408 my $own_lca = $self->find_node(-internal_id => $lca->internal_id);
409 $lca = $own_lca if $own_lca;
411 return $lca;
414 =head2 merge_lineage
416 Title : merge_lineage
417 Usage : merge_lineage($node)
418 Function: Merge a lineage of nodes with this tree.
419 Returns : n/a
420 Args : Bio::Tree::TreeI with only one leaf, OR
421 Bio::Tree::NodeI which has an ancestor
423 For example, if we are the tree $tree:
425 +---B
429 +---C
431 and we want to merge the lineage $other_tree:
433 A---C---D
435 After calling $tree->merge_lineage($other_tree), $tree looks like:
437 +---B
441 +---C---D
443 =cut
445 sub merge_lineage {
446 my ($self, $thing) = @_;
447 $self->throw("Must supply an object reference") unless ref($thing);
449 my ($lineage_tree, $lineage_leaf);
450 if ($thing->isa('Bio::Tree::TreeI')) {
451 my @leaves = $thing->get_leaf_nodes;
452 $self->throw("The supplied Tree can only have one leaf") unless @leaves == 1;
453 $lineage_tree = $thing;
454 $lineage_leaf = shift(@leaves);
456 elsif ($thing->isa('Bio::Tree::NodeI')) {
457 $self->throw("The supplied Node must have an ancestor") unless $thing->ancestor;
458 $lineage_tree = $self->new(-node => $thing);
459 $lineage_leaf = $thing;
462 # see if any node in the supplied lineage is in our tree - that will be
463 # our lca and we can merge at the node below
464 my @lineage = ($lineage_leaf, reverse($self->get_lineage_nodes($lineage_leaf)));
465 my $merged = 0;
466 for my $i (0..$#lineage) {
467 my $lca = $self->find_node(-internal_id => $lineage[$i]->internal_id) || next;
469 if ($i == 0) {
470 # the supplied thing to merge is already in the tree, nothing to do
471 return;
473 # $i is the lca, so the previous node is new to the tree and should
474 # be merged on
475 $lca->add_Descendent($lineage[$i-1]);
476 $merged = 1;
477 last;
479 $merged || ($self->warn("Couldn't merge the lineage of ".$lineage_leaf->id." with the rest of the tree!\n") && return);
482 =head2 contract_linear_paths
484 Title : contract_linear_paths
485 Usage : contract_linear_paths()
486 Function: Splices out all nodes in the tree that have an ancestor and only one
487 descendent.
488 Returns : n/a
489 Args : none for normal behaviour, true to dis-regard the ancestor requirment
490 and re-root the tree as necessary
492 For example, if we are the tree $tree:
494 +---E
496 A---B---C---D
498 +---F
500 After calling $tree->contract_linear_paths(), $tree looks like:
502 +---E
504 A---D
506 +---F
508 Instead, $tree->contract_linear_paths(1) would have given:
510 +---E
514 +---F
516 =cut
518 sub contract_linear_paths {
519 my $self = shift;
520 my $reroot = shift;
521 my @remove;
522 foreach my $node ($self->get_nodes) {
523 if ($node->ancestor && $node->each_Descendent == 1) {
524 push(@remove, $node);
527 $self->splice(@remove) if @remove;
528 if ($reroot) {
529 my $root = $self->get_root_node;
530 my @descs = $root->each_Descendent;
531 if (@descs == 1) {
532 my $new_root = shift(@descs);
533 $self->set_root_node($new_root);
534 $new_root->ancestor(undef);
539 =head2 is_binary
541 Example : is_binary(); is_binary($node);
542 Description: Finds if the tree or subtree defined by
543 the internal node is a true binary tree
544 without polytomies
545 Returns : boolean
546 Exceptions :
547 Args : Internal node Bio::Tree::NodeI, optional
550 =cut
552 sub is_binary;
554 sub is_binary {
555 my $self = shift;
556 my $node = shift || $self->get_root_node;
558 my $binary = 1;
559 my @descs = $node->each_Descendent;
560 $binary = 0 unless @descs == 2 or @descs == 0;
561 #print "$binary, ", scalar @descs, "\n";
563 # recurse
564 foreach my $desc (@descs) {
565 $binary += $self->is_binary($desc) -1;
567 $binary = 0 if $binary < 0;
568 return $binary;
572 =head2 force_binary
574 Title : force_binary
575 Usage : force_binary()
576 Function: Forces the tree into a binary tree, splitting branches arbitrarily
577 and creating extra nodes as necessary, such that all nodes have
578 exactly two or zero descendants.
579 Returns : n/a
580 Args : none
582 For example, if we are the tree $tree:
584 +---G
586 +---F
588 +---E
592 +---D
594 +---C
596 +---B
598 (A has 6 descendants B-G)
600 After calling $tree->force_binary(), $tree looks like:
602 +---X
604 +---X
606 | +---X
608 +---X
610 | | +---G
611 | | |
612 | +---X
614 | +---F
616 | +---E
618 | +---X
619 | | |
620 | | +---D
622 +---X
624 | +---C
626 +---X
628 +---B
630 (Where X are artificially created nodes with ids 'artificial_n', where n is
631 an integer making the id unique within the tree)
633 =cut
635 sub force_binary {
636 my $self = shift;
637 my $node = shift || $self->get_root_node;
639 my @descs = $node->each_Descendent;
640 if (@descs > 2) {
641 $self->warn("Node ".($node->can('node_name') ? ($node->node_name || $node->id) : $node->id).
642 " has more than two descendants\n(".
643 join(", ", map { $node->can('node_name') ? ($node->node_name || $node->id) : $node->id } @descs).
644 ")\nWill do an arbitrary balanced split");
645 my @working = @descs;
646 # create an even set of artifical nodes on which to later hang the descs
647 my $half = @working / 2;
648 $half++ if $half > int($half);
649 $half = int($half);
650 my @artificials;
651 while ($half > 1) {
652 my @this_level;
653 foreach my $top_node (@artificials || $node) {
654 for (1..2) {
655 my $art = $top_node->new(-id => "artificial_".++$self->{_art_num});
656 $top_node->add_Descendent($art);
657 push(@this_level, $art);
660 @artificials = @this_level;
661 $half--;
663 # attach two descs to each artifical leaf
664 foreach my $art (@artificials) {
665 for (1..2) {
666 my $desc = shift(@working) || $node->new(-id => "artificial_".++$self->{_art_num});
667 $desc->ancestor($art);
671 elsif (@descs == 1) {
672 # ensure that all nodes have 2 descs
673 $node->add_Descendent($node->new(-id => "artificial_".++$self->{_art_num}));
675 # recurse
676 foreach my $desc (@descs) {
677 $self->force_binary($desc);
681 =head2 simplify_to_leaves_string
683 Title : simplify_to_leaves_string
684 Usage : my $leaves_string = $tree->simplify_to_leaves_string()
685 Function: Creates a simple textual representation of the relationship between
686 leaves in self. It forces the tree to be binary, so the result may
687 not strictly correspond to the tree (if the tree wasn't binary), but
688 will be as close as possible. The tree object is not altered. Only
689 leaf node ids are output, in a newick-like format.
690 Returns : string
691 Args : none
693 =cut
695 sub simplify_to_leaves_string {
696 my $self = shift;
698 # Before contracting and forcing binary we need to clone self, but Clone.pm
699 # clone() seg faults and fails to make the clone, whilst Storable dclone
700 # needs $self->{_root_cleanup_methods} deleted (code ref) and seg faults at
701 # end of script. Let's make our own clone...
702 my $tree = $self->_clone;
704 $tree->contract_linear_paths(1);
705 $tree->force_binary;
706 foreach my $node ($tree->get_nodes) {
707 my $id = $node->id;
708 $id = ($node->is_Leaf && $id !~ /^artificial/) ? $id : '';
709 $node->id($id);
712 my %paired;
713 my @data = $self->_simplify_helper($tree->get_root_node, \%paired);
715 return join(',', @data);
718 # safe tree clone that doesn't seg fault
720 =head2 clone()
722 Title : clone
723 Alias : _clone
724 Usage : $tree_copy = $tree->clone();
725 $subtree_copy = $tree->clone($internal_node);
726 Function: Safe tree clone that doesn't segfault
727 (of Sendu)
728 Returns : Bio::Tree::Tree object
729 Args : [optional] $start_node, Bio::Tree::Node object
731 =cut
733 sub clone {
734 my ($self, $parent, $parent_clone) = @_;
735 $parent ||= $self->get_root_node;
736 $parent_clone ||= $self->_clone_node($parent);
738 foreach my $node ($parent->each_Descendent()) {
739 my $child = $self->_clone_node($node);
740 $child->ancestor($parent_clone);
741 $self->_clone($node, $child);
743 $parent->ancestor && return;
745 my $tree = $self->new(-root => $parent_clone);
746 return $tree;
749 # alias
750 sub _clone { shift->clone(@_) }
752 # safe node clone that doesn't seg fault, but deliberately loses ancestors and
753 # descendents
754 sub _clone_node {
755 my ($self, $node) = @_;
756 my $clone = $node->new;
758 while (my ($key, $val) = each %{$node}) {
759 if ($key eq '_desc' || $key eq '_ancestor') {
760 next;
762 ${$clone}{$key} = $val;
765 return $clone;
768 # tree string generator for simplify_to_leaves_string, based on
769 # Bio::TreeIO::newick::_write_tree_Helper
770 sub _simplify_helper {
771 my ($self, $node, $paired) = @_;
772 return () if (!defined $node);
774 my @data = ();
775 foreach my $node ($node->each_Descendent()) {
776 push(@data, $self->_simplify_helper($node, $paired));
779 my $id = $node->id_output || '';
780 if (@data) {
781 unless (exists ${$paired}{"@data"} || @data == 1) {
782 $data[0] = "(" . $data[0];
783 $data[-1] .= ")";
784 ${$paired}{"@data"} = 1;
787 elsif ($id) {
788 push(@data, $id);
791 return @data;
794 =head2 distance
796 Title : distance
797 Usage : distance(-nodes => \@nodes )
798 Function: returns the distance between two given nodes
799 Returns : numerical distance
800 Args : -nodes => arrayref of nodes to test
802 =cut
804 sub distance {
805 my ($self,@args) = @_;
806 my ($nodes) = $self->_rearrange([qw(NODES)],@args);
807 if( ! defined $nodes ) {
808 $self->warn("Must supply -nodes parameter to distance() method");
809 return;
811 $self->throw("Must provide 2 nodes") unless @{$nodes} == 2;
813 my $lca = $self->get_lca(@{$nodes});
814 unless($lca) {
815 $self->warn("could not find the lca of supplied nodes; can't find distance either");
816 return;
819 my $cumul_dist = 0;
820 my $warned = 0;
821 foreach my $current_node (@{$nodes}) {
822 while (1) {
823 last if $current_node eq $lca;
824 if ($current_node->branch_length) {
825 $cumul_dist += $current_node->branch_length;
827 elsif (! $warned) {
828 $self->warn("At least some nodes do not have a branch length, the distance returned could be wrong");
829 $warned = 1;
832 $current_node = $current_node->ancestor || last;
836 return $cumul_dist;
839 =head2 is_monophyletic
841 Title : is_monophyletic
842 Usage : if( $tree->is_monophyletic(-nodes => \@nodes,
843 -outgroup => $outgroup)
844 Function: Will do a test of monophyly for the nodes specified
845 in comparison to a chosen outgroup
846 Returns : boolean
847 Args : -nodes => arrayref of nodes to test
848 -outgroup => outgroup to serve as a reference
851 =cut
853 sub is_monophyletic{
854 my ($self,@args) = @_;
855 my ($nodes,$outgroup) = $self->_rearrange([qw(NODES OUTGROUP)],@args);
857 if( ! defined $nodes || ! defined $outgroup ) {
858 $self->warn("Must supply -nodes and -outgroup parameters to the method
859 is_monophyletic");
860 return;
862 if( ref($nodes) !~ /ARRAY/i ) {
863 $self->warn("Must provide a valid array reference for -nodes");
866 my $clade_root = $self->get_lca(@{$nodes});
867 unless( defined $clade_root ) {
868 $self->warn("could not find clade root via lca");
869 return;
872 my $og_ancestor = $outgroup->ancestor;
873 while( defined ($og_ancestor ) ) {
874 if( $og_ancestor->internal_id == $clade_root->internal_id ) {
875 # monophyly is violated
876 return 0;
878 $og_ancestor = $og_ancestor->ancestor;
880 return 1;
883 =head2 is_paraphyletic
885 Title : is_paraphyletic
886 Usage : if( $tree->is_paraphyletic(-nodes =>\@nodes,
887 -outgroup => $node) ){ }
888 Function: Tests whether or not a given set of nodes are paraphyletic
889 (representing the full clade) given an outgroup
890 Returns : [-1,0,1] , -1 if the group is not monophyletic
891 0 if the group is not paraphyletic
892 1 if the group is paraphyletic
893 Args : -nodes => Array of Bio::Tree::NodeI objects which are in the tree
894 -outgroup => a Bio::Tree::NodeI to compare the nodes to
897 =cut
899 sub is_paraphyletic{
900 my ($self,@args) = @_;
901 my ($nodes,$outgroup) = $self->_rearrange([qw(NODES OUTGROUP)],@args);
903 if( ! defined $nodes || ! defined $outgroup ) {
904 $self->warn("Must suply -nodes and -outgroup parameters to the method is_paraphyletic");
905 return;
907 if( ref($nodes) !~ /ARRAY/i ) {
908 $self->warn("Must provide a valid array reference for -nodes");
909 return;
912 # Algorithm
913 # Find the lca
914 # Find all the nodes beneath the lca
915 # Test to see that none are missing from the nodes list
916 my %nodehash;
917 foreach my $n ( @$nodes ) {
918 $nodehash{$n->internal_id} = $n;
921 my $clade_root = $self->get_lca(-nodes => $nodes );
922 unless( defined $clade_root ) {
923 $self->warn("could not find clade root via lca");
924 return;
927 my $og_ancestor = $outgroup->ancestor;
929 # Is this necessary/correct for paraphyly test?
930 while( defined ($og_ancestor ) ) {
931 if( $og_ancestor->internal_id == $clade_root->internal_id ) {
932 # monophyly is violated, could be paraphyletic
933 return -1;
935 $og_ancestor = $og_ancestor->ancestor;
937 my $tree = Bio::Tree::Tree->new(-root => $clade_root,
938 -nodelete => 1);
940 foreach my $n ( $tree->get_nodes() ) {
941 next unless $n->is_Leaf();
942 # if any leaf node is not in the list
943 # then it is part of the clade and so the list
944 # must be paraphyletic
945 return 1 unless ( $nodehash{$n->internal_id} );
947 return 0;
951 =head2 reroot
953 Title : reroot
954 Usage : $tree->reroot($node);
955 Function: Reroots a tree making a new node the root
956 Returns : 1 on success, 0 on failure
957 Args : Bio::Tree::NodeI that is in the tree, but is not the current root
959 =cut
961 sub reroot {
962 my ($self,$new_root) = @_;
963 unless (defined $new_root && $new_root->isa("Bio::Tree::NodeI")) {
964 $self->warn("Must provide a valid Bio::Tree::NodeI when rerooting");
965 return 0;
968 my $old_root = $self->get_root_node;
969 if( $new_root == $old_root ) {
970 $self->warn("Node requested for reroot is already the root node!");
971 return 0;
973 my $anc = $new_root->ancestor;
974 unless( $anc ) {
975 # this is already the root
976 $self->warn("Node requested for reroot is already the root node!"); return 0;
978 my $tmp_node = $new_root->create_node_on_branch(-position=>0,-force=>1);
979 # reverse the ancestor & children pointers
980 my $former_anc = $tmp_node->ancestor;
981 my @path_from_oldroot = ($self->get_lineage_nodes($tmp_node), $tmp_node);
982 for (my $i = 0; $i < $#path_from_oldroot; $i++) {
983 my $current = $path_from_oldroot[$i];
984 my $next = $path_from_oldroot[$i + 1];
985 $current->remove_Descendent($next);
986 $current->branch_length($next->branch_length);
987 $current->bootstrap($next->bootstrap) if defined $next->bootstrap;
988 $next->remove_tag('B');
989 $next->add_Descendent($current);
992 $new_root->add_Descendent($former_anc);
993 $tmp_node->remove_Descendent($former_anc);
995 $tmp_node = undef;
996 $new_root->branch_length(undef);
997 $new_root->remove_tag('B');
999 $old_root = undef;
1000 $self->set_root_node($new_root);
1002 return 1;
1005 =head2 reroot_at_midpoint
1007 Title : reroot_at_midpoint
1008 Usage : $tree->reroot_at_midpoint($node, $new_root_id);
1009 Function: Reroots a tree on a new node created halfway between the
1010 argument and its ancestor
1011 Returns : the new midpoint Bio::Tree::NodeIon success, 0 on failure
1012 Args : non-root Bio::Tree::NodeI currently in $tree
1013 scalar string, id for new node (optional)
1015 =cut
1017 sub reroot_at_midpoint {
1018 my $self = shift;
1019 my $node = shift;
1020 my $id = shift;
1022 unless (defined $node && $node->isa("Bio::Tree::NodeI")) {
1023 $self->warn("Must provide a valid Bio::Tree::NodeI when rerooting");
1024 return 0;
1027 my $midpt = $node->create_node_on_branch(-FRACTION=>0.5);
1028 if (defined $id) {
1029 $self->warn("ID argument is not a scalar") if (ref $id);
1030 $midpt->id($id) if defined($id) && !ref($id);
1032 $self->reroot($midpt);
1033 return $midpt;
1036 =head2 findnode_by_id
1038 Title : findnode_by_id
1039 Usage : my $node = $tree->findnode_by_id($id);
1040 Function: Get a node by its id (which should be
1041 unique for the tree)
1042 Returns : L<Bio::Tree::NodeI>
1043 Args : node id
1046 =cut
1049 sub findnode_by_id {
1050 my $tree = shift;
1051 $tree->deprecated("use of findnode_by_id() is deprecated; ".
1052 "use find_node() instead");
1053 my $id = shift;
1054 my $rootnode = $tree->get_root_node;
1055 if ( ($rootnode->id) and ($rootnode->id eq $id) ) {
1056 return $rootnode;
1058 # process all the children
1059 foreach my $node ( $rootnode->get_Descendents ) {
1060 if ( ($node->id) and ($node->id eq $id ) ) {
1061 return $node;
1066 =head2 move_id_to_bootstrap
1068 Title : move_id_to_bootstrap
1069 Usage : $tree->move_id_to_bootstrap
1070 Function: Move internal IDs to bootstrap slot
1071 Returns : undef
1072 Args : undef
1075 =cut
1077 sub move_id_to_bootstrap{
1078 my ($tree) = shift;
1079 for my $node ( grep { ! $_->is_Leaf } $tree->get_nodes ) {
1080 $node->bootstrap($node->id);
1081 $node->id('');
1086 =head2 add_traits
1088 Example : $key = $stat->add_traits($tree, $trait_file, 3);
1089 Description: Add traits to a Bio::Tree:Tree nodes
1090 of a tree from a file.
1091 Returns : trait name
1092 Exceptions : log an error if a node has no value in the file
1093 Caller : main()
1095 The trait file is a tab-delimied text file and need to have a header
1096 line giving names to traits. The first column contains the leaf node
1097 ids. Subsequent columns contain different trait value sets. Columns
1098 numbering starts from 0. The default trait column is the second
1099 (1). The returned hashref has one special key, my_trait_name, that
1100 holds the trait name. Single or double quotes are removed.
1102 =cut
1104 sub _read_trait_file {
1105 my $self = shift;
1106 my $file = shift;
1107 my $column = shift || 1;
1109 my $traits;
1110 open my $TRAIT, "<", $file or $self->("Can't find file $file: $!\n");
1112 my $first_line = 1;
1113 while (<$TRAIT>) {
1114 if ($first_line) {
1115 $first_line = 0;
1116 s/['"]//g;
1117 my @line = split;
1118 $traits->{'my_trait_name'} = $line[$column];
1119 next;
1121 s/['"]//g;
1122 my @line = split;
1123 last unless $line[0];
1124 $traits->{$line[0]} = $line[$column];
1126 return $traits;
1129 sub add_trait {
1130 my $self = shift;
1131 my $file = shift;
1132 my $column = shift;
1134 my $traits = $self->_read_trait_file($file, $column); # filename, trait column
1135 my $key = $traits->{'my_trait_name'};
1136 #use YAML; print Dump $traits; exit;
1137 foreach my $node ($self->get_leaf_nodes) {
1138 # strip quotes from the node id
1139 $node->id($1) if $node->id =~ /^['"]+(.*)['"]+$/;
1140 eval {
1141 $node->verbose(2);
1142 $node->add_tag_value($key, $traits->{ $node->id } );
1144 $self->throw("ERROR: No trait for node [".
1145 $node->id. "/". $node->internal_id. "]")
1146 if $@;
1148 return $key;