don't match strings that wrap lines, use fuzzy match instead
[bioperl-live.git] / Bio / Tree / TreeFunctionsI.pm
blob1a64b13756f2bc87cde25b2b694bda4004b9803d
2 # BioPerl module for Bio::Tree::TreeFunctionsI
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Jason Stajich <jason-at-bioperl-dot-org>
8 # Copyright Jason Stajich
10 # You may distribute this module under the same terms as perl itself
12 # POD documentation - main docs before the code
14 =head1 NAME
16 Bio::Tree::TreeFunctionsI - Decorated Interface implementing basic Tree exploration methods
18 =head1 SYNOPSIS
20 use Bio::TreeIO;
21 my $in = Bio::TreeIO->new(-format => 'newick', -file => 'tree.tre');
23 my $tree = $in->next_tree;
25 my @nodes = $tree->find_node('id1');
27 if( $tree->is_monophyletic(-nodes => \@nodes, -outgroup => $outnode) ){
28 #...
31 =head1 DESCRIPTION
33 This interface provides a set of implementated Tree functions which
34 only use the defined methods in the TreeI or NodeI interface.
36 =head1 FEEDBACK
38 =head2 Mailing Lists
40 User feedback is an integral part of the evolution of this and other
41 Bioperl modules. Send your comments and suggestions preferably to
42 the Bioperl mailing list. Your participation is much appreciated.
44 bioperl-l@bioperl.org - General discussion
45 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
47 =head2 Support
49 Please direct usage questions or support issues to the mailing list:
51 I<bioperl-l@bioperl.org>
53 rather than to the module maintainer directly. Many experienced and
54 reponsive experts will be able look at the problem and quickly
55 address it. Please include a thorough description of the problem
56 with code and data examples if at all possible.
58 =head2 Reporting Bugs
60 Report bugs to the Bioperl bug tracking system to help us keep track
61 of the bugs and their resolution. Bug reports can be submitted via the
62 web:
64 https://redmine.open-bio.org/projects/bioperl/
66 =head1 AUTHOR - Jason Stajich, Aaron Mackey, Justin Reese
68 Email jason-at-bioperl-dot-org
69 Email amackey-at-virginia.edu
70 Email jtr4v-at-virginia.edu
72 =head1 CONTRIBUTORS
74 Sendu Bala, bix@sendu.me.uk
76 Rerooting code was worked on by
78 Daniel Barker d.barker-at-reading.ac.uk
79 Ramiro Barrantes Ramiro.Barrantes-at-uvm.edu
81 =head1 APPENDIX
83 The rest of the documentation details each of the object methods.
84 Internal methods are usually preceded with a _
86 =cut
88 # Let the code begin...
90 package Bio::Tree::TreeFunctionsI;
91 use strict;
93 use base qw(Bio::Tree::TreeI);
95 =head2 find_node
97 Title : find_node
98 Usage : my @nodes = $self->find_node(-id => 'node1');
99 Function: returns all nodes that match a specific field, by default this
100 is id, but different branch_length,
101 Returns : List of nodes which matched search
102 Args : text string to search for
104 -fieldname => $textstring
106 =cut
108 sub find_node {
109 my ($self,$type,$field) = @_;
110 if( ! defined $type ) {
111 $self->warn("Must request a either a string or field and string when searching");
114 # all this work for a '-' named field
115 # is so that we could potentially
116 # expand to other constraints in
117 # different implementations
118 # like 'find all nodes with boostrap < XX'
120 if( ! defined $field ) {
121 # only 1 argument, default to searching by id
122 $field= $type;
123 $type = 'id';
124 } else {
125 $type =~ s/^-//;
128 # could actually do this by testing $rootnode->can($type) but
129 # it is possible that a tree is implemeted with different node types
130 # - although it is unlikely that the root node would be richer than the
131 # leaf nodes. Can't handle NHX tags right now
133 my @nodes = grep { $_->can($type) && defined $_->$type() &&
134 $_->$type() eq $field } $self->get_nodes();
136 if ( wantarray) {
137 return @nodes;
138 } else {
139 if( @nodes > 1 ) {
140 $self->warn("More than 1 node found but caller requested scalar, only returning first node");
142 return shift @nodes;
146 =head2 remove_Node
148 Title : remove_Node
149 Usage : $tree->remove_Node($node)
150 Function: Removes a node from the tree
151 Returns : boolean represent status of success
152 Args : either Bio::Tree::NodeI or string of the node id
154 =cut
156 sub remove_Node {
157 my ($self,$input) = @_;
158 my $node = undef;
159 unless( ref($input) ) {
160 $node = $self->find_node($input);
161 } elsif( ! $input->isa('Bio::Tree::NodeI') ) {
162 $self->warn("Did not provide either a valid Bio::Tree::NodeI object or id to remove_node");
163 return 0;
164 } else {
165 $node = $input;
167 if( ! $node->ancestor &&
168 $self->get_root_node->internal_id != $node->internal_id) {
169 $self->warn("Node (".$node->to_string . ") has no ancestor, can't remove!");
170 } else {
171 $node->ancestor->remove_Descendent($node);
175 =head2 get_lineage_nodes
177 Title : get_lineage_nodes
178 Usage : my @nodes = $tree->get_lineage_nodes($node);
179 Function: Get the full lineage of a node (all its ancestors, in the order
180 root->most recent ancestor)
181 Returns : list of nodes
182 Args : either Bio::Tree::NodeI or string of the node id
184 =cut
186 sub get_lineage_nodes {
187 my ($self, $input) = @_;
188 my $node;
189 unless (ref $input) {
190 $node = $self->find_node($input);
192 elsif (! $input->isa('Bio::Tree::NodeI')) {
193 $self->warn("Did not provide either a valid Bio::Tree::NodeI object or id to get_lineage_nodes");
194 return;
196 else {
197 $node = $input;
200 # when dealing with Bio::Taxon objects with databases, the root will always
201 # be the database's root, ignoring this Tree's set root node; prefer the
202 # Tree's idea of root.
203 my $root = $self->get_root_node || '';
205 my @lineage;
206 while ($node) {
207 $node = $node->ancestor || last;
208 unshift(@lineage, $node);
209 $node eq $root && last;
211 return @lineage;
214 =head2 splice
216 Title : splice
217 Usage : $tree->splice(-remove_id => \@ids);
218 Function: Remove all the nodes from a tree that correspond to the supplied
219 args, making all the descendents of a removed node the descendents
220 of the removed node's ancestor.
221 You can ask to explicitly remove certain nodes by using -remove_*,
222 remove them conditionally by using -remove_* in combination with
223 -keep_*, or remove everything except certain nodes by using only
224 -keep_*.
225 Returns : n/a
226 Args : just a list of Bio::Tree::NodeI objects to remove, OR
227 -key => value pairs, where -key has the prefix 'remove' or 'keep',
228 followed by an underscore, followed by a fieldname (like for the
229 method find_node). Value should be a scalar or an array ref of
230 scalars (again, like you might supply to find_node).
232 So (-remove_id => [1, 2]) will remove all nodes from the tree that
233 have an id() of '1' or '2', while
234 (-remove_id => [1, 2], -keep_id => [2]) will remove all nodes with
235 an id() of '1'.
236 (-keep_id => [2]) will remove all nodes unless they have an id() of
237 '2' (note, no -remove_*).
239 -preserve_lengths => 1 : setting this argument will splice out
240 intermediate nodes, preserving the original total length between
241 the ancestor and the descendants of the spliced node. Undef
242 by default.
244 =cut
246 sub splice {
247 my ($self, @args) = @_;
248 $self->throw("Must supply some arguments") unless @args > 0;
249 my $preserve_lengths = 0;
250 my @nodes_to_remove;
251 if (ref($args[0])) {
252 $self->throw("When supplying just a list of Nodes, they must be Bio::Tree::NodeI objects") unless $args[0]->isa('Bio::Tree::NodeI');
253 @nodes_to_remove = @args;
255 else {
256 $self->throw("When supplying -key => value pairs, must be an even number of args") unless @args % 2 == 0;
257 my %args = @args;
258 my @keep_nodes;
259 my @remove_nodes;
260 my $remove_all = 1;
261 while (my ($key, $value) = each %args) {
262 my @values = ref($value) ? @{$value} : ($value);
264 if ($key =~ s/remove_//) {
265 $remove_all = 0;
266 foreach my $value (@values) {
267 push(@remove_nodes, $self->find_node($key => $value));
270 elsif ($key =~ s/keep_//) {
271 foreach my $value (@values) {
272 push(@keep_nodes, $self->find_node($key => $value));
275 elsif ($key =~ /preserve/) {
276 $preserve_lengths = $value;
280 if ($remove_all) {
281 if (@keep_nodes == 0) {
282 $self->warn("Requested to remove everything except certain nodes, but those nodes were not found; doing nothing instead");
283 return;
286 @remove_nodes = $self->get_nodes;
288 if (@keep_nodes > 0) {
289 my %keep_iids = map { $_->internal_id => 1 } @keep_nodes;
290 foreach my $node (@remove_nodes) {
291 push(@nodes_to_remove, $node) unless exists $keep_iids{$node->internal_id};
294 else {
295 @nodes_to_remove = @remove_nodes;
298 # do the splicing
299 #*** the algorithm here hasn't really been thought through and tested much,
300 # will probably need revising
301 my %root_descs;
302 my $reroot = 0;
303 foreach my $node (@nodes_to_remove) {
304 my @descs = $node->each_Descendent;
305 my $ancestor = $node->ancestor;
306 if (! $ancestor && ! $reroot) {
307 # we're going to remove the tree root, so will have to re-root the
308 # tree later
309 $reroot = 1;
310 %root_descs = map { $_->internal_id => $_ } @descs;
311 $node->remove_all_Descendents;
312 next;
314 if (exists $root_descs{$node->internal_id}) {
315 # well, this one can't be the future root anymore
316 delete $root_descs{$node->internal_id};
317 # but maybe one of this one's descs will become the root
318 foreach my $desc (@descs) {
319 $root_descs{$desc->internal_id} = $desc;
322 # make the ancestor of our descendents our own ancestor, and give us
323 # no ancestor of our own to remove us from the tree
324 foreach my $desc (@descs) {
325 $desc->ancestor($ancestor);
326 $desc->branch_length($desc->branch_length + $node->branch_length) if $preserve_lengths;
328 $node->ancestor(undef);
330 if ($reroot) {
331 my @candidates = values %root_descs;
332 $self->throw("After splicing, there was no tree root!") unless @candidates > 0;
333 $self->throw("After splicing, the original root was removed but there are multiple candidates for the new root!") unless @candidates == 1;
334 $self->set_root_node($candidates[0]); # not sure its valid to use the reroot() method
338 =head2 get_lca
340 Title : get_lca
341 Usage : get_lca(-nodes => \@nodes ); OR
342 get_lca(@nodes);
343 Function: given two or more nodes, returns the lowest common ancestor (aka most
344 recent common ancestor)
345 Returns : node object or undef if there is no common ancestor
346 Args : -nodes => arrayref of nodes to test, OR
347 just a list of nodes
349 =cut
351 sub get_lca {
352 my ($self, @args) = @_;
353 my ($nodes) = $self->_rearrange([qw(NODES)],@args);
354 my @nodes;
355 if (ref($nodes) eq 'ARRAY') {
356 @nodes = @{$nodes};
358 else {
359 @nodes = @args;
361 @nodes >= 2 or $self->throw("At least 2 nodes are required");
362 # We must go root->leaf to get the correct answer to lca (in a world where
363 # internal_id might not be uniquely assigned), but leaf->root is more
364 # forgiving (eg. lineages may not all have the same root, or they may have
365 # different numbers of 'minor' taxa inbeteen 'major' ones).
367 # I use root->leaf so that we can easily do multiple nodes at once - no
368 # matter what taxa are below the lca, the lca and all its ancestors ought to
369 # be identical.
370 my @paths;
371 foreach my $node (@nodes) {
372 unless(ref($node) && $node->isa('Bio::Tree::NodeI')) {
373 $self->throw("Cannot process get_lca() with a non-NodeI object ($node)\n");
375 my @path = ($self->get_lineage_nodes($node), $node);
376 push(@paths, \@path);
378 return unless @paths >= 2;
379 my $lca;
380 LEVEL: while ($paths[0] > 0) {
381 my %node_ids;
382 my $node;
383 foreach my $path (@paths) {
384 $node = shift(@{$path}) || last LEVEL;
385 my $node_id = $node->internal_id;
386 unless (defined $node_id) {
387 $self->warn("One of the lineages had a node with no internal_id, can't calculate the common ancestor");
388 return;
390 $node_ids{$node_id}++;
392 if (keys %node_ids == 1) {
393 $lca = $node;
395 else {
396 # at this point in the lineage the nodes are different; the previous
397 # loop had the lca
398 last LEVEL;
401 # If the tree that we are contains the lca (get_lca could have been called
402 # on an empty tree, since it works with plain Nodes), prefer to return the
403 # node object that belongs to us
404 if ($lca && $self->number_nodes > 0) {
405 my $own_lca = $self->find_node(-internal_id => $lca->internal_id);
406 $lca = $own_lca if $own_lca;
408 return $lca;
411 =head2 merge_lineage
413 Title : merge_lineage
414 Usage : merge_lineage($node)
415 Function: Merge a lineage of nodes with this tree.
416 Returns : n/a
417 Args : Bio::Tree::TreeI with only one leaf, OR
418 Bio::Tree::NodeI which has an ancestor
420 For example, if we are the tree $tree:
422 +---B
426 +---C
428 and we want to merge the lineage $other_tree:
430 A---C---D
432 After calling $tree->merge_lineage($other_tree), $tree looks like:
434 +---B
438 +---C---D
440 =cut
442 sub merge_lineage {
443 my ($self, $thing) = @_;
444 $self->throw("Must supply an object reference") unless ref($thing);
446 my ($lineage_tree, $lineage_leaf);
447 if ($thing->isa('Bio::Tree::TreeI')) {
448 my @leaves = $thing->get_leaf_nodes;
449 $self->throw("The supplied Tree can only have one leaf") unless @leaves == 1;
450 $lineage_tree = $thing;
451 $lineage_leaf = shift(@leaves);
453 elsif ($thing->isa('Bio::Tree::NodeI')) {
454 $self->throw("The supplied Node must have an ancestor") unless $thing->ancestor;
455 $lineage_tree = $self->new(-node => $thing);
456 $lineage_leaf = $thing;
459 # see if any node in the supplied lineage is in our tree - that will be
460 # our lca and we can merge at the node below
461 my @lineage = ($lineage_leaf, reverse($self->get_lineage_nodes($lineage_leaf)));
462 my $merged = 0;
463 for my $i (0..$#lineage) {
464 my $lca = $self->find_node(-internal_id => $lineage[$i]->internal_id) || next;
466 if ($i == 0) {
467 # the supplied thing to merge is already in the tree, nothing to do
468 return;
470 # $i is the lca, so the previous node is new to the tree and should
471 # be merged on
472 $lca->add_Descendent($lineage[$i-1]);
473 $merged = 1;
474 last;
476 $merged || ($self->warn("Couldn't merge the lineage of ".$lineage_leaf->id." with the rest of the tree!\n") && return);
479 =head2 contract_linear_paths
481 Title : contract_linear_paths
482 Usage : contract_linear_paths()
483 Function: Splices out all nodes in the tree that have an ancestor and only one
484 descendent.
485 Returns : n/a
486 Args : none for normal behaviour, true to dis-regard the ancestor requirment
487 and re-root the tree as necessary
489 For example, if we are the tree $tree:
491 +---E
493 A---B---C---D
495 +---F
497 After calling $tree->contract_linear_paths(), $tree looks like:
499 +---E
501 A---D
503 +---F
505 Instead, $tree->contract_linear_paths(1) would have given:
507 +---E
511 +---F
513 =cut
515 sub contract_linear_paths {
516 my $self = shift;
517 my $reroot = shift;
518 my @remove;
519 foreach my $node ($self->get_nodes) {
520 if ($node->ancestor && $node->each_Descendent == 1) {
521 push(@remove, $node);
524 $self->splice(@remove) if @remove;
525 if ($reroot) {
526 my $root = $self->get_root_node;
527 my @descs = $root->each_Descendent;
528 if (@descs == 1) {
529 my $new_root = shift(@descs);
530 $self->set_root_node($new_root);
531 $new_root->ancestor(undef);
536 =head2 is_binary
538 Example : is_binary(); is_binary($node);
539 Description: Finds if the tree or subtree defined by
540 the internal node is a true binary tree
541 without polytomies
542 Returns : boolean
543 Exceptions :
544 Args : Internal node Bio::Tree::NodeI, optional
547 =cut
549 sub is_binary;
551 sub is_binary {
552 my $self = shift;
553 my $node = shift || $self->get_root_node;
555 my $binary = 1;
556 my @descs = $node->each_Descendent;
557 $binary = 0 unless @descs == 2 or @descs == 0;
558 #print "$binary, ", scalar @descs, "\n";
560 # recurse
561 foreach my $desc (@descs) {
562 $binary += $self->is_binary($desc) -1;
564 $binary = 0 if $binary < 0;
565 return $binary;
569 =head2 force_binary
571 Title : force_binary
572 Usage : force_binary()
573 Function: Forces the tree into a binary tree, splitting branches arbitrarily
574 and creating extra nodes as necessary, such that all nodes have
575 exactly two or zero descendants.
576 Returns : n/a
577 Args : none
579 For example, if we are the tree $tree:
581 +---G
583 +---F
585 +---E
589 +---D
591 +---C
593 +---B
595 (A has 6 descendants B-G)
597 After calling $tree->force_binary(), $tree looks like:
599 +---X
601 +---X
603 | +---X
605 +---X
607 | | +---G
608 | | |
609 | +---X
611 | +---F
613 | +---E
615 | +---X
616 | | |
617 | | +---D
619 +---X
621 | +---C
623 +---X
625 +---B
627 (Where X are artificially created nodes with ids 'artificial_n', where n is
628 an integer making the id unique within the tree)
630 =cut
632 sub force_binary {
633 my $self = shift;
634 my $node = shift || $self->get_root_node;
636 my @descs = $node->each_Descendent;
637 if (@descs > 2) {
638 # Removed overly verbose warning - cjfields 3-12-11
640 # Many nodes have no identifying names, a simple warning is probably
641 # enough.
643 $self->warn("Node has more than two descendants\nWill do an arbitrary balanced split");
644 my @working = @descs;
645 # create an even set of artifical nodes on which to later hang the descs
646 my $half = @working / 2;
647 $half++ if $half > int($half);
648 $half = int($half);
649 my @artificials;
650 while ($half > 1) {
651 my @this_level;
652 foreach my $top_node (@artificials || $node) {
653 for (1..2) {
654 my $art = $top_node->new(-id => "artificial_".++$self->{_art_num});
655 $top_node->add_Descendent($art);
656 push(@this_level, $art);
659 @artificials = @this_level;
660 $half--;
662 # attach two descs to each artifical leaf
663 foreach my $art (@artificials) {
664 for (1..2) {
665 my $desc = shift(@working) || $node->new(-id => "artificial_".++$self->{_art_num});
666 $desc->ancestor($art);
670 elsif (@descs == 1) {
671 # ensure that all nodes have 2 descs
672 $node->add_Descendent($node->new(-id => "artificial_".++$self->{_art_num}));
674 # recurse
675 foreach my $desc (@descs) {
676 $self->force_binary($desc);
680 =head2 simplify_to_leaves_string
682 Title : simplify_to_leaves_string
683 Usage : my $leaves_string = $tree->simplify_to_leaves_string()
684 Function: Creates a simple textual representation of the relationship between
685 leaves in self. It forces the tree to be binary, so the result may
686 not strictly correspond to the tree (if the tree wasn't binary), but
687 will be as close as possible. The tree object is not altered. Only
688 leaf node ids are output, in a newick-like format.
689 Returns : string
690 Args : none
692 =cut
694 sub simplify_to_leaves_string {
695 my $self = shift;
697 # Before contracting and forcing binary we need to clone self, but Clone.pm
698 # clone() seg faults and fails to make the clone, whilst Storable dclone
699 # needs $self->{_root_cleanup_methods} deleted (code ref) and seg faults at
700 # end of script. Let's make our own clone...
701 my $tree = $self->_clone;
703 $tree->contract_linear_paths(1);
704 $tree->force_binary;
705 foreach my $node ($tree->get_nodes) {
706 my $id = $node->id;
707 $id = ($node->is_Leaf && $id !~ /^artificial/) ? $id : '';
708 $node->id($id);
711 my %paired;
712 my @data = $self->_simplify_helper($tree->get_root_node, \%paired);
714 return join(',', @data);
717 # alias
718 sub _clone { shift->clone(@_) }
720 # safe node clone that doesn't seg fault, but deliberately loses ancestors and
721 # descendents
722 sub _clone_node {
723 my ($self, $node) = @_;
724 my $clone = $node->new;
726 while (my ($key, $val) = each %{$node}) {
727 if ($key eq '_desc' || $key eq '_ancestor') {
728 next;
730 ${$clone}{$key} = $val;
733 return $clone;
736 # tree string generator for simplify_to_leaves_string, based on
737 # Bio::TreeIO::newick::_write_tree_Helper
738 sub _simplify_helper {
739 my ($self, $node, $paired) = @_;
740 return () if (!defined $node);
742 my @data = ();
743 foreach my $node ($node->each_Descendent()) {
744 push(@data, $self->_simplify_helper($node, $paired));
747 my $id = $node->id_output || '';
748 if (@data) {
749 unless (exists ${$paired}{"@data"} || @data == 1) {
750 $data[0] = "(" . $data[0];
751 $data[-1] .= ")";
752 ${$paired}{"@data"} = 1;
755 elsif ($id) {
756 push(@data, $id);
759 return @data;
762 =head2 distance
764 Title : distance
765 Usage : distance(-nodes => \@nodes )
766 Function: returns the distance between two given nodes
767 Returns : numerical distance
768 Args : -nodes => arrayref of nodes to test
769 or ($node1, $node2)
771 =cut
773 sub distance {
774 my ($self,@args) = @_;
775 my ($nodes) = $self->_rearrange([qw(NODES)],@args);
776 if( ! defined $nodes ) {
777 $self->warn("Must supply two nodes or -nodes parameter to distance() method");
778 return;
780 elsif (ref($nodes) eq 'ARRAY') {
783 elsif ( @args == 2) { # assume these are nodes...
784 $nodes = \@args;
786 else {
787 $self->warn("Must supply two nodes or -nodes parameter to distance() method");
788 return;
790 $self->throw("Must provide 2 nodes") unless @{$nodes} == 2;
792 my $lca = $self->get_lca(@{$nodes});
793 unless($lca) {
794 $self->warn("could not find the lca of supplied nodes; can't find distance either");
795 return;
798 my $cumul_dist = 0;
799 my $warned = 0;
800 foreach my $current_node (@{$nodes}) {
801 while (1) {
802 last if $current_node eq $lca;
803 if ($current_node->branch_length) {
804 $cumul_dist += $current_node->branch_length;
806 elsif (! $warned) {
807 $self->warn("At least some nodes do not have a branch length, the distance returned could be wrong");
808 $warned = 1;
811 $current_node = $current_node->ancestor || last;
815 return $cumul_dist;
818 =head2 is_monophyletic
820 Title : is_monophyletic
821 Usage : if( $tree->is_monophyletic(-nodes => \@nodes,
822 -outgroup => $outgroup)
823 Function: Will do a test of monophyly for the nodes specified
824 in comparison to a chosen outgroup
825 Returns : boolean
826 Args : -nodes => arrayref of nodes to test
827 -outgroup => outgroup to serve as a reference
830 =cut
832 sub is_monophyletic{
833 my ($self,@args) = @_;
834 my ($nodes,$outgroup) = $self->_rearrange([qw(NODES OUTGROUP)],@args);
836 if( ! defined $nodes || ! defined $outgroup ) {
837 $self->warn("Must supply -nodes and -outgroup parameters to the method
838 is_monophyletic");
839 return;
841 if( ref($nodes) !~ /ARRAY/i ) {
842 $self->warn("Must provide a valid array reference for -nodes");
845 my $clade_root = $self->get_lca(@{$nodes});
846 unless( defined $clade_root ) {
847 $self->warn("could not find clade root via lca");
848 return;
851 my $og_ancestor = $outgroup->ancestor;
852 while( defined ($og_ancestor ) ) {
853 if( $og_ancestor->internal_id == $clade_root->internal_id ) {
854 # monophyly is violated
855 return 0;
857 $og_ancestor = $og_ancestor->ancestor;
859 return 1;
862 =head2 is_paraphyletic
864 Title : is_paraphyletic
865 Usage : if( $tree->is_paraphyletic(-nodes =>\@nodes,
866 -outgroup => $node) ){ }
867 Function: Tests whether or not a given set of nodes are paraphyletic
868 (representing the full clade) given an outgroup
869 Returns : [-1,0,1] , -1 if the group is not monophyletic
870 0 if the group is not paraphyletic
871 1 if the group is paraphyletic
872 Args : -nodes => Array of Bio::Tree::NodeI objects which are in the tree
873 -outgroup => a Bio::Tree::NodeI to compare the nodes to
876 =cut
878 sub is_paraphyletic{
879 my ($self,@args) = @_;
880 my ($nodes,$outgroup) = $self->_rearrange([qw(NODES OUTGROUP)],@args);
882 if( ! defined $nodes || ! defined $outgroup ) {
883 $self->warn("Must suply -nodes and -outgroup parameters to the method is_paraphyletic");
884 return;
886 if( ref($nodes) !~ /ARRAY/i ) {
887 $self->warn("Must provide a valid array reference for -nodes");
888 return;
891 # Algorithm
892 # Find the lca
893 # Find all the nodes beneath the lca
894 # Test to see that none are missing from the nodes list
895 my %nodehash;
896 foreach my $n ( @$nodes ) {
897 $nodehash{$n->internal_id} = $n;
900 my $clade_root = $self->get_lca(-nodes => $nodes );
901 unless( defined $clade_root ) {
902 $self->warn("could not find clade root via lca");
903 return;
906 my $og_ancestor = $outgroup->ancestor;
908 # Is this necessary/correct for paraphyly test?
909 while( defined ($og_ancestor ) ) {
910 if( $og_ancestor->internal_id == $clade_root->internal_id ) {
911 # monophyly is violated, could be paraphyletic
912 return -1;
914 $og_ancestor = $og_ancestor->ancestor;
916 my $tree = Bio::Tree::Tree->new(-root => $clade_root,
917 -nodelete => 1);
919 foreach my $n ( $tree->get_nodes() ) {
920 next unless $n->is_Leaf();
921 # if any leaf node is not in the list
922 # then it is part of the clade and so the list
923 # must be paraphyletic
924 return 1 unless ( $nodehash{$n->internal_id} );
926 return 0;
930 =head2 reroot
932 Title : reroot
933 Usage : $tree->reroot($node);
934 Function: Reroots a tree making a new node the root
935 Returns : 1 on success, 0 on failure
936 Args : Bio::Tree::NodeI that is in the tree, but is not the current root
938 =cut
940 sub reroot {
941 my ($self,$new_root) = @_;
942 unless (defined $new_root && $new_root->isa("Bio::Tree::NodeI")) {
943 $self->warn("Must provide a valid Bio::Tree::NodeI when rerooting");
944 return 0;
947 my $old_root = $self->get_root_node;
948 if( $new_root == $old_root ) {
949 $self->warn("Node requested for reroot is already the root node!");
950 return 0;
952 my $anc = $new_root->ancestor;
953 unless( $anc ) {
954 # this is already the root
955 $self->warn("Node requested for reroot is already the root node!"); return 0;
957 my $tmp_node = $new_root->create_node_on_branch(-position=>0,-force=>1);
958 # reverse the ancestor & children pointers
959 my $former_anc = $tmp_node->ancestor;
960 my @path_from_oldroot = ($self->get_lineage_nodes($tmp_node), $tmp_node);
961 for (my $i = 0; $i < $#path_from_oldroot; $i++) {
962 my $current = $path_from_oldroot[$i];
963 my $next = $path_from_oldroot[$i + 1];
964 $current->remove_Descendent($next);
965 $current->branch_length($next->branch_length);
966 $current->bootstrap($next->bootstrap) if defined $next->bootstrap;
967 $next->remove_tag('B');
968 $next->add_Descendent($current);
971 $new_root->add_Descendent($former_anc);
972 $tmp_node->remove_Descendent($former_anc);
974 $tmp_node = undef;
975 $new_root->branch_length(undef);
977 $old_root = undef;
978 $self->set_root_node($new_root);
980 return 1;
983 =head2 reroot_at_midpoint
985 Title : reroot_at_midpoint
986 Usage : $tree->reroot_at_midpoint($node, $new_root_id);
987 Function: Reroots a tree on a new node created halfway between the
988 argument and its ancestor
989 Returns : the new midpoint Bio::Tree::NodeIon success, 0 on failure
990 Args : non-root Bio::Tree::NodeI currently in $tree
991 scalar string, id for new node (optional)
993 =cut
995 sub reroot_at_midpoint {
996 my $self = shift;
997 my $node = shift;
998 my $id = shift;
1000 unless (defined $node && $node->isa("Bio::Tree::NodeI")) {
1001 $self->warn("Must provide a valid Bio::Tree::NodeI when rerooting");
1002 return 0;
1005 my $midpt = $node->create_node_on_branch(-FRACTION=>0.5);
1006 if (defined $id) {
1007 $self->warn("ID argument is not a scalar") if (ref $id);
1008 $midpt->id($id) if defined($id) && !ref($id);
1010 $self->reroot($midpt);
1011 return $midpt;
1014 =head2 findnode_by_id
1016 Title : findnode_by_id
1017 Usage : my $node = $tree->findnode_by_id($id);
1018 Function: Get a node by its id (which should be
1019 unique for the tree)
1020 Returns : L<Bio::Tree::NodeI>
1021 Args : node id
1024 =cut
1027 sub findnode_by_id {
1028 my $tree = shift;
1029 $tree->deprecated("use of findnode_by_id() is deprecated; ".
1030 "use find_node() instead");
1031 my $id = shift;
1032 my $rootnode = $tree->get_root_node;
1033 if ( ($rootnode->id) and ($rootnode->id eq $id) ) {
1034 return $rootnode;
1036 # process all the children
1037 foreach my $node ( $rootnode->get_Descendents ) {
1038 if ( ($node->id) and ($node->id eq $id ) ) {
1039 return $node;
1044 =head2 move_id_to_bootstrap
1046 Title : move_id_to_bootstrap
1047 Usage : $tree->move_id_to_bootstrap
1048 Function: Move internal IDs to bootstrap slot
1049 Returns : undef
1050 Args : undef
1053 =cut
1055 sub move_id_to_bootstrap{
1056 my ($tree) = shift;
1057 for my $node ( grep { ! $_->is_Leaf } $tree->get_nodes ) {
1058 $node->bootstrap($node->id || '');
1059 $node->id('');
1064 =head2 add_trait
1066 Example : $key = $tree->add_trait($trait_file, 3);
1067 Description: Add traits to a Bio::Tree:Tree nodes
1068 of a tree from a file.
1069 Returns : trait name
1070 Exceptions : log an error if a node has no value in the file
1071 Args : name of trait file (scalar string),
1072 index of trait file column (scalar int)
1073 Caller : main()
1075 The trait file is a tab-delimited text file and needs to have a header
1076 line giving names to traits. The first column contains the leaf node
1077 ids. Subsequent columns contain different trait value sets. Columns
1078 numbering starts from 0. The default trait column is the second
1079 (1). The returned hashref has one special key, my_trait_name, that
1080 holds the trait name. Single or double quotes are removed.
1082 =cut
1084 sub _read_trait_file {
1085 my $self = shift;
1086 my $file = shift;
1087 my $column = shift || 1;
1089 my $traits;
1090 open my $TRAIT, "<", $file or $self->("Can't find file $file: $!\n");
1092 my $first_line = 1;
1093 while (<$TRAIT>) {
1094 if ($first_line) {
1095 $first_line = 0;
1096 s/['"]//g;
1097 my @line = split;
1098 $traits->{'my_trait_name'} = $line[$column];
1099 next;
1101 s/['"]//g;
1102 my @line = split;
1103 last unless $line[0];
1104 $traits->{$line[0]} = $line[$column];
1106 return $traits;
1109 sub add_trait {
1110 my $self = shift;
1111 my $file = shift;
1112 my $column = shift;
1114 my $traits = $self->_read_trait_file($file, $column); # filename, trait column
1115 my $key = $traits->{'my_trait_name'};
1116 #use YAML; print Dump $traits; exit;
1117 foreach my $node ($self->get_leaf_nodes) {
1118 # strip quotes from the node id
1119 $node->id($1) if $node->id =~ /^['"]+(.*)['"]+$/;
1120 eval {
1121 $node->verbose(2);
1122 $node->add_tag_value($key, $traits->{ $node->id } );
1124 $self->throw("ERROR: No trait for node [".
1125 $node->id. "/". $node->internal_id. "]")
1126 if $@;
1128 return $key;