2 # BioPerl module for Bio::Tree::Compatible
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Gabriel Valiente <valiente@lsi.upc.edu>
8 # Copyright Gabriel Valiente
10 # You may distribute this module under the same terms as Perl itself
12 # POD documentation - main docs before the code
16 Bio::Tree::Compatible - Testing compatibility of phylogenetic trees
21 use Bio::Tree::Compatible;
23 my $input = Bio::TreeIO->new('-format' => 'newick',
24 '-file' => 'input.tre');
25 my $t1 = $input->next_tree;
26 my $t2 = $input->next_tree;
28 my ($incompat, $ilabels, $inodes) = Bio::Tree::Compatible::is_compatible($t1,$t2);
30 my %cluster1 = %{ Bio::Tree::Compatible::cluster_representation($t1) };
31 my %cluster2 = %{ Bio::Tree::Compatible::cluster_representation($t2) };
32 print "incompatible trees\n";
33 if (scalar(@$ilabels)) {
34 foreach my $label (@$ilabels) {
35 my $node1 = $t1->find_node(-id => $label);
36 my $node2 = $t2->find_node(-id => $label);
37 my @c1 = sort @{ $cluster1{$node1} };
38 my @c2 = sort @{ $cluster2{$node2} };
40 print " cluster"; map { print " ",$_ } @c1;
41 print " cluster"; map { print " ",$_ } @c2; print "\n";
44 if (scalar(@$inodes)) {
46 my $node1 = shift @$inodes;
47 my $node2 = shift @$inodes;
48 my @c1 = sort @{ $cluster1{$node1} };
49 my @c2 = sort @{ $cluster2{$node2} };
50 print "cluster"; map { print " ",$_ } @c1;
51 print " properly intersects cluster";
52 map { print " ",$_ } @c2; print "\n";
56 print "compatible trees\n";
61 NB: This module has exclusively class methods that work on Bio::Tree::TreeI
62 objects. An instance of Bio::Tree::Compatible cannot itself represent a tree,
63 and so typically there is no need to create one.
65 Bio::Tree::Compatible is a Perl tool for testing compatibility of
66 phylogenetic trees with nested taxa represented as Bio::Tree::Tree
67 objects. It is based on a recent characterization of ancestral
68 compatibility of semi-labeled trees in terms of their cluster
71 A semi-labeled tree is a phylogenetic tree with some of its internal
72 nodes labeled, and it can represent a classification tree as well as a
73 phylogenetic tree with nested taxa, with labeled internal nodes
74 corresponding to taxa at a higher level of aggregation or nesting than
75 that of their descendents.
77 Two semi-labeled trees are compatible if their topological
78 restrictions to the common labels are such that for each node label,
79 the smallest clusters containing it in each of the trees coincide and,
80 furthermore, no cluster in one of the trees properly intersects a
81 cluster of the other tree.
83 Future extensions of Bio::Tree::Compatible include a
84 Bio::Tree::Supertree module for combining compatible phylogenetic
85 trees with nested taxa into a common supertree.
91 User feedback is an integral part of the evolution of this and other
92 Bioperl modules. Send your comments and suggestions preferably to the
93 Bioperl mailing list. Your participation is much appreciated.
95 bioperl-l@bioperl.org - General discussion
96 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
100 Please direct usage questions or support issues to the mailing list:
102 I<bioperl-l@bioperl.org>
104 rather than to the module maintainer directly. Many experienced and
105 reponsive experts will be able look at the problem and quickly
106 address it. Please include a thorough description of the problem
107 with code and data examples if at all possible.
109 =head2 Reporting Bugs
111 Report bugs to the Bioperl bug tracking system to help us keep track
112 of the bugs and their resolution. Bug reports can be submitted via the
115 https://github.com/bioperl/bioperl-live/issues
121 =item * Philip Daniel and Charles Semple. Supertree Algorithms for
122 Nested Taxa. In: Olaf R. P. Bininda-Emonds (ed.) Phylogenetic
123 Supertrees: Combining Information to Reveal the Tree of Life,
124 I<Computational Biology>, vol. 4, chap. 7, pp. 151-171. Kluwer (2004).
126 =item * Charles Semple, Philip Daniel, Wim Hordijk, Roderic
127 D. M. Page, and Mike Steel: Supertree Algorithms for Ancestral
128 Divergence Dates and Nested Taxa. Bioinformatics B<20>(15), 2355-2360
131 =item * Merce Llabres, Jairo Rocha, Francesc Rossello, and Gabriel
132 Valiente: On the Ancestral Compatibility of Two Phylogenetic Trees
133 with Nested Taxa. J. Math. Biol. B<53>(3), 340-364 (2006).
137 =head1 AUTHOR - Gabriel Valiente
139 Email valiente@lsi.upc.edu
143 The rest of the documentation details each of the object methods.
147 package Bio
::Tree
::Compatible
;
150 # Object preamble - inherits from Bio::Root::Root
154 use base
qw(Bio::Root::Root);
156 =head2 postorder_traversal
158 Title : postorder_traversal
159 Usage : my @nodes = @{ $tree->postorder_traversal }
160 Function: Return list of nodes in postorder
161 Returns : reference to array of Bio::Tree::Node
164 For example, the postorder traversal of the tree
165 C<(((A,B)C,D),(E,F,G));> is a reference to an array of nodes with
166 internal_id 0 through 9, because the Newick standard representation
167 for phylogenetic trees is based on a postorder traversal.
175 + +-------D 9 +-------3
179 +-----+-----F +-----8-----6
185 sub postorder_traversal
{
189 push @stack, $self->get_root_node;
191 my $node = pop @stack;
193 foreach my $child ($node->each_Descendent(-sortby
=> 'internal_id')) {
197 my @postorder = reverse @queue;
201 =head2 cluster_representation
203 Title : cluster_representation
204 Usage : my %cluster = %{ $tree->cluster_representation }
205 Function: Compute the cluster representation of a tree
206 Returns : reference to hash of array of string indexed by
210 For example, the cluster representation of the tree
211 C<(((A,B)C,D),(E,F,G));> is a reference to a hash associating an array
212 of string (descendent labels) to each node, as follows:
223 9 --> [A,B,C,D,E,F,G]
227 sub cluster_representation
{
230 my @postorder = @
{ postorder_traversal
($tree) };
231 foreach my $node ( @postorder ) {
232 my @labeled = map { $_->id } grep { $_->id } $node->get_Descendents;
233 push @labeled, $node->id if $node->id;
234 $cluster{$node} = \
@labeled;
241 Title : common_labels
242 Usage : my $labels = $tree1->common_labels($tree2);
243 Function: Return set of common node labels
244 Returns : Set::Scalar
245 Args : Bio::Tree::Tree
247 For example, the common labels of the tree C<(((A,B)C,D),(E,F,G));>
248 and the tree C<((A,B)H,E,(J,(K)G)I);> are: C<[A,B,E,G]>.
256 + +-------D +-----------E
258 | +-----E | +-------J
268 my @labels1 = map { $_->id } grep { $_->id } $self->get_nodes;
269 my $common = Set
::Scalar
->new( @labels1 );
270 my @labels2 = map { $_->id } grep { $_->id } $arg->get_nodes;
271 my $temp = Set
::Scalar
->new( @labels2 );
272 return $common->intersection($temp);
275 =head2 topological_restriction
277 Title : topological_restriction
278 Usage : $tree->topological_restriction($labels)
279 Function: Compute the topological restriction of a tree to a subset
281 Returns : Bio::Tree::Tree
284 For example, the topological restrictions of each of the trees
285 C<(((A,B)C,D),(E,F,G));> and C<((A,B)H,E,(J,(K)G)I);> to the labels
286 C<[A,B,E,G]> are as follows:
302 sub topological_restriction
{
303 my ($tree, $labels) = @_;
304 for my $node ( @
{ postorder_traversal
($tree) } ) {
305 unless (ref($node)) { # skip $node if already removed
306 my @cluster = map { $_->id } grep { $_->id } $node->get_Descendents;
307 push @cluster, $node->id if $node->id;
308 my $cluster = Set
::Scalar
->new(@cluster);
309 if ($cluster->is_disjoint($labels)) {
310 $tree->remove_Node($node);
312 if ($node->id and not $labels->has($node->id)) {
313 $node->{'_id'} = undef;
322 Title : is_compatible
323 Usage : $tree1->is_compatible($tree2)
324 Function: Test compatibility of two trees
326 Args : Bio::Tree::Tree
328 For example, the topological restrictions of the trees
329 C<(((A,B)C,D),(E,F,G));> and C<((A,B)H,E,(J,(K)G)I);> to their common
330 labels, C<[A,B,E,G]>, are compatible. The respective cluster
331 representations are as follows:
341 As a second example, the trees C<(A,B);> and C<((B)A);> are
342 incompatible. Their respective cluster representations are as follows:
348 The reason is, the smallest cluster containing label C<A> is C<[A]> in
349 the first tree but C<[A,B]> in the second tree.
357 As a second example, the trees C<(((B,A),C),D);> and C<((A,(D,B)),C);>
358 are also incompatible. Their respective cluster representations are as
369 The reason is, cluster C<[A,B]> properly intersects cluster
370 C<[B,D]>. There are further incompatibilities between these trees:
371 C<[A,B,C]> properly intersects both C<[B,D]> and C<[A,B,D]>.
381 +-----------D +-----------C
386 my ($tree1, $tree2) = @_;
387 my $common = $tree1->Bio::Tree
::Compatible
::common_labels
($tree2);
388 $tree1->Bio::Tree
::Compatible
::topological_restriction
($common);
389 $tree2->Bio::Tree
::Compatible
::topological_restriction
($common);
390 my @postorder1 = @
{ postorder_traversal
($tree1) };
391 my @postorder2 = @
{ postorder_traversal
($tree2) };
392 my %cluster1 = %{ cluster_representation
($tree1) };
393 my %cluster2 = %{ cluster_representation
($tree2) };
394 my $incompat = 0; # false
396 foreach my $label ( $common->elements ) {
397 my $node1 = $tree1->find_node(-id
=> $label);
398 my @labels1 = @
{ $cluster1{$node1} };
399 my $cluster1 = Set
::Scalar
->new(@labels1);
400 my $node2 = $tree2->find_node(-id
=> $label);
401 my @labels2 = @
{ $cluster2{$node2} };
402 my $cluster2 = Set
::Scalar
->new(@labels2);
403 unless ( $cluster1->is_equal($cluster2) ) {
404 $incompat = 1; # true
405 push @labels, $label;
409 foreach my $node1 ( @postorder1 ) {
410 my @labels1 = @
{ $cluster1{$node1} };
411 my $cluster1 = Set
::Scalar
->new(@labels1);
412 foreach my $node2 ( @postorder2 ) {
413 my @labels2 = @
{$cluster2{$node2} };
414 my $cluster2 = Set
::Scalar
->new(@labels2);
415 if ($cluster1->is_properly_intersecting($cluster2)) {
416 $incompat = 1; # true
417 push @nodes, $node1, $node2;
421 return ($incompat, \
@labels, \
@nodes);